[clang] [CIR] Upstream CUDA mangling test with LLVM and OGCG verification (PR #184444)

Srivarshitha M via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 5 10:48:20 PST 2026


https://github.com/16srivarshitha updated https://github.com/llvm/llvm-project/pull/184444

>From 224da4ccb6c136719d1b3fc39896e8b53b33d03e Mon Sep 17 00:00:00 2001
From: 16srivarshitha <mvarshitha17874 at gmail.com>
Date: Wed, 4 Mar 2026 03:37:44 +0530
Subject: [PATCH 1/3] [CIR] Upstream CUDA mangling test with LLVM/OGCG checks

---
 clang/test/CIR/CodeGen/CUDA/mangling.cu | 81 +++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 clang/test/CIR/CodeGen/CUDA/mangling.cu

diff --git a/clang/test/CIR/CodeGen/CUDA/mangling.cu b/clang/test/CIR/CodeGen/CUDA/mangling.cu
new file mode 100644
index 0000000000000..bad62892cf318
--- /dev/null
+++ b/clang/test/CIR/CodeGen/CUDA/mangling.cu
@@ -0,0 +1,81 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -x cuda -emit-cir -target-sdk-version=12.3 %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -fcuda-is-device -emit-cir -target-sdk-version=12.3 %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -x cuda -emit-llvm -target-sdk-version=12.3 %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -fcuda-is-device -emit-llvm -target-sdk-version=12.3 %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x cuda -emit-llvm -target-sdk-version=12.3 %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -target-sdk-version=12.3 %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
+
+#include "../Inputs/cuda.h"
+
+namespace ns {
+    __global__ void cpp_global_function_1(int a, int* b, float c) {}
+    __global__ void cpp_global_function_2(int a, int* b, float c) {}
+    __host__ void cpp_host_function_1(int a, int* b, float c) {}
+    __host__ void cpp_host_function_2(int a, int* b, float c) {}
+    __device__ void cpp_device_function_1(int a, int* b, float c) {}
+    __device__ void cpp_device_function_2(int a, int* b, float c) {}
+}
+
+__global__ void cpp_global_function_1(int a, int* b, float c) {}
+__global__ void cpp_global_function_2(int a, int* b, float c) {}
+__host__ void cpp_host_function_1(int a, int* b, float c) {}
+__host__ void cpp_host_function_2(int a, int* b, float c) {}
+__device__ void cpp_device_function_1(int a, int* b, float c) {}
+__device__ void cpp_device_function_2(int a, int* b, float c) {}
+
+extern "C" {
+    __global__ void c_global_function_1(int a, int* b, float c) {}
+    __global__ void c_global_function_2(int a, int* b, float c) {}
+    __host__ void c_host_function_1(int a, int* b, float c) {}
+    __host__ void c_host_function_2(int a, int* b, float c) {}
+    __device__ void c_device_function_1(int a, int* b, float c) {}
+    __device__ void c_device_function_2(int a, int* b, float c) {}
+}
+
+// CIR-HOST: cir.func {{.*}} @_ZN2ns36__device_stub__cpp_global_function_1EiPif
+// CIR-DEVICE: cir.func {{.*}} @_ZN2ns21cpp_global_function_1EiPif
+// LLVM-HOST: define {{.*}} @_ZN2ns36__device_stub__cpp_global_function_1EiPif
+// LLVM-DEVICE: define {{.*}} @_ZN2ns21cpp_global_function_1EiPif
+// OGCG-HOST: define {{.*}} @_ZN2ns36__device_stub__cpp_global_function_1EiPif
+// OGCG-DEVICE: define {{.*}} @_ZN2ns21cpp_global_function_1EiPif
+
+// CIR-HOST: cir.func {{.*}} @_ZN2ns36__device_stub__cpp_global_function_2EiPif
+// CIR-DEVICE: cir.func {{.*}} @_ZN2ns21cpp_global_function_2EiPif
+
+// CIR-HOST: cir.func {{.*}} @_ZN2ns19cpp_host_function_1EiPif
+// CIR-HOST: cir.func {{.*}} @_ZN2ns19cpp_host_function_2EiPif
+
+// CIR-DEVICE: cir.func {{.*}} @_ZN2ns21cpp_device_function_1EiPif
+// CIR-DEVICE: cir.func {{.*}} @_ZN2ns21cpp_device_function_2EiPif
+
+// CIR-HOST: cir.func {{.*}} @_Z36__device_stub__cpp_global_function_1iPif
+// CIR-DEVICE: cir.func {{.*}} @_Z21cpp_global_function_1iPif
+
+// CIR-HOST: cir.func {{.*}} @_Z36__device_stub__cpp_global_function_2iPif
+// CIR-DEVICE: cir.func {{.*}} @_Z21cpp_global_function_2iPif
+
+// CIR-HOST: cir.func {{.*}} @_Z19cpp_host_function_1iPif
+// CIR-HOST: cir.func {{.*}} @_Z19cpp_host_function_2iPif
+
+// CIR-DEVICE: cir.func {{.*}} @_Z21cpp_device_function_1iPif
+// CIR-DEVICE: cir.func {{.*}} @_Z21cpp_device_function_2iPif
+
+// CIR-HOST: cir.func {{.*}} @__device_stub__c_global_function_1
+// CIR-DEVICE: cir.func {{.*}} @c_global_function_1
+
+// CIR-HOST: cir.func {{.*}} @__device_stub__c_global_function_2
+// CIR-DEVICE: cir.func {{.*}} @c_global_function_2
+
+// CIR-HOST: cir.func {{.*}} @c_host_function_1
+// CIR-HOST: cir.func {{.*}} @c_host_function_2
+
+// CIR-DEVICE: cir.func {{.*}} @c_device_function_1
+// CIR-DEVICE: cir.func {{.*}} @c_device_function_2

>From 52193d543649fba85adcf90c88acf8000555edbe Mon Sep 17 00:00:00 2001
From: 16srivarshitha <mvarshitha17874 at gmail.com>
Date: Thu, 5 Mar 2026 00:31:25 +0530
Subject: [PATCH 2/3] [CIR] Move CUDA test to standard CodeGenCUDA directory

---
 clang/test/CodeGenCUDA/Inputs/cuda.h | 183 +--------------------------
 clang/test/CodeGenCUDA/mangling.cu   |  81 ++++++++++++
 2 files changed, 83 insertions(+), 181 deletions(-)
 create mode 100644 clang/test/CodeGenCUDA/mangling.cu

diff --git a/clang/test/CodeGenCUDA/Inputs/cuda.h b/clang/test/CodeGenCUDA/Inputs/cuda.h
index 421fa4dd7dbae..204bf2972088d 100644
--- a/clang/test/CodeGenCUDA/Inputs/cuda.h
+++ b/clang/test/CodeGenCUDA/Inputs/cuda.h
@@ -1,5 +1,5 @@
 /* Minimal declarations for CUDA support.  Testing purposes only. */
-
+/* From test/CodeGenCUDA/Inputs/cuda.h. */
 #include <stddef.h>
 
 #if __HIP__ || __CUDA__
@@ -13,8 +13,6 @@
 #endif
 #define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
 #define __grid_constant__ __attribute__((grid_constant))
-#define __cluster_dims__(...) __attribute__((cluster_dims(__VA_ARGS__)))
-#define __no_cluster__ __attribute__((no_cluster))
 #else
 #define __constant__
 #define __device__
@@ -24,8 +22,6 @@
 #define __managed__
 #define __launch_bounds__(...)
 #define __grid_constant__
-#define __cluster_dims__(...)
-#define __no_cluster__
 #endif
 
 struct dim3 {
@@ -72,182 +68,7 @@ extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
 extern "C" cudaError_t cudaLaunchKernel_ptsz(const void *func, dim3 gridDim,
                                         dim3 blockDim, void **args,
                                         size_t sharedMem, cudaStream_t stream);
-extern "C" __device__ cudaError_t cudaLaunchDevice(void *func,
-                                                   void *parameterBuffer,
-                                                   dim3 gridDim, dim3 blockDim,
-                                                   unsigned int sharedMem,
-                                                   cudaStream_t stream);
-extern "C" __device__ void *cudaGetParameterBuffer(size_t alignment,
-                                                   size_t size);
+
 #endif
 
 extern "C" __device__ int printf(const char*, ...);
-
-struct char1 {
-  char x;
-  __host__ __device__ char1(char x = 0) : x(x) {}
-};
-struct char2 {
-  char x, y;
-  __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {}
-};
-struct char4 {
-  char x, y, z, w;
-  __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct uchar1 {
-  unsigned char x;
-  __host__ __device__ uchar1(unsigned char x = 0) : x(x) {}
-};
-struct uchar2 {
-  unsigned char x, y;
-  __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), y(y) {}
-};
-struct uchar4 {
-  unsigned char x, y, z, w;
-  __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct short1 {
-  short x;
-  __host__ __device__ short1(short x = 0) : x(x) {}
-};
-struct short2 {
-  short x, y;
-  __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {}
-};
-struct short4 {
-  short x, y, z, w;
-  __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct ushort1 {
-  unsigned short x;
-  __host__ __device__ ushort1(unsigned short x = 0) : x(x) {}
-};
-struct ushort2 {
-  unsigned short x, y;
-  __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : x(x), y(y) {}
-};
-struct ushort4 {
-  unsigned short x, y, z, w;
-  __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct int1 {
-  int x;
-  __host__ __device__ int1(int x = 0) : x(x) {}
-};
-struct int2 {
-  int x, y;
-  __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {}
-};
-struct int4 {
-  int x, y, z, w;
-  __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct uint1 {
-  unsigned x;
-  __host__ __device__ uint1(unsigned x = 0) : x(x) {}
-};
-struct uint2 {
-  unsigned x, y;
-  __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {}
-};
-struct uint3 {
-  unsigned x, y, z;
-  __host__ __device__ uint3(unsigned x = 0, unsigned y = 0, unsigned z = 0) : x(x), y(y), z(z) {}
-};
-struct uint4 {
-  unsigned x, y, z, w;
-  __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, unsigned w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct longlong1 {
-  long long x;
-  __host__ __device__ longlong1(long long x = 0) : x(x) {}
-};
-struct longlong2 {
-  long long x, y;
-  __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) {}
-};
-struct longlong4 {
-  long long x, y, z, w;
-  __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z = 0, long long w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct ulonglong1 {
-  unsigned long long x;
-  __host__ __device__ ulonglong1(unsigned long long x = 0) : x(x) {}
-};
-struct ulonglong2 {
-  unsigned long long x, y;
-  __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long y = 0) : x(x), y(y) {}
-};
-struct ulonglong4 {
-  unsigned long long x, y, z, w;
-  __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct float1 {
-  float x;
-  __host__ __device__ float1(float x = 0) : x(x) {}
-};
-struct float2 {
-  float x, y;
-  __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {}
-};
-struct float4 {
-  float x, y, z, w;
-  __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct double1 {
-  double x;
-  __host__ __device__ double1(double x = 0) : x(x) {}
-};
-struct double2 {
-  double x, y;
-  __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {}
-};
-struct double4 {
-  double x, y, z, w;
-  __host__ __device__ double4(double x = 0, double y = 0, double z = 0, double w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-typedef unsigned long long cudaTextureObject_t;
-typedef unsigned long long cudaSurfaceObject_t;
-
-enum cudaTextureReadMode {
-  cudaReadModeNormalizedFloat,
-  cudaReadModeElementType
-};
-
-enum cudaSurfaceBoundaryMode {
-  cudaBoundaryModeZero,
-  cudaBoundaryModeClamp,
-  cudaBoundaryModeTrap
-};
-
-enum {
-  cudaTextureType1D,
-  cudaTextureType2D,
-  cudaTextureType3D,
-  cudaTextureTypeCubemap,
-  cudaTextureType1DLayered,
-  cudaTextureType2DLayered,
-  cudaTextureTypeCubemapLayered
-};
-
-struct textureReference { };
-template <class T, int texType = cudaTextureType1D,
-          enum cudaTextureReadMode mode = cudaReadModeElementType>
-struct __attribute__((device_builtin_texture_type)) texture
-    : public textureReference {};
-
-struct surfaceReference { int desc; };
-
-template <typename T, int dim = 1>
-struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference {};
diff --git a/clang/test/CodeGenCUDA/mangling.cu b/clang/test/CodeGenCUDA/mangling.cu
new file mode 100644
index 0000000000000..437ae07f03725
--- /dev/null
+++ b/clang/test/CodeGenCUDA/mangling.cu
@@ -0,0 +1,81 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -x cuda -emit-cir -target-sdk-version=12.3 %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -fcuda-is-device -emit-cir -target-sdk-version=12.3 %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -x cuda -emit-llvm -target-sdk-version=12.3 %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -fcuda-is-device -emit-llvm -target-sdk-version=12.3 %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x cuda -emit-llvm -target-sdk-version=12.3 %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -target-sdk-version=12.3 %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
+
+#include "Inputs/cuda.h"
+
+namespace ns {
+    __global__ void cpp_global_function_1(int a, int* b, float c) {}
+    __global__ void cpp_global_function_2(int a, int* b, float c) {}
+    __host__ void cpp_host_function_1(int a, int* b, float c) {}
+    __host__ void cpp_host_function_2(int a, int* b, float c) {}
+    __device__ void cpp_device_function_1(int a, int* b, float c) {}
+    __device__ void cpp_device_function_2(int a, int* b, float c) {}
+}
+
+__global__ void cpp_global_function_1(int a, int* b, float c) {}
+__global__ void cpp_global_function_2(int a, int* b, float c) {}
+__host__ void cpp_host_function_1(int a, int* b, float c) {}
+__host__ void cpp_host_function_2(int a, int* b, float c) {}
+__device__ void cpp_device_function_1(int a, int* b, float c) {}
+__device__ void cpp_device_function_2(int a, int* b, float c) {}
+
+extern "C" {
+    __global__ void c_global_function_1(int a, int* b, float c) {}
+    __global__ void c_global_function_2(int a, int* b, float c) {}
+    __host__ void c_host_function_1(int a, int* b, float c) {}
+    __host__ void c_host_function_2(int a, int* b, float c) {}
+    __device__ void c_device_function_1(int a, int* b, float c) {}
+    __device__ void c_device_function_2(int a, int* b, float c) {}
+}
+
+// CIR-HOST: cir.func {{.*}} @_ZN2ns36__device_stub__cpp_global_function_1EiPif
+// CIR-DEVICE: cir.func {{.*}} @_ZN2ns21cpp_global_function_1EiPif
+// LLVM-HOST: define {{.*}} @_ZN2ns36__device_stub__cpp_global_function_1EiPif
+// LLVM-DEVICE: define {{.*}} @_ZN2ns21cpp_global_function_1EiPif
+// OGCG-HOST: define {{.*}} @_ZN2ns36__device_stub__cpp_global_function_1EiPif
+// OGCG-DEVICE: define {{.*}} @_ZN2ns21cpp_global_function_1EiPif
+
+// CIR-HOST: cir.func {{.*}} @_ZN2ns36__device_stub__cpp_global_function_2EiPif
+// CIR-DEVICE: cir.func {{.*}} @_ZN2ns21cpp_global_function_2EiPif
+
+// CIR-HOST: cir.func {{.*}} @_ZN2ns19cpp_host_function_1EiPif
+// CIR-HOST: cir.func {{.*}} @_ZN2ns19cpp_host_function_2EiPif
+
+// CIR-DEVICE: cir.func {{.*}} @_ZN2ns21cpp_device_function_1EiPif
+// CIR-DEVICE: cir.func {{.*}} @_ZN2ns21cpp_device_function_2EiPif
+
+// CIR-HOST: cir.func {{.*}} @_Z36__device_stub__cpp_global_function_1iPif
+// CIR-DEVICE: cir.func {{.*}} @_Z21cpp_global_function_1iPif
+
+// CIR-HOST: cir.func {{.*}} @_Z36__device_stub__cpp_global_function_2iPif
+// CIR-DEVICE: cir.func {{.*}} @_Z21cpp_global_function_2iPif
+
+// CIR-HOST: cir.func {{.*}} @_Z19cpp_host_function_1iPif
+// CIR-HOST: cir.func {{.*}} @_Z19cpp_host_function_2iPif
+
+// CIR-DEVICE: cir.func {{.*}} @_Z21cpp_device_function_1iPif
+// CIR-DEVICE: cir.func {{.*}} @_Z21cpp_device_function_2iPif
+
+// CIR-HOST: cir.func {{.*}} @__device_stub__c_global_function_1
+// CIR-DEVICE: cir.func {{.*}} @c_global_function_1
+
+// CIR-HOST: cir.func {{.*}} @__device_stub__c_global_function_2
+// CIR-DEVICE: cir.func {{.*}} @c_global_function_2
+
+// CIR-HOST: cir.func {{.*}} @c_host_function_1
+// CIR-HOST: cir.func {{.*}} @c_host_function_2
+
+// CIR-DEVICE: cir.func {{.*}} @c_device_function_1
+// CIR-DEVICE: cir.func {{.*}} @c_device_function_2

>From f65e49e206918770e0570a3e40857daf5bb3a024 Mon Sep 17 00:00:00 2001
From: 16srivarshitha <mvarshitha17874 at gmail.com>
Date: Fri, 6 Mar 2026 00:17:42 +0530
Subject: [PATCH 3/3] [CIR] Remove duplicate cuda.h from CIR directory

---
 clang/test/CIR/CodeGen/CUDA/mangling.cu       |    81 -
 .../AArch64/aarch64-cc-structs.c              |   416 +
 .../AArch64/aarch64_be-cc-structs.c           |    17 +
 .../CallConvLowering/AArch64/basic.cpp        |    85 +
 .../CallConvLowering/AArch64/ptr-fields.c     |    49 +
 .../CallConvLowering/AArch64/struct.c         |   167 +
 .../CallConvLowering/AArch64/union.c          |    71 +
 .../CallConvLowering/AArch64/vector-fp16.c    |    84 +
 .../CallConvLowering/NVPTX/basic.cpp          |    90 +
 .../CallConvLowering/x86_64/basic.cpp         |   208 +
 .../Incubator/CallConvLowering/x86_64/fptrs.c |    47 +
 .../CallConvLowering/x86_64/int128.cpp        |    54 +
 .../CallConvLowering/x86_64/varargs.c         |    17 +
 .../CallConvLowering/x86_64/void-ptr.c        |    22 +
 .../CodeGen/AArch64/bf16-getset-intrinsics.c  |   185 +
 .../Incubator/CodeGen/AArch64/neon-arith.c    |  1017 +
 .../Incubator/CodeGen/AArch64/neon-crypto.c   |    36 +
 .../Incubator/CodeGen/AArch64/neon-ext-mov.c  |   215 +
 .../CIR/Incubator/CodeGen/AArch64/neon-fp16.c |   693 +
 .../CIR/Incubator/CodeGen/AArch64/neon-ldst.c |   768 +
 .../CIR/Incubator/CodeGen/AArch64/neon-misc.c |  2816 +++
 .../test/CIR/Incubator/CodeGen/AArch64/neon.c | 19515 ++++++++++++++++
 .../AArch64/v8.2a-neon-intrinsics-generic.c   |   529 +
 .../AArch64/v8.5a-neon-frint3264-intrinsic.c  |   206 +
 .../Incubator/CodeGen/CUDA/address-spaces.cu  |    19 +
 .../CodeGen/CUDA/addrspace-lowering.cu        |    19 +
 .../CodeGen/CUDA/builtin-functions.cu         |    79 +
 .../CodeGen/CUDA/builtins-nvptx-ptx60.cu      |    65 +
 .../Incubator/CodeGen/CUDA/builtins-sm90.cu   |    43 +
 .../CodeGen/CUDA/cuda-builtin-vars.cu         |   107 +
 .../CIR/Incubator/CodeGen/CUDA/destructor.cu  |    61 +
 .../CIR/Incubator/CodeGen/CUDA/global-vars.cu |   101 +
 .../CIR/Incubator/CodeGen/CUDA/mangling.cu    |    92 +
 .../test/CIR/Incubator/CodeGen/CUDA/printf.cu |    48 +
 .../Incubator/CodeGen/CUDA/registration.cu    |   209 +
 .../CodeGen/CUDA/simple-nvptx-triple.cu       |     9 +
 .../test/CIR/Incubator/CodeGen/CUDA/simple.cu |   141 +
 .../CIR/Incubator/CodeGen/CUDA/surface.cu     |    26 +
 .../CIR/Incubator/CodeGen/CUDA/texture.cu     |    24 +
 .../Incubator/CodeGen/HIP/address-spaces.cpp  |    20 +
 .../CodeGen/HIP/addrspace-lowering.cpp        |   259 +
 .../Incubator/CodeGen/HIP/amdgpu-attrs.hip    |   113 +
 .../CodeGen/HIP/amdgpu-hip-kernel-abi.hip     |    74 +
 .../CodeGen/HIP/amdgpu-module-flags.hip       |    30 +
 .../CodeGen/HIP/amdgpu-vec3-memory-type.hip   |    96 +
 .../CodeGen/HIP/builtins-amdgcn-gfx10.hip     |    63 +
 .../CodeGen/HIP/builtins-amdgcn-gfx11.hip     |   108 +
 .../CodeGen/HIP/builtins-amdgcn-gfx1250.hip   |    51 +
 .../HIP/builtins-amdgcn-image-sample.hip      |   256 +
 .../CodeGen/HIP/builtins-amdgcn-image.hip     |   122 +
 .../HIP/builtins-amdgcn-logb-scalbn.hip       |    74 +
 .../builtins-amdgcn-raw-buffer-atomics.hip    |    93 +
 .../HIP/builtins-amdgcn-raw-buffer.hip        |   145 +
 .../CodeGen/HIP/builtins-amdgcn-vi.hip        |   100 +
 .../Incubator/CodeGen/HIP/builtins-amdgcn.hip |   429 +
 .../HIP/calling-conv-lowering-amdgpu.hip      |    42 +
 .../CIR/Incubator/CodeGen/HIP/global-vars.cpp |   101 +
 .../CIR/Incubator/CodeGen/HIP/hip-cuid.hip    |    26 +
 .../CIR/Incubator/CodeGen/HIP/ptr-diff.cpp    |    60 +
 .../Incubator/CodeGen/HIP/registration.cpp    |   262 +
 .../test/CIR/Incubator/CodeGen/HIP/simple.cpp |   144 +
 .../CodeGen}/Inputs/cuda.h                    |     6 -
 .../Incubator/CodeGen/Inputs/std-compare.h    |   324 +
 .../CodeGen/OpenCL/addrspace-alloca.cl        |    49 +
 .../CodeGen/OpenCL/addrspace_cast.cl          |    20 +
 .../CodeGen/OpenCL/amdgpu-kernel-abi.cl       |    66 +
 .../Incubator/CodeGen/OpenCL/array-decay.cl   |    25 +
 .../CIR/Incubator/CodeGen/OpenCL/as_type.cl   |    55 +
 .../Incubator/CodeGen/OpenCL/async_copy.cl    |    34 +
 .../CodeGen/OpenCL/builtins-amdgcn-gfx10.cl   |    65 +
 .../CodeGen/OpenCL/builtins-amdgcn-gfx11.cl   |   115 +
 .../CodeGen/OpenCL/builtins-amdgcn-gfx1250.cl |    51 +
 .../OpenCL/builtins-amdgcn-image-sample.cl    |   253 +
 .../CodeGen/OpenCL/builtins-amdgcn-image.cl   |   119 +
 .../OpenCL/builtins-amdgcn-logb-scalbn.cl     |    74 +
 .../builtins-amdgcn-raw-buffer-atomics.cl     |    91 +
 .../OpenCL/builtins-amdgcn-raw-buffer.cl      |   143 +
 .../CodeGen/OpenCL/builtins-amdgcn-vi.cl      |   100 +
 .../CodeGen/OpenCL/builtins_amdgcn.cl         |   442 +
 .../CodeGen/OpenCL/cl-uniform-wg-size.cl      |    41 +
 .../Incubator/CodeGen/OpenCL/convergent.cl    |   105 +
 .../Incubator/CodeGen/OpenCL/elemwise-ops.cl  |    32 +
 .../CodeGen/OpenCL/global-var-with-ctor.clcpp |    37 +
 .../CIR/Incubator/CodeGen/OpenCL/global.cl    |    27 +
 .../OpenCL/kernel-arg-info-single-as.cl       |    14 +
 .../CodeGen/OpenCL/kernel-arg-info.cl         |    90 +
 .../CodeGen/OpenCL/kernel-arg-metadata.cl     |    12 +
 .../CodeGen/OpenCL/kernel-attributes.cl       |    35 +
 .../CodeGen/OpenCL/kernel-unit-attr.cl        |    14 +
 .../CIR/Incubator/CodeGen/OpenCL/nothrow.cl   |    26 +
 .../CIR/Incubator/CodeGen/OpenCL/null-vec.cl  |    25 +
 .../Incubator/CodeGen/OpenCL/opencl-c-lang.cl |     4 +
 .../CodeGen/OpenCL/opencl-version.cl          |    16 +
 .../CIR/Incubator/CodeGen/OpenCL/printf.cl    |    55 +
 .../CodeGen/OpenCL/spir-calling-conv.cl       |    16 +
 .../Incubator/CodeGen/OpenCL/spirv-target.cl  |    30 +
 .../CodeGen/OpenCL/static-vardecl.cl          |    24 +
 .../Incubator/CodeGen/OpenCL/str_literals.cl  |    23 +
 .../CodeGen/OpenCL/vec_initializer.cl         |    41 +
 .../CIR/Incubator/CodeGen/OpenCL/vec_logic.cl |    39 +
 .../Incubator/CodeGen/OpenCL/vec_widening.cl  |    24 +
 .../CIR/Incubator/CodeGen/OpenMP/barrier.cpp  |     8 +
 .../CIR/Incubator/CodeGen/OpenMP/parallel.cpp |    34 +
 .../CIR/Incubator/CodeGen/OpenMP/taskwait.cpp |     9 +
 .../Incubator/CodeGen/OpenMP/taskyield.cpp    |     8 +
 clang/test/CIR/Incubator/CodeGen/String.cpp   |    73 +
 .../CIR/Incubator/CodeGen/StringExample.cpp   |    34 +
 .../CIR/Incubator/CodeGen/X86/avx-builtins.c  |   232 +
 .../CodeGen/X86/avx-shuffle-builtins.c        |    95 +
 .../CodeGen/X86/avx10_2_512bf16-builtins.c    |    20 +
 .../CodeGen/X86/avx10_2bf16-builtins.c        |    60 +
 .../CIR/Incubator/CodeGen/X86/avx2-builtins.c |   145 +
 .../Incubator/CodeGen/X86/avx512bw-builtins.c |   145 +
 .../Incubator/CodeGen/X86/avx512dq-builtins.c |    77 +
 .../Incubator/CodeGen/X86/avx512f-builtins.c  |   702 +
 .../CodeGen/X86/avx512fp16-builtins.c         |    73 +
 .../CodeGen/X86/avx512vbmi2-builtins.c        |    60 +
 .../Incubator/CodeGen/X86/avx512vl-builtins.c |  1049 +
 .../CodeGen/X86/avx512vlbw-buiiltins.c        |   221 +
 .../CodeGen/X86/avx512vldq-builtins.c         |   126 +
 .../CodeGen/X86/avx512vlvbmi2-builtins.c      |   105 +
 .../CIR/Incubator/CodeGen/X86/bmi-builtins.c  |    35 +
 .../Incubator/CodeGen/X86/lzcnt-builtins.c    |    27 +
 .../CIR/Incubator/CodeGen/X86/mmx-builtins.c  |    34 +
 .../test/CIR/Incubator/CodeGen/X86/palignr.c  |    32 +
 clang/test/CIR/Incubator/CodeGen/X86/pause.c  |    22 +
 .../CIR/Incubator/CodeGen/X86/rd-builtins.c   |    34 +
 .../CIR/Incubator/CodeGen/X86/sse-builtins.c  |    81 +
 .../CIR/Incubator/CodeGen/X86/sse2-builtins.c |   119 +
 .../CIR/Incubator/CodeGen/X86/sse3-builtins.c |    21 +
 .../Incubator/CodeGen/X86/sse41-builtins.c    |   125 +
 .../CIR/Incubator/CodeGen/X86/x86_64-xsave.c  |   339 +
 .../CodeGen/aapcs-volatile-bitfields.c        |   285 +
 .../CodeGen/aarch64-neon-vdup-lane.c          |   228 +
 .../CIR/Incubator/CodeGen/abstract-cond.c     |    38 +
 .../CodeGen/address-space-cast-subscript.cpp  |    73 +
 .../CodeGen/address-space-conversion.cpp      |    68 +
 .../CIR/Incubator/CodeGen/address-space.c     |    22 +
 clang/test/CIR/Incubator/CodeGen/agg-copy.c   |    94 +
 .../Incubator/CodeGen/agg-init-inherit.cpp    |    68 +
 clang/test/CIR/Incubator/CodeGen/agg-init.cpp |    62 +
 .../test/CIR/Incubator/CodeGen/agg-init2.cpp  |    38 +
 clang/test/CIR/Incubator/CodeGen/align-load.c |    80 +
 .../test/CIR/Incubator/CodeGen/align-store.c  |    92 +
 clang/test/CIR/Incubator/CodeGen/alignment.c  |    23 +
 .../CodeGen/amdgpu-address-spaces.cpp         |    51 +
 .../CIR/Incubator/CodeGen/analysis-only.cpp   |     8 +
 .../CodeGen/annotations-declaration.c         |    30 +
 .../CIR/Incubator/CodeGen/annotations-var.c   |    41 +
 .../CodeGen/applearm64-array-cookies.cpp      |    54 +
 .../Incubator/CodeGen/array-init-destroy.cpp  |    76 +
 .../Incubator/CodeGen/array-init-partial.cpp  |    52 +
 clang/test/CIR/Incubator/CodeGen/array-init.c |   221 +
 .../test/CIR/Incubator/CodeGen/array-init.cpp |    38 +
 .../CIR/Incubator/CodeGen/array-new-init.cpp  |    60 +
 .../Incubator/CodeGen/array-unknown-bound.cpp |    14 +
 clang/test/CIR/Incubator/CodeGen/array.c      |    31 +
 clang/test/CIR/Incubator/CodeGen/array.cpp    |    95 +
 clang/test/CIR/Incubator/CodeGen/asm.c        |   349 +
 .../CIR/Incubator/CodeGen/assign-operator.cpp |   216 +
 .../CIR/Incubator/CodeGen/atomic-runtime.cpp  |   309 +
 .../Incubator/CodeGen/atomic-thread-fence.c   |   184 +
 .../Incubator/CodeGen/atomic-type-casts.cpp   |    85 +
 .../CIR/Incubator/CodeGen/atomic-xchg-field.c |    85 +
 clang/test/CIR/Incubator/CodeGen/atomic.cpp   |  1464 ++
 .../CodeGen/attribute-annotate-multiple.cpp   |    83 +
 clang/test/CIR/Incubator/CodeGen/attributes.c |    22 +
 clang/test/CIR/Incubator/CodeGen/basic.c      |    54 +
 clang/test/CIR/Incubator/CodeGen/basic.cpp    |   182 +
 clang/test/CIR/Incubator/CodeGen/bf16-ops.c   |  1639 ++
 .../test/CIR/Incubator/CodeGen/binassign.cpp  |    75 +
 clang/test/CIR/Incubator/CodeGen/binop.c      |    13 +
 clang/test/CIR/Incubator/CodeGen/binop.cpp    |   118 +
 .../CIR/Incubator/CodeGen/bitfield-union.c    |    71 +
 clang/test/CIR/Incubator/CodeGen/bitfields.c  |   158 +
 .../test/CIR/Incubator/CodeGen/bitfields.cpp  |    65 +
 .../test/CIR/Incubator/CodeGen/bitfields_be.c |    79 +
 clang/test/CIR/Incubator/CodeGen/bitint.c     |    22 +
 clang/test/CIR/Incubator/CodeGen/bitint.cpp   |    85 +
 clang/test/CIR/Incubator/CodeGen/bool.c       |    39 +
 clang/test/CIR/Incubator/CodeGen/bswap.cpp    |    30 +
 .../CIR/Incubator/CodeGen/build-deferred.cpp  |    26 +
 .../CIR/Incubator/CodeGen/builtin-abort.c     |    15 +
 .../Incubator/CodeGen/builtin-addressof.cpp   |    99 +
 .../CIR/Incubator/CodeGen/builtin-alloca.c    |    62 +
 .../Incubator/CodeGen/builtin-arm-exclusive.c |    91 +
 .../CIR/Incubator/CodeGen/builtin-assume.cpp  |    92 +
 .../CIR/Incubator/CodeGen/builtin-bcopy.cpp   |    77 +
 .../Incubator/CodeGen/builtin-bit-cast.cpp    |   136 +
 .../Incubator/CodeGen/builtin-bitreverse.c    |    44 +
 .../CIR/Incubator/CodeGen/builtin-bits.cpp    |   184 +
 .../CodeGen/builtin-constant-evaluated.cpp    |    34 +
 .../Incubator/CodeGen/builtin-constant-fold.c |    28 +
 .../Incubator/CodeGen/builtin-constant-p.c    |    26 +
 .../CIR/Incubator/CodeGen/builtin-fcmp-sse.c  |   102 +
 .../CodeGen/builtin-floating-point.c          |  1916 ++
 .../CIR/Incubator/CodeGen/builtin-isfpclass.c |   129 +
 .../Incubator/CodeGen/builtin-isinf-sign.c    |    29 +
 .../CIR/Incubator/CodeGen/builtin-ms-alloca.c |    23 +
 .../Incubator/CodeGen/builtin-nontemporal.cpp |    24 +
 .../CIR/Incubator/CodeGen/builtin-prefetch.c  |    20 +
 .../CIR/Incubator/CodeGen/builtin-rotate.c    |    89 +
 .../CodeGen/builtin-setjmp-longjmp.c          |    84 +
 .../CIR/Incubator/CodeGen/builtin-signbit.c   |    32 +
 .../CIR/Incubator/CodeGen/builtin-types.c     |     6 +
 .../Incubator/CodeGen/builtin-x86-pshufd.cpp  |   113 +
 .../Incubator/CodeGen/builtin-x86-pslldqi.cpp |   176 +
 .../Incubator/CodeGen/builtin-x86-psrldqi.cpp |   177 +
 .../Incubator/CodeGen/builtins-elementwise.c  |   416 +
 .../CIR/Incubator/CodeGen/builtins-memory.c   |   260 +
 .../Incubator/CodeGen/builtins-overflow.cpp   |   364 +
 clang/test/CIR/Incubator/CodeGen/builtins.cpp |   407 +
 clang/test/CIR/Incubator/CodeGen/c11atomic.c  |    13 +
 .../CIR/Incubator/CodeGen/c89-implicit-int.c  |    10 +
 .../Incubator/CodeGen/call-extra-attrs.cpp    |    33 +
 .../Incubator/CodeGen/call-side-effect.cpp    |    25 +
 .../CodeGen/call-via-class-member-funcptr.cpp |    57 +
 clang/test/CIR/Incubator/CodeGen/call.c       |    91 +
 clang/test/CIR/Incubator/CodeGen/call.cpp     |    14 +
 .../CIR/Incubator/CodeGen/cast-lvalue.cpp     |    76 +
 clang/test/CIR/Incubator/CodeGen/cast.c       |    20 +
 clang/test/CIR/Incubator/CodeGen/cast.cpp     |   143 +
 .../test/CIR/Incubator/CodeGen/class_cast.cpp |    89 +
 .../test/CIR/Incubator/CodeGen/clear_cache.c  |    27 +
 clang/test/CIR/Incubator/CodeGen/cmp.cpp      |    18 +
 .../test/CIR/Incubator/CodeGen/cold-attr.cpp  |    19 +
 clang/test/CIR/Incubator/CodeGen/comma.cpp    |    30 +
 .../Incubator/CodeGen/complex-arithmetic.c    |   965 +
 .../Incubator/CodeGen/complex-builtins.cpp    |    21 +
 .../test/CIR/Incubator/CodeGen/complex-cast.c |   264 +
 .../CIR/Incubator/CodeGen/complex-cast.cpp    |    45 +
 .../CodeGen/complex-compound-assignment.cpp   |    56 +
 .../CIR/Incubator/CodeGen/complex-init-list.c |    44 +
 clang/test/CIR/Incubator/CodeGen/complex.c    |   414 +
 clang/test/CIR/Incubator/CodeGen/complex.cpp  |   458 +
 .../CodeGen/compound-literal-empty.c          |    18 +
 .../CIR/Incubator/CodeGen/compound-literal.c  |   106 +
 .../CodeGen/concept-specialization.cpp        |    56 +
 clang/test/CIR/Incubator/CodeGen/cond.cpp     |    45 +
 .../Incubator/CodeGen/conditional-cleanup.cpp |   230 +
 .../CIR/Incubator/CodeGen/const-alloca.cpp    |   172 +
 .../test/CIR/Incubator/CodeGen/const-array.c  |    18 +
 .../CIR/Incubator/CodeGen/const-baseclass.cpp |    34 +
 .../CIR/Incubator/CodeGen/const-bitfields.c   |    47 +
 .../CIR/Incubator/CodeGen/const-complex.cpp   |    22 +
 .../CIR/Incubator/CodeGen/constant-expr.cpp   |    42 +
 clang/test/CIR/Incubator/CodeGen/constptr.c   |     8 +
 .../Incubator/CodeGen/copy-constructor.cpp    |    93 +
 .../test/CIR/Incubator/CodeGen/coro-task.cpp  |   431 +
 clang/test/CIR/Incubator/CodeGen/count-of.c   |    52 +
 .../test/CIR/Incubator/CodeGen/ctor-alias.cpp |    40 +
 .../CodeGen/ctor-member-lvalue-to-rvalue.cpp  |    35 +
 clang/test/CIR/Incubator/CodeGen/ctor.cpp     |    32 +
 .../CIR/Incubator/CodeGen/cxx-default-arg.cpp |    12 +
 .../test/CIR/Incubator/CodeGen/cxx-traits.cpp |    20 +
 .../CodeGen/cxx1z-inline-variables.cpp        |    50 +
 .../Incubator/CodeGen/default-address-space.c |    93 +
 .../CIR/Incubator/CodeGen/default-methods.cpp |    24 +
 .../test/CIR/Incubator/CodeGen/defaultarg.cpp |    12 +
 .../CodeGen/defined-pure-virtual-func.cpp     |    58 +
 .../CIR/Incubator/CodeGen/delegating-ctor.cpp |   102 +
 .../CIR/Incubator/CodeGen/delete-array.cpp    |     8 +
 clang/test/CIR/Incubator/CodeGen/delete.cpp   |    50 +
 .../CIR/Incubator/CodeGen/derived-cast.cpp    |    40 +
 .../CIR/Incubator/CodeGen/derived-to-base.cpp |   199 +
 clang/test/CIR/Incubator/CodeGen/dlti.c       |    32 +
 .../test/CIR/Incubator/CodeGen/dtor-alias.cpp |    16 +
 .../CIR/Incubator/CodeGen/dtors-scopes.cpp    |    36 +
 clang/test/CIR/Incubator/CodeGen/dtors.cpp    |   207 +
 .../CIR/Incubator/CodeGen/dumb-record.cpp     |    64 +
 .../dynamic-alloca-with-address-space.c       |    52 +
 .../CodeGen/dynamic-cast-address-space.cpp    |    79 +
 .../Incubator/CodeGen/dynamic-cast-exact.cpp  |   130 +
 .../CodeGen/dynamic-cast-relative-layout.cpp  |    36 +
 .../CIR/Incubator/CodeGen/dynamic-cast.cpp    |    90 +
 .../CIR/Incubator/CodeGen/dynamic-cast.mlir   |   123 +
 clang/test/CIR/Incubator/CodeGen/eh.cpp       |    63 +
 .../CIR/Incubator/CodeGen/empty-try-catch.cpp |    33 +
 clang/test/CIR/Incubator/CodeGen/error-attr.c |    27 +
 .../CIR/Incubator/CodeGen/evaluate-expr.c     |    31 +
 .../CIR/Incubator/CodeGen/expressions.cpp     |    11 +
 .../CodeGen/finegrain-bitfield-access.cpp     |   271 +
 .../Incubator/CodeGen/fixedpoint-literal.c    |    40 +
 .../test/CIR/Incubator/CodeGen/float16-ops.c  |  1636 ++
 .../CIR/Incubator/CodeGen/forward-decls.cpp   |   124 +
 clang/test/CIR/Incubator/CodeGen/fp16-ops.c   |   805 +
 clang/test/CIR/Incubator/CodeGen/fullexpr.cpp |    53 +
 clang/test/CIR/Incubator/CodeGen/fun-ptr.c    |    72 +
 .../CIR/Incubator/CodeGen/func_dsolocal_pie.c |    34 +
 .../CIR/Incubator/CodeGen/function-attrs.cpp  |    37 +
 .../CodeGen/function-to-pointer-decay.c       |    13 +
 .../CIR/Incubator/CodeGen/generic-selection.c |    18 +
 .../CodeGen/global-const-record-crash.c       |    30 +
 .../CIR/Incubator/CodeGen/global-constant.c   |    16 +
 .../Incubator/CodeGen/global-ctor-dtor.cpp    |    39 +
 .../CIR/Incubator/CodeGen/global-init.cpp     |    45 +
 .../test/CIR/Incubator/CodeGen/global-new.cpp |    84 +
 .../CodeGen/globals-neg-index-array.c         |    20 +
 .../Incubator/CodeGen/globals-ref-globals.c   |   116 +
 clang/test/CIR/Incubator/CodeGen/globals.c    |   115 +
 clang/test/CIR/Incubator/CodeGen/globals.cpp  |   137 +
 .../CIR/Incubator/CodeGen/gnu-extension.c     |    19 +
 clang/test/CIR/Incubator/CodeGen/gnu-null.cpp |    28 +
 clang/test/CIR/Incubator/CodeGen/gnu89.c      |     5 +
 clang/test/CIR/Incubator/CodeGen/goto.cpp     |   380 +
 clang/test/CIR/Incubator/CodeGen/hello.c      |    22 +
 clang/test/CIR/Incubator/CodeGen/hot-attr.cpp |    16 +
 .../CIR/Incubator/CodeGen/if-consteval.cpp    |    33 +
 .../CIR/Incubator/CodeGen/if-constexpr.cpp    |    92 +
 .../CIR/Incubator/CodeGen/implicit-return.cpp |    26 +
 clang/test/CIR/Incubator/CodeGen/inc-bool.cpp |    14 +
 clang/test/CIR/Incubator/CodeGen/inc-dec.cpp  |    55 +
 .../CodeGen/inheriting-constructor.cpp        |    12 +
 .../CIR/Incubator/CodeGen/init_priority.cpp   |    20 +
 .../Incubator/CodeGen/initlist-ptr-ptr.cpp    |    84 +
 .../CodeGen/initlist-ptr-unsigned.cpp         |    65 +
 clang/test/CIR/Incubator/CodeGen/int-wrap.c   |    30 +
 clang/test/CIR/Incubator/CodeGen/int128.cpp   |    74 +
 .../CIR/Incubator/CodeGen/kr-func-promote.c   |    13 +
 .../test/CIR/Incubator/CodeGen/label-values.c |   275 +
 clang/test/CIR/Incubator/CodeGen/lalg.c       |    20 +
 clang/test/CIR/Incubator/CodeGen/lambda.cpp   |   392 +
 clang/test/CIR/Incubator/CodeGen/libc.c       |    71 +
 clang/test/CIR/Incubator/CodeGen/libcall.cpp  |    63 +
 .../CIR/Incubator/CodeGen/link-bitcode-file.c |    41 +
 clang/test/CIR/Incubator/CodeGen/linkage.c    |    34 +
 clang/test/CIR/Incubator/CodeGen/literals.c   |     9 +
 clang/test/CIR/Incubator/CodeGen/literals.cpp |     8 +
 .../test/CIR/Incubator/CodeGen/loop-scope.cpp |    32 +
 clang/test/CIR/Incubator/CodeGen/loop.cpp     |   283 +
 .../CIR/Incubator/CodeGen/lvalue-refs.cpp     |    19 +
 .../CodeGen/materialize-temporary.cpp         |    54 +
 .../Incubator/CodeGen/member-init-struct.cpp  |    40 +
 .../CIR/Incubator/CodeGen/mms-bitfields.c     |    66 +
 clang/test/CIR/Incubator/CodeGen/module-asm.c |     6 +
 clang/test/CIR/Incubator/CodeGen/move.cpp     |    59 +
 .../Incubator/CodeGen/ms-intrinsics-other.c   |    55 +
 .../CIR/Incubator/CodeGen/multi-vtable.cpp    |   130 +
 clang/test/CIR/Incubator/CodeGen/new-null.cpp |   122 +
 clang/test/CIR/Incubator/CodeGen/new.cpp      |   370 +
 clang/test/CIR/Incubator/CodeGen/no-common.c  |    16 +
 clang/test/CIR/Incubator/CodeGen/no-pie.c     |    11 +
 .../CIR/Incubator/CodeGen/no-proto-fun-ptr.c  |    27 +
 .../Incubator/CodeGen/no-proto-is-void.cpp    |    13 +
 .../test/CIR/Incubator/CodeGen/no-prototype.c |    84 +
 .../Incubator/CodeGen/no-unique-address.cpp   |    40 +
 clang/test/CIR/Incubator/CodeGen/noexcept.cpp |    36 +
 .../CodeGen/non-odr-use-constant.cpp          |    66 +
 .../Incubator/CodeGen/nonzeroinit-struct.cpp  |    23 +
 clang/test/CIR/Incubator/CodeGen/nrvo-eh.cpp  |   127 +
 clang/test/CIR/Incubator/CodeGen/nrvo.cpp     |    81 +
 .../CodeGen/null-arithmatic-expression.c      |    12 +
 .../CIR/Incubator/CodeGen/nullptr-init.cpp    |    76 +
 clang/test/CIR/Incubator/CodeGen/offsetof.c   |    18 +
 clang/test/CIR/Incubator/CodeGen/ofstream.cpp |    87 +
 clang/test/CIR/Incubator/CodeGen/opaque.c     |    12 +
 clang/test/CIR/Incubator/CodeGen/opaque.cpp   |   152 +
 .../test/CIR/Incubator/CodeGen/operators.cpp  |    14 +
 .../Incubator/CodeGen/optimization-attr.cpp   |    32 +
 clang/test/CIR/Incubator/CodeGen/optnone.cpp  |    27 +
 .../CIR/Incubator/CodeGen/pack-indexing.cpp   |    38 +
 .../CIR/Incubator/CodeGen/packed-structs.c    |   133 +
 .../CIR/Incubator/CodeGen/paren-list-init.cpp |    69 +
 .../CIR/Incubator/CodeGen/pass-object-size.c  |    29 +
 .../CIR/Incubator/CodeGen/pointer-arith-ext.c |   126 +
 .../CodeGen/pointer-to-data-member-cast.cpp   |    98 +
 .../CodeGen/pointer-to-data-member-cmp.cpp    |    44 +
 .../CodeGen/pointer-to-data-member.cpp        |    62 +
 .../CodeGen/pointer-to-member-func.cpp        |   242 +
 clang/test/CIR/Incubator/CodeGen/pointer.cpp  |     6 +
 clang/test/CIR/Incubator/CodeGen/pointers.cpp |    49 +
 .../Incubator/CodeGen/pred-info-builtins.c    |    40 +
 .../test/CIR/Incubator/CodeGen/predefined.cpp |    22 +
 clang/test/CIR/Incubator/CodeGen/ptrdiff.c    |    18 +
 clang/test/CIR/Incubator/CodeGen/ptrdiff.cpp  |    23 +
 clang/test/CIR/Incubator/CodeGen/rangefor.cpp |    71 +
 .../CIR/Incubator/CodeGen/requires-expr.cpp   |    75 +
 clang/test/CIR/Incubator/CodeGen/return.cpp   |    66 +
 .../Incubator/CodeGen/same-mangled-name.cpp   |    15 +
 .../CodeGen/scalar_to_scalar_bitcast.cl       |    24 +
 .../CodeGen/scoped-atomic-load-store.c        |    35 +
 clang/test/CIR/Incubator/CodeGen/shift.cpp    |     8 +
 .../CIR/Incubator/CodeGen/sizeof-pack.cpp     |    36 +
 .../skip-functions-from-system-headers.cpp    |    17 +
 .../CIR/Incubator/CodeGen/source-loc-expr.cpp |   126 +
 .../CodeGen/source-location-scope.cpp         |    61 +
 .../CIR/Incubator/CodeGen/sourcelocation.cpp  |    90 +
 .../CodeGen/special-virtual-func.cpp          |    16 +
 .../Incubator/CodeGen/spelling-locations.cpp  |   100 +
 .../test/CIR/Incubator/CodeGen/static-vars.c  |    50 +
 .../CIR/Incubator/CodeGen/static-vars.cpp     |    49 +
 clang/test/CIR/Incubator/CodeGen/static.cpp   |    90 +
 .../Incubator/CodeGen/static_class_ref.cpp    |    31 +
 .../test/CIR/Incubator/CodeGen/std-array.cpp  |    13 +
 clang/test/CIR/Incubator/CodeGen/std-find.cpp |    27 +
 clang/test/CIR/Incubator/CodeGen/stmt-expr.c  |    42 +
 .../test/CIR/Incubator/CodeGen/stmt-expr.cpp  |    31 +
 .../CIR/Incubator/CodeGen/stmtexpr-init.c     |    48 +
 clang/test/CIR/Incubator/CodeGen/store.c      |    29 +
 .../CIR/Incubator/CodeGen/string-literals.c   |    24 +
 .../test/CIR/Incubator/CodeGen/struct-comma.c |    14 +
 .../test/CIR/Incubator/CodeGen/struct-empty.c |    23 +
 clang/test/CIR/Incubator/CodeGen/struct.c     |   119 +
 clang/test/CIR/Incubator/CodeGen/struct.cpp   |   230 +
 .../Incubator/CodeGen/structural-binding.cpp  |   112 +
 .../CIR/Incubator/CodeGen/switch-gnurange.cpp |   348 +
 .../switch-unreachable-after-break.cpp        |    49 +
 clang/test/CIR/Incubator/CodeGen/switch.cpp   |   381 +
 .../CodeGen/synthetic-try-resume.cpp          |    89 +
 .../test/CIR/Incubator/CodeGen/tbaa-bitinit.c |    26 +
 clang/test/CIR/Incubator/CodeGen/tbaa-enum.c  |   148 +
 .../test/CIR/Incubator/CodeGen/tbaa-enum.cpp  |   156 +
 .../CIR/Incubator/CodeGen/tbaa-pointer.cpp    |   126 +
 .../test/CIR/Incubator/CodeGen/tbaa-scalar.c  |   143 +
 .../CIR/Incubator/CodeGen/tbaa-struct.cpp     |   403 +
 clang/test/CIR/Incubator/CodeGen/tbaa-union.c |    32 +
 .../test/CIR/Incubator/CodeGen/tbaa-vptr.cpp  |    34 +
 .../CIR/Incubator/CodeGen/temporaries.cpp     |    65 +
 .../CodeGen/temporary-materialization.cpp     |    43 +
 clang/test/CIR/Incubator/CodeGen/tempref.cpp  |    42 +
 clang/test/CIR/Incubator/CodeGen/ternary.c    |    22 +
 clang/test/CIR/Incubator/CodeGen/ternary.cpp  |    97 +
 .../CIR/Incubator/CodeGen/thread-local.cpp    |    65 +
 .../CodeGen/three-way-comparison.cpp          |    68 +
 clang/test/CIR/Incubator/CodeGen/throw.cpp    |   434 +
 clang/test/CIR/Incubator/CodeGen/tls.c        |    19 +
 clang/test/CIR/Incubator/CodeGen/trap.cpp     |    28 +
 .../CIR/Incubator/CodeGen/trivial-copy.cpp    |    21 +
 .../CIR/Incubator/CodeGen/try-catch-dtors.cpp |   454 +
 .../test/CIR/Incubator/CodeGen/try-catch.cpp  |   207 +
 .../test/CIR/Incubator/CodeGen/type-trait.cpp |    29 +
 clang/test/CIR/Incubator/CodeGen/typedef.c    |    10 +
 clang/test/CIR/Incubator/CodeGen/typeinfo     |    24 +
 .../CIR/Incubator/CodeGen/types-IEEE-quad.c   |    32 +
 .../CIR/Incubator/CodeGen/types-nullptr.cpp   |     9 +
 clang/test/CIR/Incubator/CodeGen/types.c      |    46 +
 .../CIR/Incubator/CodeGen/unary-deref.cpp     |    17 +
 clang/test/CIR/Incubator/CodeGen/unary.c      |    44 +
 clang/test/CIR/Incubator/CodeGen/unary.cpp    |   232 +
 .../test/CIR/Incubator/CodeGen/union-array.c  |    59 +
 .../CIR/Incubator/CodeGen/union-empty.cpp     |    22 +
 clang/test/CIR/Incubator/CodeGen/union-init.c |    63 +
 .../CIR/Incubator/CodeGen/union-padding.c     |    32 +
 clang/test/CIR/Incubator/CodeGen/union.cpp    |    91 +
 .../CIR/Incubator/CodeGen/unreachable.cpp     |    28 +
 clang/test/CIR/Incubator/CodeGen/uwtable.cpp  |    56 +
 .../CIR/Incubator/CodeGen/var-arg-float.c     |   118 +
 .../CIR/Incubator/CodeGen/var-arg-scope.c     |   105 +
 clang/test/CIR/Incubator/CodeGen/var-arg.c    |   121 +
 .../CIR/Incubator/CodeGen/variadic-ctor.cpp   |    11 +
 clang/test/CIR/Incubator/CodeGen/variadics.c  |    38 +
 clang/test/CIR/Incubator/CodeGen/vbase.cpp    |    50 +
 .../Incubator/CodeGen/vector-ext-element.cpp  |   342 +
 clang/test/CIR/Incubator/CodeGen/vector.cpp   |    35 +
 .../CIR/Incubator/CodeGen/vectype-ext.cpp     |   608 +
 .../CIR/Incubator/CodeGen/vectype-issized.c   |    15 +
 clang/test/CIR/Incubator/CodeGen/vectype.cpp  |   248 +
 .../CIR/Incubator/CodeGen/verbose-trap.cpp    |    65 +
 .../Incubator/CodeGen/virtual-base-cast.cpp   |    61 +
 .../CodeGen/virtual-destructor-calls.cpp      |    92 +
 ...l-destructor-explicit-unqualified-call.cpp |    58 +
 .../CodeGen/virtual-function-calls.cpp        |    59 +
 .../Incubator/CodeGen/visibility-attribute.c  |    55 +
 clang/test/CIR/Incubator/CodeGen/vla.c        |   191 +
 clang/test/CIR/Incubator/CodeGen/volatile.cpp |    70 +
 .../CodeGen/vtable-available-externally.cpp   |    24 +
 .../CodeGen/vtable-comdat-divergence.cpp      |    36 +
 .../CIR/Incubator/CodeGen/vtable-emission.cpp |    30 +
 .../CIR/Incubator/CodeGen/vtable-rtti.cpp     |   104 +
 .../CodeGen/vtable-thunk-compare-codegen.cpp  |    47 +
 .../CodeGen/vtable-thunk-destructor.cpp       |    86 +
 .../CodeGen/vtable-thunk-edge-cases.cpp       |   178 +
 .../CodeGen/vtable-thunk-multibase.cpp        |    67 +
 .../vtable-thunk-virtual-inheritance.cpp      |    74 +
 .../CIR/Incubator/CodeGen/vtable-thunk.cpp    |   111 +
 .../vtable-unnamed-addr-divergence.cpp        |    36 +
 clang/test/CIR/Incubator/CodeGen/vtt.cpp      |   185 +
 clang/test/CIR/Incubator/CodeGen/weak.c       |    29 +
 .../CIR/Incubator/CodeGen/wide-string.cpp     |    26 +
 clang/test/CIR/Incubator/Driver/callconv.cpp  |     4 +
 .../CIR/Incubator/Driver/idiom-recognizer.cpp |     2 +
 clang/test/CIR/Incubator/Driver/lib-opt.cpp   |     3 +
 clang/test/CIR/Incubator/Driver/move-opt.cpp  |     2 +
 clang/test/CIR/Incubator/IR/address-space.cir |    40 +
 clang/test/CIR/Incubator/IR/aliases.cir       |    14 +
 clang/test/CIR/Incubator/IR/alloca.cir        |    21 +
 clang/test/CIR/Incubator/IR/annotations.cir   |    37 +
 clang/test/CIR/Incubator/IR/array.cir         |    13 +
 clang/test/CIR/Incubator/IR/attribute.cir     |    25 +
 clang/test/CIR/Incubator/IR/await.cir         |    21 +
 .../Incubator/IR/being_and_nothingness.cir    |    19 +
 clang/test/CIR/Incubator/IR/bit.cir           |    75 +
 clang/test/CIR/Incubator/IR/block-adress.cir  |    34 +
 clang/test/CIR/Incubator/IR/branch.cir        |    21 +
 clang/test/CIR/Incubator/IR/builtins.cir      |    16 +
 .../CIR/Incubator/IR/call-op-call-conv.cir    |    26 +
 clang/test/CIR/Incubator/IR/call.cir          |    30 +
 clang/test/CIR/Incubator/IR/cast.cir          |    33 +
 clang/test/CIR/Incubator/IR/cir-ops.cir       |   121 +
 clang/test/CIR/Incubator/IR/cold.cir          |    20 +
 clang/test/CIR/Incubator/IR/constptrattr.cir  |    10 +
 clang/test/CIR/Incubator/IR/copy.cir          |    10 +
 .../CIR/Incubator/IR/cxx-special-member.cir   |    21 +
 .../test/CIR/Incubator/IR/data-member-ptr.cir |    32 +
 clang/test/CIR/Incubator/IR/do-while.cir      |    17 +
 clang/test/CIR/Incubator/IR/dynamic-cast.cir  |    59 +
 clang/test/CIR/Incubator/IR/exceptions.cir    |    69 +
 clang/test/CIR/Incubator/IR/float.cir         |    90 +
 clang/test/CIR/Incubator/IR/for.cir           |    21 +
 .../test/CIR/Incubator/IR/func-call-conv.cir  |    24 +
 .../CIR/Incubator/IR/func-dsolocal-parser.cir |    13 +
 clang/test/CIR/Incubator/IR/func.cir          |    57 +
 clang/test/CIR/Incubator/IR/getmember.cir     |    14 +
 clang/test/CIR/Incubator/IR/global.cir        |   113 +
 clang/test/CIR/Incubator/IR/indirect-br.cir   |    46 +
 clang/test/CIR/Incubator/IR/inlineAttr.cir    |    12 +
 clang/test/CIR/Incubator/IR/int.cir           |    39 +
 .../CIR/Incubator/IR/invalid-annotations.cir  |    32 +
 .../Incubator/IR/invalid-block-address.cir    |    21 +
 .../test/CIR/Incubator/IR/invalid-complex.cir |    23 +
 .../IR/invalid-opencl-vec-type-hint.cir       |     7 +
 .../CIR/Incubator/IR/invalid-type-info.cir    |    17 +
 .../IR/invalid-vector-shuffle-wrong-index.cir |    16 +
 .../Incubator/IR/invalid-vector-zero-size.cir |    10 +
 .../test/CIR/Incubator/IR/invalid-vector.cir  |    10 +
 clang/test/CIR/Incubator/IR/invalid.cir       |  1664 ++
 clang/test/CIR/Incubator/IR/libc-fabs.cir     |    10 +
 clang/test/CIR/Incubator/IR/libc-memchr.cir   |    12 +
 clang/test/CIR/Incubator/IR/libc-memcpy.cir   |    10 +
 .../test/CIR/Incubator/IR/llvm-intrinsic.cir  |    11 +
 clang/test/CIR/Incubator/IR/module.cir        |    11 +
 clang/test/CIR/Incubator/IR/ptr_stride.cir    |    39 +
 .../Incubator/IR/resume-location-parsing.cir  |    62 +
 clang/test/CIR/Incubator/IR/scope.cir         |    56 +
 clang/test/CIR/Incubator/IR/side-effect.cir   |    20 +
 .../CIR/Incubator/IR/stack-save-restore.cir   |    23 +
 clang/test/CIR/Incubator/IR/struct.cir        |    40 +
 clang/test/CIR/Incubator/IR/switch.cir        |    38 +
 clang/test/CIR/Incubator/IR/tbaa-parse.cir    |    28 +
 clang/test/CIR/Incubator/IR/ternary.cir       |    30 +
 clang/test/CIR/Incubator/IR/try.cir           |    22 +
 clang/test/CIR/Incubator/IR/types.cir         |    13 +
 clang/test/CIR/Incubator/IR/unreachable.cir   |     8 +
 clang/test/CIR/Incubator/IR/vtableAttr.cir    |     7 +
 clang/test/CIR/Incubator/IR/while.cir         |    17 +
 .../CIR/Incubator/Inputs/skip-this-header.h   |    12 +
 clang/test/CIR/Incubator/Inputs/std-cxx.h     |  1321 ++
 clang/test/CIR/Incubator/Inputs/typeinfo      |    24 +
 .../CIR/Incubator/Lowering/OpenMP/barrier.cir |    15 +
 .../Incubator/Lowering/OpenMP/parallel.cir    |    35 +
 .../Incubator/Lowering/OpenMP/taskwait.cir    |    14 +
 .../Incubator/Lowering/OpenMP/taskyield.cir   |    14 +
 .../Incubator/Lowering/ThroughMLIR/abs.cir    |    23 +
 .../Incubator/Lowering/ThroughMLIR/acos.cir   |    30 +
 .../ThroughMLIR/address-space-mlir.cir        |    67 +
 .../Incubator/Lowering/ThroughMLIR/array.c    |    57 +
 .../Incubator/Lowering/ThroughMLIR/array.cir  |    17 +
 .../Incubator/Lowering/ThroughMLIR/asin.cir   |    30 +
 .../Incubator/Lowering/ThroughMLIR/atan.cir   |    30 +
 .../Incubator/Lowering/ThroughMLIR/binop.cpp  |    77 +
 .../CIR/Incubator/Lowering/ThroughMLIR/bit.c  |   133 +
 .../Incubator/Lowering/ThroughMLIR/bool.cir   |    25 +
 .../Lowering/ThroughMLIR/br-with-arg.cir      |    15 +
 .../Incubator/Lowering/ThroughMLIR/branch.cir |    35 +
 .../CIR/Incubator/Lowering/ThroughMLIR/call.c |    52 +
 .../Incubator/Lowering/ThroughMLIR/cast.cir   |   147 +
 .../Incubator/Lowering/ThroughMLIR/ceil.cir   |    30 +
 .../Lowering/ThroughMLIR/cl-kernel.cir        |    14 +
 .../Incubator/Lowering/ThroughMLIR/cmp.cpp    |   182 +
 .../Incubator/Lowering/ThroughMLIR/cos.cir    |    30 +
 .../Incubator/Lowering/ThroughMLIR/doWhile.c  |    96 +
 .../Incubator/Lowering/ThroughMLIR/dot.cir    |    29 +
 .../Incubator/Lowering/ThroughMLIR/exp.cir    |    30 +
 .../Incubator/Lowering/ThroughMLIR/fabs.cir   |    30 +
 .../Incubator/Lowering/ThroughMLIR/float.cir  |    23 +
 .../Incubator/Lowering/ThroughMLIR/floor.cir  |    30 +
 .../Lowering/ThroughMLIR/for-reject-1.cpp     |     9 +
 .../Lowering/ThroughMLIR/for-reject-2.cpp     |     7 +
 .../Lowering/ThroughMLIR/for-reject.cpp       |    74 +
 .../Incubator/Lowering/ThroughMLIR/for.cpp    |   111 +
 .../ThroughMLIR/for_with_continue.cpp         |    19 +
 .../ThroughMLIR/function-attributes.c         |     9 +
 .../Incubator/Lowering/ThroughMLIR/global.cir |    55 +
 .../Incubator/Lowering/ThroughMLIR/global.cpp |    17 +
 .../Incubator/Lowering/ThroughMLIR/goto.cir   |    35 +
 .../CIR/Incubator/Lowering/ThroughMLIR/if.c   |   117 +
 .../Incubator/Lowering/ThroughMLIR/log.cir    |    30 +
 .../Incubator/Lowering/ThroughMLIR/memref.cir |    40 +
 .../Lowering/ThroughMLIR/ptr-arg.cir          |    46 +
 .../Lowering/ThroughMLIR/ptrstride-ptr.cir    |    40 +
 .../Lowering/ThroughMLIR/ptrstride.cir        |    78 +
 .../Incubator/Lowering/ThroughMLIR/round.cir  |    30 +
 .../Incubator/Lowering/ThroughMLIR/scope.cir  |    50 +
 .../Incubator/Lowering/ThroughMLIR/select.cir |    32 +
 .../Incubator/Lowering/ThroughMLIR/shift.cir  |    31 +
 .../Incubator/Lowering/ThroughMLIR/sin.cir    |    30 +
 .../Incubator/Lowering/ThroughMLIR/sqrt.cir   |    30 +
 .../ThroughMLIR/store-memcpy-mlir.cpp         |    12 +
 .../Incubator/Lowering/ThroughMLIR/tan.cir    |    30 +
 .../Incubator/Lowering/ThroughMLIR/tenary.cir |    43 +
 .../Lowering/ThroughMLIR/unary-inc-dec.cir    |    48 +
 .../Lowering/ThroughMLIR/unary-plus-minus.cir |    40 +
 .../Lowering/ThroughMLIR/unreachable.cir      |    19 +
 .../Lowering/ThroughMLIR/vectype.cpp          |   176 +
 .../Incubator/Lowering/ThroughMLIR/vtable.cir |    73 +
 .../ThroughMLIR/while-with-continue.cpp       |   106 +
 .../Incubator/Lowering/ThroughMLIR/while.c    |    87 +
 .../CIR/Incubator/Lowering/address-space.cir  |    59 +
 clang/test/CIR/Incubator/Lowering/alloca.cir  |    17 +
 .../CIR/Incubator/Lowering/applearm64-new.cpp |    41 +
 .../test/CIR/Incubator/Lowering/array-init.c  |    31 +
 clang/test/CIR/Incubator/Lowering/array.cir   |    35 +
 clang/test/CIR/Incubator/Lowering/asm.cir     |    55 +
 .../CIR/Incubator/Lowering/atomic-runtime.cpp |    37 +
 .../Incubator/Lowering/attribute-lowering.cir |    23 +
 .../CIR/Incubator/Lowering/binop-bool.cir     |    18 +
 .../test/CIR/Incubator/Lowering/binop-fp.cir  |    68 +
 .../CIR/Incubator/Lowering/binop-overflow.cir |    63 +
 .../Incubator/Lowering/binop-signed-int.cir   |    76 +
 .../Incubator/Lowering/binop-unsigned-int.cir |    92 +
 clang/test/CIR/Incubator/Lowering/bit.cir     |   189 +
 clang/test/CIR/Incubator/Lowering/bitfieils.c |    32 +
 clang/test/CIR/Incubator/Lowering/bitint.cir  |    30 +
 .../CIR/Incubator/Lowering/bool-to-int.cir    |    21 +
 clang/test/CIR/Incubator/Lowering/bool.cir    |    30 +
 clang/test/CIR/Incubator/Lowering/branch.cir  |    35 +
 clang/test/CIR/Incubator/Lowering/brcond.cir  |    43 +
 clang/test/CIR/Incubator/Lowering/bswap.cir   |    19 +
 .../Incubator/Lowering/builtin-binary-fp2fp.c |   194 +
 .../Lowering/builtin-floating-point.cir       |   197 +
 .../Incubator/Lowering/builtin-isfpclass.c    |   125 +
 .../Incubator/Lowering/call-op-call-conv.cir  |    19 +
 clang/test/CIR/Incubator/Lowering/call.cir    |   121 +
 clang/test/CIR/Incubator/Lowering/cast.cir    |   111 +
 clang/test/CIR/Incubator/Lowering/class.cir   |    96 +
 clang/test/CIR/Incubator/Lowering/cmp.cir     |    78 +
 clang/test/CIR/Incubator/Lowering/cmp3way.cir |    40 +
 clang/test/CIR/Incubator/Lowering/complex.cir |    15 +
 .../CIR/Incubator/Lowering/const-array.cir    |    20 +
 clang/test/CIR/Incubator/Lowering/const.cir   |    86 +
 .../CIR/Incubator/Lowering/data-member.cir    |    57 +
 .../test/CIR/Incubator/Lowering/debug-info.c  |    19 +
 .../CIR/Incubator/Lowering/delete-array.cpp   |    19 +
 .../Incubator/Lowering/derived-to-base.cpp    |    27 +
 clang/test/CIR/Incubator/Lowering/dot.cir     |   111 +
 .../CIR/Incubator/Lowering/exceptions.cir     |   108 +
 clang/test/CIR/Incubator/Lowering/expect.cir  |    54 +
 clang/test/CIR/Incubator/Lowering/float.cir   |    18 +
 .../CIR/Incubator/Lowering/func-call-conv.cir |    20 +
 clang/test/CIR/Incubator/Lowering/func.cir    |    17 +
 .../test/CIR/Incubator/Lowering/global-ptr.c  |    55 +
 clang/test/CIR/Incubator/Lowering/globals.cir |   218 +
 .../CIR/Incubator/Lowering/goto-interscope.c  |    32 +
 clang/test/CIR/Incubator/Lowering/goto.cir    |    52 +
 clang/test/CIR/Incubator/Lowering/hello.cir   |    35 +
 clang/test/CIR/Incubator/Lowering/if.cir      |    99 +
 .../test/CIR/Incubator/Lowering/int-wrap.cir  |    24 +
 .../CIR/Incubator/Lowering/intrinsics.cir     |    23 +
 clang/test/CIR/Incubator/Lowering/libc.cir    |    18 +
 .../CIR/Incubator/Lowering/linker-options.cir |     9 +
 .../Incubator/Lowering/loadstorealloca.cir    |    57 +
 clang/test/CIR/Incubator/Lowering/loop.cir    |   126 +
 .../Incubator/Lowering/loops-with-break.cir   |   269 +
 .../Lowering/loops-with-continue.cir          |   265 +
 .../CIR/Incubator/Lowering/module-asm.cir     |    11 +
 .../test/CIR/Incubator/Lowering/multi-array.c |    58 +
 .../CIR/Incubator/Lowering/nested-switch.cpp  |    69 +
 .../Incubator/Lowering/nested-union-array.c   |    28 +
 clang/test/CIR/Incubator/Lowering/new.cpp     |   237 +
 clang/test/CIR/Incubator/Lowering/ptrdiff.cir |    18 +
 .../test/CIR/Incubator/Lowering/ptrstride.cir |    44 +
 .../Incubator/Lowering/region-simplify.cir    |    38 +
 .../CIR/Incubator/Lowering/resume-flat.cir    |    30 +
 clang/test/CIR/Incubator/Lowering/scope.cir   |    78 +
 clang/test/CIR/Incubator/Lowering/select.cir  |    48 +
 .../CIR/Incubator/Lowering/setjmp-longjmp.cir |    37 +
 clang/test/CIR/Incubator/Lowering/shift.cir   |    28 +
 .../Incubator/Lowering/stack-save-restore.cir |    19 +
 .../CIR/Incubator/Lowering/static-array.c     |     8 +
 .../CIR/Incubator/Lowering/store-memcpy.cpp   |    21 +
 clang/test/CIR/Incubator/Lowering/str.c       |     9 +
 .../test/CIR/Incubator/Lowering/struct-init.c |    13 +
 clang/test/CIR/Incubator/Lowering/struct.cir  |   130 +
 .../CIR/Incubator/Lowering/switch-while.c     |    84 +
 clang/test/CIR/Incubator/Lowering/switch.cir  |   190 +
 .../test/CIR/Incubator/Lowering/syncscope.cir |    29 +
 clang/test/CIR/Incubator/Lowering/ternary.cir |   111 +
 .../test/CIR/Incubator/Lowering/try-catch.cpp |   112 +
 clang/test/CIR/Incubator/Lowering/types.cir   |    18 +
 .../CIR/Incubator/Lowering/unary-inc-dec.cir  |    63 +
 .../test/CIR/Incubator/Lowering/unary-not.cir |    78 +
 .../Incubator/Lowering/unary-plus-minus.cir   |    43 +
 .../Incubator/Lowering/union-in-struct-init.c |    51 +
 clang/test/CIR/Incubator/Lowering/unions.cir  |    43 +
 .../CIR/Incubator/Lowering/var-arg-x86_64.c   |   210 +
 .../test/CIR/Incubator/Lowering/variadics.cir |    40 +
 clang/test/CIR/Incubator/Lowering/vec-cmp.cir |    16 +
 clang/test/CIR/Incubator/Lowering/vectype.cpp |   349 +
 .../CIR/Incubator/Lowering/vtable-thunk.cpp   |    55 +
 .../cir-translate/cir-translate-triple.cir    |    11 +
 .../has-triple-and-data-layout.cir            |    24 +
 .../has-triple-no-data-layout.cir             |    23 +
 .../invalid-translate-triple.cir              |     8 +
 .../no-triple-has-data-layout.cir             |    23 +
 .../no-triple-no-data-layout.cir              |    21 +
 .../cir-translate/warn-default-triple.cir     |     8 +
 .../Incubator/Transforms/ABILowering/cast.cir |    53 +
 .../Incubator/Transforms/ABILowering/cmp.cir  |    41 +
 .../Transforms/ABILowering/const.cir          |    40 +
 .../Incubator/Transforms/ABILowering/func.cir |    28 +
 .../Transforms/ABILowering/global.cir         |    22 +
 .../Transforms/ABILowering/member-ptr.cir     |   106 +
 .../Incubator/Transforms/Inputs/folly-coro.h  |    44 +
 .../CIR/Incubator/Transforms/Inputs/std.h     |    29 +
 .../CIR/Incubator/Transforms/Inputs/string.h  |    11 +
 .../Incubator/Transforms/builtin-assume.cir   |    38 +
 .../CIR/Incubator/Transforms/complex-fold.cir |    64 +
 .../CIR/Incubator/Transforms/goto_solver.cir  |    63 +
 .../CIR/Incubator/Transforms/idiom-iter.cpp   |    21 +
 .../Incubator/Transforms/idiom-recognizer.cpp |    49 +
 .../CIR/Incubator/Transforms/idiom-string.c   |    15 +
 clang/test/CIR/Incubator/Transforms/if.cir    |    48 +
 .../CIR/Incubator/Transforms/lib-opt-find.cpp |    66 +
 .../Incubator/Transforms/lib-opt-string.cpp   |    91 +
 .../test/CIR/Incubator/Transforms/lib-opt.cpp |     3 +
 .../Transforms/lifetime-check-agg.cpp         |    74 +
 .../Transforms/lifetime-check-coro-task.cpp   |    35 +
 .../Transforms/lifetime-check-lambda.cpp      |    35 +
 .../Transforms/lifetime-check-owner.cpp       |    71 +
 .../lifetime-check-range-for-vector.cpp       |    30 +
 .../Transforms/lifetime-check-remarks.cpp     |    39 +
 ...ifetime-check-smart-pointer-after-move.cpp |   239 +
 .../Transforms/lifetime-check-string.cpp      |    87 +
 .../lifetime-check-use-after-move.cpp         |   157 +
 .../Incubator/Transforms/lifetime-check.cpp   |    48 +
 .../Incubator/Transforms/lifetime-fn-args.cpp |    12 +
 .../Transforms/lifetime-invalid-option.cpp    |     7 +
 .../Transforms/lifetime-loop-valid.cpp        |    38 +
 .../Incubator/Transforms/lifetime-loop.cpp    |    56 +
 .../Transforms/lifetime-null-passing.cpp      |    23 +
 .../Incubator/Transforms/lifetime-switch.cpp  |    46 +
 .../Incubator/Transforms/lifetime-this.cpp    |    12 +
 .../Transforms/live-object-analysis.cir       |   158 +
 .../Transforms/live-object-analysis.cpp       |    72 +
 clang/test/CIR/Incubator/Transforms/loop.cir  |   122 +
 clang/test/CIR/Incubator/Transforms/mem2reg.c |   191 +
 .../test/CIR/Incubator/Transforms/mem2reg.cir |    31 +
 .../Incubator/Transforms/merge-cleanups.cir   |   150 +
 .../CIR/Incubator/Transforms/move-opt.cpp     |   159 +
 .../Transforms/points-to-analysis.cir         |   124 +
 .../Transforms/points-to-analysis.cpp         |    55 +
 .../CIR/Incubator/Transforms/scf-prepare.cir  |   206 +
 clang/test/CIR/Incubator/Transforms/scope.cir |    60 +
 .../test/CIR/Incubator/Transforms/select.cir  |    60 +
 .../Transforms/setjmp-longjmp-lower.c         |    74 +
 clang/test/CIR/Incubator/Transforms/simpl.c   |    38 +
 clang/test/CIR/Incubator/Transforms/simpl.cir |    55 +
 .../test/CIR/Incubator/Transforms/switch.cir  |   278 +
 .../CIR/Incubator/Transforms/ternary-fold.cir |    60 +
 .../test/CIR/Incubator/Transforms/ternary.cir |    64 +
 .../Incubator/Transforms/vector-cmp-fold.cir  |   227 +
 .../Transforms/vector-create-fold.cir         |    19 +
 .../Transforms/vector-extract-fold.cir        |    33 +
 .../vector-shuffle-dynamic-fold.cir           |    30 +
 .../Transforms/vector-shuffle.fold.cir        |    59 +
 .../CIR/Incubator/Transforms/vector-splat.cir |    16 +
 .../Transforms/vector-ternary-fold.cir        |    19 +
 clang/test/CIR/Incubator/analysis-only.cpp    |     2 +
 clang/test/CIR/Incubator/cc1.c                |    29 +
 clang/test/CIR/Incubator/cc1.cir              |    12 +
 clang/test/CIR/Incubator/cir-output.c         |    21 +
 clang/test/CIR/Incubator/cirtool.cir          |    20 +
 .../crashes/apvalue-constexpr-init.cpp        |    25 +
 .../crashes/array-new-default-arg.cpp         |    37 +
 .../CIR/Incubator/crashes/async-future.cpp    |    12 +
 .../crashes/bitfield-bool-int-cast.cpp        |    14 +
 .../crashes/cleanup-892-null-fixups.cpp       |    31 +
 .../Incubator/crashes/cleanup-unreachable.cpp |    36 +
 .../Incubator/crashes/computed-goto-nyi.cpp   |    45 +
 .../conditional-return-destructors.cpp        |    28 +
 .../CIR/Incubator/crashes/constexpr-cast.cpp  |    19 +
 .../constexpr-complex-template-metaprog.cpp   |    69 +
 .../CIR/Incubator/crashes/copy-on-catch.cpp   |    16 +
 .../Incubator/crashes/dyncast-assertion.cpp   |    23 +
 .../crashes/exception-handling-nyi.cpp        |    49 +
 .../CIR/Incubator/crashes/exception-ptr.cpp   |    12 +
 .../crashes/filesystem-sd-automatic.cpp       |    16 +
 .../crashes/function-ref-pointer-params.cpp   |    16 +
 .../crashes/multi-inheritance-thunk-crash.cpp |    47 +
 .../crashes/range-for-temp-automatic.cpp      |    50 +
 .../Incubator/crashes/ref-temp-automatic.cpp  |    16 +
 .../crashes/static-init-recursion.cpp         |    12 +
 .../crashes/static-local-destructor.cpp       |    18 +
 .../crashes/static-local-guard-nyi.cpp        |    45 +
 .../crashes/static-local-used-attribute.cpp   |    12 +
 .../Incubator/crashes/static-var-dyn-cast.cpp |    19 +
 .../crashes/static-var-guarded-init.cpp       |    14 +
 .../crashes/template-syntax-error.cpp         |    15 +
 .../CIR/Incubator/crashes/tls-destructor.cpp  |    14 +
 .../crashes/verification-block-terminator.cpp |    20 +
 .../crashes/virtual-base-constructor.cpp      |    17 +
 .../crashes/virtual-inheritance-crash.cpp     |    49 +
 .../crashes/virtual-method-global-dtor.cpp    |    16 +
 .../array-new-delete-divergences.cpp          |    55 +
 .../calling-conv-12byte-struct.cpp            |    33 +
 .../calling-conv-16byte-struct.cpp            |    33 +
 .../calling-conv-20byte-struct.cpp            |    29 +
 .../divergences/calling-conv-4byte-struct.cpp |    33 +
 .../calling-conv-aligned-struct.cpp           |    26 +
 .../calling-conv-array-in-struct.cpp          |    30 +
 .../calling-conv-bitfield-struct.cpp          |    27 +
 .../calling-conv-bool-in-struct.cpp           |    31 +
 .../divergences/calling-conv-empty-struct.cpp |    27 +
 .../calling-conv-longlong-struct.cpp          |    29 +
 .../calling-conv-multiple-struct-params.cpp   |    29 +
 .../calling-conv-nested-struct.cpp            |    34 +
 .../calling-conv-packed-struct.cpp            |    31 +
 .../calling-conv-pointer-in-struct.cpp        |    30 +
 .../calling-conv-two-longlongs.cpp            |    29 +
 .../CIR/Incubator/divergences/ctor-copy.cpp   |    27 +
 .../divergences/ctor-deep-inheritance.cpp     |    32 +
 .../Incubator/divergences/ctor-delegating.cpp |    28 +
 .../Incubator/divergences/ctor-inherited.cpp  |    29 +
 .../divergences/ctor-member-init-list.cpp     |    31 +
 .../CIR/Incubator/divergences/ctor-move.cpp   |    31 +
 .../divergences/ctor-multiple-inheritance.cpp |    32 +
 .../divergences/ctor-parameterized.cpp        |    25 +
 .../divergences/float-double-struct.cpp       |    27 +
 .../divergences/float-mixed-int-float.cpp     |    29 +
 .../divergences/float-single-float-struct.cpp |    27 +
 .../divergences/float-struct-calling-conv.cpp |   105 +
 .../divergences/float-two-floats-struct.cpp   |    27 +
 .../divergences/global-constructor.cpp        |    27 +
 .../divergences/inheritance-diamond.cpp       |    36 +
 .../divergences/inheritance-empty-base.cpp    |    26 +
 .../inheritance-missing-comdat.cpp            |   108 +
 .../divergences/inheritance-private.cpp       |    28 +
 .../divergences/inheritance-protected.cpp     |    28 +
 .../inline-ctor-dtor-missing-comdat.cpp       |   126 +
 .../divergences/lambda-capture-by-ref.cpp     |    21 +
 .../divergences/lambda-capture-by-value.cpp   |    21 +
 .../divergences/lambda-missing-comdat.cpp     |    60 +
 .../Incubator/divergences/lambda-mutable.cpp  |    21 +
 .../divergences/lambda-returning-struct.cpp   |    24 +
 .../Incubator/divergences/lambda-simple.cpp   |    20 +
 .../divergences/lambda-with-params.cpp        |    20 +
 .../member-ptr-abi-calling-conv.cpp           |    46 +
 .../divergences/member-ptr-array.cpp          |    27 +
 .../member-ptr-base-to-derived.cpp            |    33 +
 .../divergences/member-ptr-comparison.cpp     |    28 +
 .../divergences/member-ptr-const-method.cpp   |    29 +
 .../divergences/member-ptr-data-member.cpp    |    30 +
 .../member-ptr-multiple-inheritance.cpp       |    30 +
 .../Incubator/divergences/member-ptr-null.cpp |    28 +
 .../member-ptr-overloaded-function.cpp        |    30 +
 .../member-ptr-returning-struct.cpp           |    32 +
 .../member-ptr-stored-in-struct.cpp           |    26 +
 .../member-ptr-virtual-function.cpp           |    34 +
 .../divergences/missing-llvm-attributes.cpp   |    62 +
 .../divergences/operator-missing-comdat.cpp   |   130 +
 .../rtti-dynamic-cast-downcast.cpp            |    32 +
 .../divergences/rtti-dynamic-cast-upcast.cpp  |    32 +
 .../divergences/rtti-linkage-gep.cpp          |    47 +
 .../divergences/small-struct-coercion.cpp     |    69 +
 .../divergences/sret-abi-mismatch.cpp         |    43 +
 .../divergences/static-inline-member.cpp      |    24 +
 .../divergences/static-local-trivial.cpp      |    24 +
 .../divergences/static-member-variable.cpp    |    25 +
 .../template-class-instantiation.cpp          |    30 +
 .../divergences/template-inheritance.cpp      |    30 +
 .../divergences/template-member-function.cpp  |    27 +
 .../divergences/template-missing-comdat.cpp   |    48 +
 .../template-multiple-type-params.cpp         |    24 +
 .../divergences/template-non-type-param.cpp   |    26 +
 .../divergences/template-specialization.cpp   |    28 +
 .../divergences/template-variadic.cpp         |    24 +
 .../thread-local-wrapper-missing.cpp          |    38 +
 .../divergences/unnecessary-temp-allocas.cpp  |    59 +
 .../divergences/virtual-inheritance-vtt.cpp   |    66 +
 .../divergences/vtable-missing-comdat.cpp     |    96 +
 .../divergences/vtable-thunk-destructor.cpp   |    38 +
 clang/test/CIR/Incubator/driver.c             |    56 +
 clang/test/CIR/Incubator/emit-mlir.c          |    44 +
 .../test/CIR/Incubator/global-var-simple.cpp  |    78 +
 clang/test/CIR/Incubator/hello.c              |     5 +
 clang/test/CIR/Incubator/mlirargs.c           |    12 +
 clang/test/CIR/Incubator/mlirprint.c          |    41 +
 887 files changed, 97120 insertions(+), 87 deletions(-)
 delete mode 100644 clang/test/CIR/CodeGen/CUDA/mangling.cu
 create mode 100644 clang/test/CIR/Incubator/CallConvLowering/AArch64/aarch64-cc-structs.c
 create mode 100644 clang/test/CIR/Incubator/CallConvLowering/AArch64/aarch64_be-cc-structs.c
 create mode 100644 clang/test/CIR/Incubator/CallConvLowering/AArch64/basic.cpp
 create mode 100644 clang/test/CIR/Incubator/CallConvLowering/AArch64/ptr-fields.c
 create mode 100644 clang/test/CIR/Incubator/CallConvLowering/AArch64/struct.c
 create mode 100644 clang/test/CIR/Incubator/CallConvLowering/AArch64/union.c
 create mode 100644 clang/test/CIR/Incubator/CallConvLowering/AArch64/vector-fp16.c
 create mode 100644 clang/test/CIR/Incubator/CallConvLowering/NVPTX/basic.cpp
 create mode 100644 clang/test/CIR/Incubator/CallConvLowering/x86_64/basic.cpp
 create mode 100644 clang/test/CIR/Incubator/CallConvLowering/x86_64/fptrs.c
 create mode 100644 clang/test/CIR/Incubator/CallConvLowering/x86_64/int128.cpp
 create mode 100644 clang/test/CIR/Incubator/CallConvLowering/x86_64/varargs.c
 create mode 100644 clang/test/CIR/Incubator/CallConvLowering/x86_64/void-ptr.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/AArch64/bf16-getset-intrinsics.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/AArch64/neon-arith.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/AArch64/neon-crypto.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/AArch64/neon-ext-mov.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/AArch64/neon-fp16.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/AArch64/neon-ldst.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/AArch64/neon-misc.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/AArch64/neon.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/CUDA/address-spaces.cu
 create mode 100644 clang/test/CIR/Incubator/CodeGen/CUDA/addrspace-lowering.cu
 create mode 100644 clang/test/CIR/Incubator/CodeGen/CUDA/builtin-functions.cu
 create mode 100644 clang/test/CIR/Incubator/CodeGen/CUDA/builtins-nvptx-ptx60.cu
 create mode 100644 clang/test/CIR/Incubator/CodeGen/CUDA/builtins-sm90.cu
 create mode 100644 clang/test/CIR/Incubator/CodeGen/CUDA/cuda-builtin-vars.cu
 create mode 100644 clang/test/CIR/Incubator/CodeGen/CUDA/destructor.cu
 create mode 100644 clang/test/CIR/Incubator/CodeGen/CUDA/global-vars.cu
 create mode 100644 clang/test/CIR/Incubator/CodeGen/CUDA/mangling.cu
 create mode 100644 clang/test/CIR/Incubator/CodeGen/CUDA/printf.cu
 create mode 100644 clang/test/CIR/Incubator/CodeGen/CUDA/registration.cu
 create mode 100644 clang/test/CIR/Incubator/CodeGen/CUDA/simple-nvptx-triple.cu
 create mode 100644 clang/test/CIR/Incubator/CodeGen/CUDA/simple.cu
 create mode 100644 clang/test/CIR/Incubator/CodeGen/CUDA/surface.cu
 create mode 100644 clang/test/CIR/Incubator/CodeGen/CUDA/texture.cu
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/address-spaces.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/addrspace-lowering.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/amdgpu-attrs.hip
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/amdgpu-hip-kernel-abi.hip
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/amdgpu-module-flags.hip
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/amdgpu-vec3-memory-type.hip
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-gfx10.hip
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-gfx11.hip
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-gfx1250.hip
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-image-sample.hip
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-image.hip
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-logb-scalbn.hip
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-raw-buffer-atomics.hip
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-raw-buffer.hip
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-vi.hip
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn.hip
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/calling-conv-lowering-amdgpu.hip
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/global-vars.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/hip-cuid.hip
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/ptr-diff.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/registration.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/HIP/simple.cpp
 rename clang/test/CIR/{CodeGenCUDA => Incubator/CodeGen}/Inputs/cuda.h (89%)
 create mode 100644 clang/test/CIR/Incubator/CodeGen/Inputs/std-compare.h
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/addrspace-alloca.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/addrspace_cast.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/amdgpu-kernel-abi.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/array-decay.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/as_type.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/async_copy.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-gfx10.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-gfx11.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-gfx1250.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-image-sample.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-image.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-logb-scalbn.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-raw-buffer-atomics.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-raw-buffer.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-vi.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/builtins_amdgcn.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/cl-uniform-wg-size.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/convergent.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/elemwise-ops.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/global-var-with-ctor.clcpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/global.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-arg-info-single-as.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-arg-info.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-arg-metadata.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-attributes.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-unit-attr.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/nothrow.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/null-vec.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/opencl-c-lang.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/opencl-version.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/printf.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/spir-calling-conv.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/spirv-target.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/static-vardecl.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/str_literals.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/vec_initializer.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/vec_logic.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenCL/vec_widening.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenMP/barrier.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenMP/parallel.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenMP/taskwait.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/OpenMP/taskyield.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/String.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/StringExample.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/avx-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/avx-shuffle-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/avx10_2_512bf16-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/avx10_2bf16-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/avx2-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/avx512bw-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/avx512dq-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/avx512f-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/avx512fp16-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/avx512vbmi2-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/avx512vl-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/avx512vlbw-buiiltins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/avx512vldq-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/avx512vlvbmi2-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/bmi-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/lzcnt-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/mmx-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/palignr.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/pause.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/rd-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/sse-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/sse2-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/sse3-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/sse41-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/X86/x86_64-xsave.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/aapcs-volatile-bitfields.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/aarch64-neon-vdup-lane.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/abstract-cond.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/address-space-cast-subscript.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/address-space-conversion.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/address-space.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/agg-copy.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/agg-init-inherit.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/agg-init.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/agg-init2.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/align-load.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/align-store.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/alignment.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/amdgpu-address-spaces.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/analysis-only.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/annotations-declaration.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/annotations-var.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/applearm64-array-cookies.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/array-init-destroy.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/array-init-partial.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/array-init.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/array-init.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/array-new-init.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/array-unknown-bound.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/array.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/array.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/asm.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/assign-operator.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/atomic-runtime.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/atomic-thread-fence.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/atomic-type-casts.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/atomic-xchg-field.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/atomic.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/attribute-annotate-multiple.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/attributes.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/basic.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/basic.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/bf16-ops.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/binassign.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/binop.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/binop.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/bitfield-union.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/bitfields.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/bitfields.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/bitfields_be.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/bitint.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/bitint.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/bool.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/bswap.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/build-deferred.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-abort.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-addressof.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-alloca.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-arm-exclusive.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-assume.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-bcopy.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-bit-cast.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-bitreverse.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-bits.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-constant-evaluated.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-constant-fold.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-constant-p.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-fcmp-sse.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-floating-point.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-isfpclass.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-isinf-sign.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-ms-alloca.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-nontemporal.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-prefetch.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-rotate.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-setjmp-longjmp.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-signbit.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-types.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-x86-pshufd.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-x86-pslldqi.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtin-x86-psrldqi.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtins-elementwise.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtins-memory.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtins-overflow.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/builtins.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/c11atomic.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/c89-implicit-int.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/call-extra-attrs.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/call-side-effect.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/call-via-class-member-funcptr.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/call.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/call.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/cast-lvalue.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/cast.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/cast.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/class_cast.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/clear_cache.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/cmp.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/cold-attr.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/comma.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/complex-arithmetic.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/complex-builtins.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/complex-cast.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/complex-cast.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/complex-compound-assignment.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/complex-init-list.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/complex.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/complex.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/compound-literal-empty.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/compound-literal.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/concept-specialization.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/cond.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/conditional-cleanup.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/const-alloca.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/const-array.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/const-baseclass.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/const-bitfields.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/const-complex.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/constant-expr.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/constptr.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/copy-constructor.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/coro-task.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/count-of.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/ctor-alias.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/ctor-member-lvalue-to-rvalue.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/ctor.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/cxx-default-arg.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/cxx-traits.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/cxx1z-inline-variables.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/default-address-space.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/default-methods.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/defaultarg.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/defined-pure-virtual-func.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/delegating-ctor.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/delete-array.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/delete.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/derived-cast.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/derived-to-base.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/dlti.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/dtor-alias.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/dtors-scopes.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/dtors.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/dumb-record.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/dynamic-alloca-with-address-space.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/dynamic-cast-address-space.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/dynamic-cast-exact.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/dynamic-cast-relative-layout.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/dynamic-cast.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/dynamic-cast.mlir
 create mode 100644 clang/test/CIR/Incubator/CodeGen/eh.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/empty-try-catch.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/error-attr.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/evaluate-expr.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/expressions.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/finegrain-bitfield-access.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/fixedpoint-literal.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/float16-ops.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/forward-decls.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/fp16-ops.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/fullexpr.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/fun-ptr.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/func_dsolocal_pie.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/function-attrs.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/function-to-pointer-decay.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/generic-selection.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/global-const-record-crash.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/global-constant.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/global-ctor-dtor.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/global-init.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/global-new.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/globals-neg-index-array.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/globals-ref-globals.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/globals.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/globals.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/gnu-extension.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/gnu-null.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/gnu89.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/goto.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/hello.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/hot-attr.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/if-consteval.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/if-constexpr.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/implicit-return.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/inc-bool.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/inc-dec.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/inheriting-constructor.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/init_priority.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/initlist-ptr-ptr.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/initlist-ptr-unsigned.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/int-wrap.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/int128.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/kr-func-promote.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/label-values.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/lalg.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/lambda.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/libc.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/libcall.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/link-bitcode-file.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/linkage.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/literals.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/literals.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/loop-scope.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/loop.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/lvalue-refs.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/materialize-temporary.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/member-init-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/mms-bitfields.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/module-asm.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/move.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/ms-intrinsics-other.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/multi-vtable.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/new-null.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/new.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/no-common.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/no-pie.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/no-proto-fun-ptr.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/no-proto-is-void.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/no-prototype.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/no-unique-address.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/noexcept.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/non-odr-use-constant.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/nonzeroinit-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/nrvo-eh.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/nrvo.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/null-arithmatic-expression.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/nullptr-init.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/offsetof.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/ofstream.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/opaque.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/opaque.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/operators.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/optimization-attr.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/optnone.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/pack-indexing.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/packed-structs.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/paren-list-init.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/pass-object-size.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/pointer-arith-ext.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/pointer-to-data-member-cast.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/pointer-to-data-member-cmp.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/pointer-to-data-member.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/pointer-to-member-func.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/pointer.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/pointers.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/pred-info-builtins.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/predefined.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/ptrdiff.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/ptrdiff.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/rangefor.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/requires-expr.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/return.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/same-mangled-name.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/scalar_to_scalar_bitcast.cl
 create mode 100644 clang/test/CIR/Incubator/CodeGen/scoped-atomic-load-store.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/shift.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/sizeof-pack.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/skip-functions-from-system-headers.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/source-loc-expr.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/source-location-scope.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/sourcelocation.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/special-virtual-func.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/spelling-locations.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/static-vars.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/static-vars.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/static.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/static_class_ref.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/std-array.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/std-find.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/stmt-expr.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/stmt-expr.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/stmtexpr-init.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/store.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/string-literals.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/struct-comma.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/struct-empty.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/struct.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/struct.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/structural-binding.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/switch-gnurange.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/switch-unreachable-after-break.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/switch.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/synthetic-try-resume.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/tbaa-bitinit.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/tbaa-enum.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/tbaa-enum.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/tbaa-pointer.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/tbaa-scalar.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/tbaa-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/tbaa-union.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/tbaa-vptr.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/temporaries.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/temporary-materialization.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/tempref.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/ternary.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/ternary.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/thread-local.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/three-way-comparison.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/throw.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/tls.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/trap.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/trivial-copy.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/try-catch-dtors.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/try-catch.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/type-trait.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/typedef.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/typeinfo
 create mode 100644 clang/test/CIR/Incubator/CodeGen/types-IEEE-quad.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/types-nullptr.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/types.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/unary-deref.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/unary.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/unary.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/union-array.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/union-empty.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/union-init.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/union-padding.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/union.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/unreachable.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/uwtable.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/var-arg-float.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/var-arg-scope.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/var-arg.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/variadic-ctor.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/variadics.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vbase.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vector-ext-element.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vector.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vectype-ext.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vectype-issized.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vectype.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/verbose-trap.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/virtual-base-cast.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/virtual-destructor-calls.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/virtual-destructor-explicit-unqualified-call.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/virtual-function-calls.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/visibility-attribute.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vla.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/volatile.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vtable-available-externally.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vtable-comdat-divergence.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vtable-emission.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vtable-rtti.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vtable-thunk-compare-codegen.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vtable-thunk-destructor.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vtable-thunk-edge-cases.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vtable-thunk-multibase.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vtable-thunk-virtual-inheritance.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vtable-thunk.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vtable-unnamed-addr-divergence.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/vtt.cpp
 create mode 100644 clang/test/CIR/Incubator/CodeGen/weak.c
 create mode 100644 clang/test/CIR/Incubator/CodeGen/wide-string.cpp
 create mode 100644 clang/test/CIR/Incubator/Driver/callconv.cpp
 create mode 100644 clang/test/CIR/Incubator/Driver/idiom-recognizer.cpp
 create mode 100644 clang/test/CIR/Incubator/Driver/lib-opt.cpp
 create mode 100644 clang/test/CIR/Incubator/Driver/move-opt.cpp
 create mode 100644 clang/test/CIR/Incubator/IR/address-space.cir
 create mode 100644 clang/test/CIR/Incubator/IR/aliases.cir
 create mode 100644 clang/test/CIR/Incubator/IR/alloca.cir
 create mode 100644 clang/test/CIR/Incubator/IR/annotations.cir
 create mode 100644 clang/test/CIR/Incubator/IR/array.cir
 create mode 100644 clang/test/CIR/Incubator/IR/attribute.cir
 create mode 100644 clang/test/CIR/Incubator/IR/await.cir
 create mode 100644 clang/test/CIR/Incubator/IR/being_and_nothingness.cir
 create mode 100644 clang/test/CIR/Incubator/IR/bit.cir
 create mode 100644 clang/test/CIR/Incubator/IR/block-adress.cir
 create mode 100644 clang/test/CIR/Incubator/IR/branch.cir
 create mode 100644 clang/test/CIR/Incubator/IR/builtins.cir
 create mode 100644 clang/test/CIR/Incubator/IR/call-op-call-conv.cir
 create mode 100644 clang/test/CIR/Incubator/IR/call.cir
 create mode 100644 clang/test/CIR/Incubator/IR/cast.cir
 create mode 100644 clang/test/CIR/Incubator/IR/cir-ops.cir
 create mode 100644 clang/test/CIR/Incubator/IR/cold.cir
 create mode 100644 clang/test/CIR/Incubator/IR/constptrattr.cir
 create mode 100644 clang/test/CIR/Incubator/IR/copy.cir
 create mode 100644 clang/test/CIR/Incubator/IR/cxx-special-member.cir
 create mode 100644 clang/test/CIR/Incubator/IR/data-member-ptr.cir
 create mode 100644 clang/test/CIR/Incubator/IR/do-while.cir
 create mode 100644 clang/test/CIR/Incubator/IR/dynamic-cast.cir
 create mode 100644 clang/test/CIR/Incubator/IR/exceptions.cir
 create mode 100644 clang/test/CIR/Incubator/IR/float.cir
 create mode 100644 clang/test/CIR/Incubator/IR/for.cir
 create mode 100644 clang/test/CIR/Incubator/IR/func-call-conv.cir
 create mode 100644 clang/test/CIR/Incubator/IR/func-dsolocal-parser.cir
 create mode 100644 clang/test/CIR/Incubator/IR/func.cir
 create mode 100644 clang/test/CIR/Incubator/IR/getmember.cir
 create mode 100644 clang/test/CIR/Incubator/IR/global.cir
 create mode 100644 clang/test/CIR/Incubator/IR/indirect-br.cir
 create mode 100644 clang/test/CIR/Incubator/IR/inlineAttr.cir
 create mode 100644 clang/test/CIR/Incubator/IR/int.cir
 create mode 100644 clang/test/CIR/Incubator/IR/invalid-annotations.cir
 create mode 100644 clang/test/CIR/Incubator/IR/invalid-block-address.cir
 create mode 100644 clang/test/CIR/Incubator/IR/invalid-complex.cir
 create mode 100644 clang/test/CIR/Incubator/IR/invalid-opencl-vec-type-hint.cir
 create mode 100644 clang/test/CIR/Incubator/IR/invalid-type-info.cir
 create mode 100644 clang/test/CIR/Incubator/IR/invalid-vector-shuffle-wrong-index.cir
 create mode 100644 clang/test/CIR/Incubator/IR/invalid-vector-zero-size.cir
 create mode 100644 clang/test/CIR/Incubator/IR/invalid-vector.cir
 create mode 100644 clang/test/CIR/Incubator/IR/invalid.cir
 create mode 100644 clang/test/CIR/Incubator/IR/libc-fabs.cir
 create mode 100644 clang/test/CIR/Incubator/IR/libc-memchr.cir
 create mode 100644 clang/test/CIR/Incubator/IR/libc-memcpy.cir
 create mode 100644 clang/test/CIR/Incubator/IR/llvm-intrinsic.cir
 create mode 100644 clang/test/CIR/Incubator/IR/module.cir
 create mode 100644 clang/test/CIR/Incubator/IR/ptr_stride.cir
 create mode 100644 clang/test/CIR/Incubator/IR/resume-location-parsing.cir
 create mode 100644 clang/test/CIR/Incubator/IR/scope.cir
 create mode 100644 clang/test/CIR/Incubator/IR/side-effect.cir
 create mode 100644 clang/test/CIR/Incubator/IR/stack-save-restore.cir
 create mode 100644 clang/test/CIR/Incubator/IR/struct.cir
 create mode 100644 clang/test/CIR/Incubator/IR/switch.cir
 create mode 100644 clang/test/CIR/Incubator/IR/tbaa-parse.cir
 create mode 100644 clang/test/CIR/Incubator/IR/ternary.cir
 create mode 100644 clang/test/CIR/Incubator/IR/try.cir
 create mode 100644 clang/test/CIR/Incubator/IR/types.cir
 create mode 100644 clang/test/CIR/Incubator/IR/unreachable.cir
 create mode 100644 clang/test/CIR/Incubator/IR/vtableAttr.cir
 create mode 100644 clang/test/CIR/Incubator/IR/while.cir
 create mode 100644 clang/test/CIR/Incubator/Inputs/skip-this-header.h
 create mode 100644 clang/test/CIR/Incubator/Inputs/std-cxx.h
 create mode 100644 clang/test/CIR/Incubator/Inputs/typeinfo
 create mode 100644 clang/test/CIR/Incubator/Lowering/OpenMP/barrier.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/OpenMP/parallel.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/OpenMP/taskwait.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/OpenMP/taskyield.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/abs.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/acos.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/address-space-mlir.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/array.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/array.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/asin.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/atan.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/binop.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/bit.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/bool.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/br-with-arg.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/branch.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/call.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/cast.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/ceil.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/cl-kernel.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/cmp.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/cos.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/doWhile.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/dot.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/exp.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/fabs.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/float.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/floor.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/for-reject-1.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/for-reject-2.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/for-reject.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/for.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/for_with_continue.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/function-attributes.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/global.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/global.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/goto.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/if.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/log.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/memref.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/ptr-arg.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/ptrstride-ptr.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/ptrstride.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/round.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/scope.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/select.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/shift.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/sin.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/sqrt.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/store-memcpy-mlir.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/tan.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/tenary.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/unary-inc-dec.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/unary-plus-minus.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/unreachable.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/vectype.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/vtable.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/while-with-continue.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/ThroughMLIR/while.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/address-space.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/alloca.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/applearm64-new.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/array-init.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/array.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/asm.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/atomic-runtime.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/attribute-lowering.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/binop-bool.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/binop-fp.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/binop-overflow.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/binop-signed-int.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/binop-unsigned-int.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/bit.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/bitfieils.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/bitint.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/bool-to-int.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/bool.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/branch.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/brcond.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/bswap.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/builtin-binary-fp2fp.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/builtin-floating-point.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/builtin-isfpclass.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/call-op-call-conv.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/call.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/cast.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/class.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/cmp.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/cmp3way.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/complex.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/const-array.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/const.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/data-member.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/debug-info.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/delete-array.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/derived-to-base.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/dot.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/exceptions.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/expect.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/float.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/func-call-conv.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/func.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/global-ptr.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/globals.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/goto-interscope.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/goto.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/hello.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/if.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/int-wrap.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/intrinsics.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/libc.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/linker-options.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/loadstorealloca.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/loop.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/loops-with-break.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/loops-with-continue.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/module-asm.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/multi-array.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/nested-switch.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/nested-union-array.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/new.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/ptrdiff.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ptrstride.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/region-simplify.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/resume-flat.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/scope.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/select.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/setjmp-longjmp.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/shift.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/stack-save-restore.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/static-array.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/store-memcpy.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/str.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/struct-init.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/struct.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/switch-while.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/switch.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/syncscope.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/ternary.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/try-catch.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/types.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/unary-inc-dec.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/unary-not.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/unary-plus-minus.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/union-in-struct-init.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/unions.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/var-arg-x86_64.c
 create mode 100644 clang/test/CIR/Incubator/Lowering/variadics.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/vec-cmp.cir
 create mode 100644 clang/test/CIR/Incubator/Lowering/vectype.cpp
 create mode 100644 clang/test/CIR/Incubator/Lowering/vtable-thunk.cpp
 create mode 100644 clang/test/CIR/Incubator/Tools/cir-translate/cir-translate-triple.cir
 create mode 100644 clang/test/CIR/Incubator/Tools/cir-translate/has-triple-and-data-layout.cir
 create mode 100644 clang/test/CIR/Incubator/Tools/cir-translate/has-triple-no-data-layout.cir
 create mode 100644 clang/test/CIR/Incubator/Tools/cir-translate/invalid-translate-triple.cir
 create mode 100644 clang/test/CIR/Incubator/Tools/cir-translate/no-triple-has-data-layout.cir
 create mode 100644 clang/test/CIR/Incubator/Tools/cir-translate/no-triple-no-data-layout.cir
 create mode 100644 clang/test/CIR/Incubator/Tools/cir-translate/warn-default-triple.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/ABILowering/cast.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/ABILowering/cmp.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/ABILowering/const.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/ABILowering/func.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/ABILowering/global.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/ABILowering/member-ptr.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/Inputs/folly-coro.h
 create mode 100644 clang/test/CIR/Incubator/Transforms/Inputs/std.h
 create mode 100644 clang/test/CIR/Incubator/Transforms/Inputs/string.h
 create mode 100644 clang/test/CIR/Incubator/Transforms/builtin-assume.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/complex-fold.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/goto_solver.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/idiom-iter.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/idiom-recognizer.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/idiom-string.c
 create mode 100644 clang/test/CIR/Incubator/Transforms/if.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/lib-opt-find.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lib-opt-string.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lib-opt.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lifetime-check-agg.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lifetime-check-coro-task.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lifetime-check-lambda.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lifetime-check-owner.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lifetime-check-range-for-vector.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lifetime-check-remarks.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lifetime-check-smart-pointer-after-move.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lifetime-check-string.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lifetime-check-use-after-move.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lifetime-check.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lifetime-fn-args.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lifetime-invalid-option.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lifetime-loop-valid.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lifetime-loop.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lifetime-null-passing.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lifetime-switch.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/lifetime-this.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/live-object-analysis.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/live-object-analysis.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/loop.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/mem2reg.c
 create mode 100644 clang/test/CIR/Incubator/Transforms/mem2reg.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/merge-cleanups.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/move-opt.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/points-to-analysis.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/points-to-analysis.cpp
 create mode 100644 clang/test/CIR/Incubator/Transforms/scf-prepare.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/scope.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/select.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/setjmp-longjmp-lower.c
 create mode 100644 clang/test/CIR/Incubator/Transforms/simpl.c
 create mode 100644 clang/test/CIR/Incubator/Transforms/simpl.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/switch.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/ternary-fold.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/ternary.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/vector-cmp-fold.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/vector-create-fold.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/vector-extract-fold.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/vector-shuffle-dynamic-fold.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/vector-shuffle.fold.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/vector-splat.cir
 create mode 100644 clang/test/CIR/Incubator/Transforms/vector-ternary-fold.cir
 create mode 100644 clang/test/CIR/Incubator/analysis-only.cpp
 create mode 100644 clang/test/CIR/Incubator/cc1.c
 create mode 100644 clang/test/CIR/Incubator/cc1.cir
 create mode 100644 clang/test/CIR/Incubator/cir-output.c
 create mode 100644 clang/test/CIR/Incubator/cirtool.cir
 create mode 100644 clang/test/CIR/Incubator/crashes/apvalue-constexpr-init.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/array-new-default-arg.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/async-future.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/bitfield-bool-int-cast.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/cleanup-892-null-fixups.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/cleanup-unreachable.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/computed-goto-nyi.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/conditional-return-destructors.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/constexpr-cast.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/constexpr-complex-template-metaprog.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/copy-on-catch.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/dyncast-assertion.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/exception-handling-nyi.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/exception-ptr.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/filesystem-sd-automatic.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/function-ref-pointer-params.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/multi-inheritance-thunk-crash.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/range-for-temp-automatic.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/ref-temp-automatic.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/static-init-recursion.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/static-local-destructor.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/static-local-guard-nyi.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/static-local-used-attribute.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/static-var-dyn-cast.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/static-var-guarded-init.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/template-syntax-error.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/tls-destructor.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/verification-block-terminator.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/virtual-base-constructor.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/virtual-inheritance-crash.cpp
 create mode 100644 clang/test/CIR/Incubator/crashes/virtual-method-global-dtor.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/array-new-delete-divergences.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/calling-conv-12byte-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/calling-conv-16byte-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/calling-conv-20byte-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/calling-conv-4byte-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/calling-conv-aligned-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/calling-conv-array-in-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/calling-conv-bitfield-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/calling-conv-bool-in-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/calling-conv-empty-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/calling-conv-longlong-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/calling-conv-multiple-struct-params.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/calling-conv-nested-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/calling-conv-packed-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/calling-conv-pointer-in-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/calling-conv-two-longlongs.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/ctor-copy.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/ctor-deep-inheritance.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/ctor-delegating.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/ctor-inherited.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/ctor-member-init-list.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/ctor-move.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/ctor-multiple-inheritance.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/ctor-parameterized.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/float-double-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/float-mixed-int-float.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/float-single-float-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/float-struct-calling-conv.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/float-two-floats-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/global-constructor.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/inheritance-diamond.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/inheritance-empty-base.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/inheritance-missing-comdat.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/inheritance-private.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/inheritance-protected.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/inline-ctor-dtor-missing-comdat.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/lambda-capture-by-ref.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/lambda-capture-by-value.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/lambda-missing-comdat.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/lambda-mutable.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/lambda-returning-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/lambda-simple.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/lambda-with-params.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/member-ptr-abi-calling-conv.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/member-ptr-array.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/member-ptr-base-to-derived.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/member-ptr-comparison.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/member-ptr-const-method.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/member-ptr-data-member.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/member-ptr-multiple-inheritance.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/member-ptr-null.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/member-ptr-overloaded-function.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/member-ptr-returning-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/member-ptr-stored-in-struct.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/member-ptr-virtual-function.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/missing-llvm-attributes.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/operator-missing-comdat.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/rtti-dynamic-cast-downcast.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/rtti-dynamic-cast-upcast.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/rtti-linkage-gep.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/small-struct-coercion.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/sret-abi-mismatch.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/static-inline-member.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/static-local-trivial.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/static-member-variable.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/template-class-instantiation.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/template-inheritance.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/template-member-function.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/template-missing-comdat.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/template-multiple-type-params.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/template-non-type-param.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/template-specialization.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/template-variadic.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/thread-local-wrapper-missing.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/unnecessary-temp-allocas.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/virtual-inheritance-vtt.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/vtable-missing-comdat.cpp
 create mode 100644 clang/test/CIR/Incubator/divergences/vtable-thunk-destructor.cpp
 create mode 100644 clang/test/CIR/Incubator/driver.c
 create mode 100644 clang/test/CIR/Incubator/emit-mlir.c
 create mode 100644 clang/test/CIR/Incubator/global-var-simple.cpp
 create mode 100644 clang/test/CIR/Incubator/hello.c
 create mode 100644 clang/test/CIR/Incubator/mlirargs.c
 create mode 100644 clang/test/CIR/Incubator/mlirprint.c

diff --git a/clang/test/CIR/CodeGen/CUDA/mangling.cu b/clang/test/CIR/CodeGen/CUDA/mangling.cu
deleted file mode 100644
index bad62892cf318..0000000000000
--- a/clang/test/CIR/CodeGen/CUDA/mangling.cu
+++ /dev/null
@@ -1,81 +0,0 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -x cuda -emit-cir -target-sdk-version=12.3 %s -o %t.cir
-// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
-// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -fcuda-is-device -emit-cir -target-sdk-version=12.3 %s -o %t.cir
-// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
-
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -x cuda -emit-llvm -target-sdk-version=12.3 %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
-// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -fcuda-is-device -emit-llvm -target-sdk-version=12.3 %s -o %t.ll
-// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
-
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x cuda -emit-llvm -target-sdk-version=12.3 %s -o %t.ll
-// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
-// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -target-sdk-version=12.3 %s -o %t.ll
-// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
-
-#include "../Inputs/cuda.h"
-
-namespace ns {
-    __global__ void cpp_global_function_1(int a, int* b, float c) {}
-    __global__ void cpp_global_function_2(int a, int* b, float c) {}
-    __host__ void cpp_host_function_1(int a, int* b, float c) {}
-    __host__ void cpp_host_function_2(int a, int* b, float c) {}
-    __device__ void cpp_device_function_1(int a, int* b, float c) {}
-    __device__ void cpp_device_function_2(int a, int* b, float c) {}
-}
-
-__global__ void cpp_global_function_1(int a, int* b, float c) {}
-__global__ void cpp_global_function_2(int a, int* b, float c) {}
-__host__ void cpp_host_function_1(int a, int* b, float c) {}
-__host__ void cpp_host_function_2(int a, int* b, float c) {}
-__device__ void cpp_device_function_1(int a, int* b, float c) {}
-__device__ void cpp_device_function_2(int a, int* b, float c) {}
-
-extern "C" {
-    __global__ void c_global_function_1(int a, int* b, float c) {}
-    __global__ void c_global_function_2(int a, int* b, float c) {}
-    __host__ void c_host_function_1(int a, int* b, float c) {}
-    __host__ void c_host_function_2(int a, int* b, float c) {}
-    __device__ void c_device_function_1(int a, int* b, float c) {}
-    __device__ void c_device_function_2(int a, int* b, float c) {}
-}
-
-// CIR-HOST: cir.func {{.*}} @_ZN2ns36__device_stub__cpp_global_function_1EiPif
-// CIR-DEVICE: cir.func {{.*}} @_ZN2ns21cpp_global_function_1EiPif
-// LLVM-HOST: define {{.*}} @_ZN2ns36__device_stub__cpp_global_function_1EiPif
-// LLVM-DEVICE: define {{.*}} @_ZN2ns21cpp_global_function_1EiPif
-// OGCG-HOST: define {{.*}} @_ZN2ns36__device_stub__cpp_global_function_1EiPif
-// OGCG-DEVICE: define {{.*}} @_ZN2ns21cpp_global_function_1EiPif
-
-// CIR-HOST: cir.func {{.*}} @_ZN2ns36__device_stub__cpp_global_function_2EiPif
-// CIR-DEVICE: cir.func {{.*}} @_ZN2ns21cpp_global_function_2EiPif
-
-// CIR-HOST: cir.func {{.*}} @_ZN2ns19cpp_host_function_1EiPif
-// CIR-HOST: cir.func {{.*}} @_ZN2ns19cpp_host_function_2EiPif
-
-// CIR-DEVICE: cir.func {{.*}} @_ZN2ns21cpp_device_function_1EiPif
-// CIR-DEVICE: cir.func {{.*}} @_ZN2ns21cpp_device_function_2EiPif
-
-// CIR-HOST: cir.func {{.*}} @_Z36__device_stub__cpp_global_function_1iPif
-// CIR-DEVICE: cir.func {{.*}} @_Z21cpp_global_function_1iPif
-
-// CIR-HOST: cir.func {{.*}} @_Z36__device_stub__cpp_global_function_2iPif
-// CIR-DEVICE: cir.func {{.*}} @_Z21cpp_global_function_2iPif
-
-// CIR-HOST: cir.func {{.*}} @_Z19cpp_host_function_1iPif
-// CIR-HOST: cir.func {{.*}} @_Z19cpp_host_function_2iPif
-
-// CIR-DEVICE: cir.func {{.*}} @_Z21cpp_device_function_1iPif
-// CIR-DEVICE: cir.func {{.*}} @_Z21cpp_device_function_2iPif
-
-// CIR-HOST: cir.func {{.*}} @__device_stub__c_global_function_1
-// CIR-DEVICE: cir.func {{.*}} @c_global_function_1
-
-// CIR-HOST: cir.func {{.*}} @__device_stub__c_global_function_2
-// CIR-DEVICE: cir.func {{.*}} @c_global_function_2
-
-// CIR-HOST: cir.func {{.*}} @c_host_function_1
-// CIR-HOST: cir.func {{.*}} @c_host_function_2
-
-// CIR-DEVICE: cir.func {{.*}} @c_device_function_1
-// CIR-DEVICE: cir.func {{.*}} @c_device_function_2
diff --git a/clang/test/CIR/Incubator/CallConvLowering/AArch64/aarch64-cc-structs.c b/clang/test/CIR/Incubator/CallConvLowering/AArch64/aarch64-cc-structs.c
new file mode 100644
index 0000000000000..d0742456084fb
--- /dev/null
+++ b/clang/test/CIR/Incubator/CallConvLowering/AArch64/aarch64-cc-structs.c
@@ -0,0 +1,416 @@
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -fclangir -emit-cir-flat -fclangir-call-conv-lowering %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -fclangir -emit-llvm -fclangir-call-conv-lowering %s -o -| FileCheck %s -check-prefix=LLVM
+
+#include <stdint.h>
+
+typedef struct {
+  short a;
+} LT_64;
+
+typedef struct {
+  int64_t a;
+} EQ_64;
+
+typedef struct {
+  int64_t a;
+  int b;
+} LT_128;
+
+typedef struct {
+  int64_t a;
+  int64_t b;
+} EQ_128;
+
+typedef struct {
+  int64_t a;
+  int64_t b;
+  int64_t c;
+} GT_128;
+
+// CHECK: cir.func {{.*}} @ret_lt_64() -> !u16i
+// CHECK:   %[[#V0:]] = cir.alloca !rec_LT_64, !cir.ptr<!rec_LT_64>, ["__retval"]
+// CHECK:   %[[#V1:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_LT_64> -> !cir.ptr<!u16i>
+// CHECK:   %[[#V2:]] = cir.load{{.*}} %[[#V1]] : !cir.ptr<!u16i>, !u16i
+// CHECK:   cir.return %[[#V2]] : !u16i
+LT_64 ret_lt_64() {
+  LT_64 x;
+  return x;
+}
+
+// CHECK: cir.func {{.*}} @ret_eq_64() -> !u64i
+// CHECK:   %[[#V0:]] = cir.alloca !rec_EQ_64, !cir.ptr<!rec_EQ_64>, ["__retval"]
+// CHECK:   %[[#V1:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_EQ_64> -> !cir.ptr<!u64i>
+// CHECK:   %[[#V2:]] = cir.load{{.*}} %[[#V1]] : !cir.ptr<!u64i>, !u64i
+// CHECK:   cir.return %[[#V2]] : !u64i
+EQ_64 ret_eq_64() {
+  EQ_64 x;
+  return x;
+}
+
+// CHECK: cir.func {{.*}} @ret_lt_128() -> !cir.array<!u64i x 2>
+// CHECK:   %[[#V0:]] = cir.alloca !rec_LT_128, !cir.ptr<!rec_LT_128>, ["__retval"]
+// CHECK:   %[[#V1:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_LT_128> -> !cir.ptr<!cir.array<!u64i x 2>>
+// CHECK:   %[[#V2:]] = cir.load{{.*}} %[[#V1]] : !cir.ptr<!cir.array<!u64i x 2>>, !cir.array<!u64i x 2>
+// CHECK:   cir.return %[[#V2]] : !cir.array<!u64i x 2>
+LT_128 ret_lt_128() {
+  LT_128 x;
+  return x;
+}
+
+// CHECK: cir.func {{.*}} @ret_eq_128() -> !cir.array<!u64i x 2>
+// CHECK:   %[[#V0:]] = cir.alloca !rec_EQ_128, !cir.ptr<!rec_EQ_128>, ["__retval"]
+// CHECK:   %[[#V1:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_EQ_128> -> !cir.ptr<!cir.array<!u64i x 2>>
+// CHECK:   %[[#V2:]] = cir.load{{.*}} %[[#V1]] : !cir.ptr<!cir.array<!u64i x 2>>, !cir.array<!u64i x 2>
+// CHECK:   cir.return %[[#V2]] : !cir.array<!u64i x 2>
+EQ_128 ret_eq_128() {
+  EQ_128 x;
+  return x;
+}
+
+// CHECK:     cir.func {{.*}} @ret_gt_128(%arg0: !cir.ptr<!rec_GT_128>
+// CHECK-NOT:   cir.return {{%.*}}
+GT_128 ret_gt_128() {
+  GT_128 x;
+  return x;
+}
+
+typedef struct {
+  int a;
+  int b;
+  int c;
+} S;
+
+// CHECK: cir.func {{.*}} @retS() -> !cir.array<!u64i x 2>
+// CHECK:   %[[#V0:]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["__retval"] {alignment = 4 : i64}
+// CHECK:   %[[#V1:]] = cir.alloca !cir.array<!u64i x 2>, !cir.ptr<!cir.array<!u64i x 2>>, ["tmp"] {alignment = 8 : i64}
+// CHECK:   %[[#V2:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_S> -> !cir.ptr<!void>
+// CHECK:   %[[#V3:]] = cir.cast bitcast %[[#V1]] : !cir.ptr<!cir.array<!u64i x 2>> -> !cir.ptr<!void>
+// CHECK:   %[[#V4:]] = cir.const #cir.int<12> : !u64i
+// CHECK:   cir.libc.memcpy %[[#V4]] bytes from %[[#V2]] to %[[#V3]] : !u64i, !cir.ptr<!void> -> !cir.ptr<!void>
+// CHECK:   %[[#V5:]] = cir.load{{.*}} %[[#V1]] : !cir.ptr<!cir.array<!u64i x 2>>, !cir.array<!u64i x 2>
+// CHECK:   cir.return %[[#V5]] : !cir.array<!u64i x 2>
+
+// LLVM: [2 x i64] @retS()
+// LLVM:   %[[#V1:]] = alloca %struct.S, i64 1, align 4
+// LLVM:   %[[#V2:]] = alloca [2 x i64], i64 1, align 8
+// LLVM:   call void @llvm.memcpy.p0.p0.i64(ptr %[[#V2]], ptr %[[#V1]], i64 12, i1 false)
+// LLVM:   %[[#V3:]] = load [2 x i64], ptr %[[#V2]], align 8
+// LLVM:   ret [2 x i64] %[[#V3]]
+S retS() {
+  S s;
+  return s;
+}
+// CHECK: cir.func {{.*}} @pass_lt_64(%arg0: !u64
+// CHECK:   %[[#V0:]] = cir.alloca !rec_LT_64, !cir.ptr<!rec_LT_64>
+// CHECK:   %[[#V1:]] = cir.cast integral %arg0 : !u64i -> !u16i
+// CHECK:   %[[#V2:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_LT_64> -> !cir.ptr<!u16i>
+// CHECK:   cir.store{{.*}} %[[#V1]], %[[#V2]] : !u16i, !cir.ptr<!u16i>
+
+// LLVM: void @pass_lt_64(i64 %0)
+// LLVM:   %[[#V1:]] = alloca %struct.LT_64, i64 1, align 4
+// LLVM:   %[[#V2:]] = trunc i64 %0 to i16
+// LLVM:   store i16 %[[#V2]], ptr %[[#V1]], align 2
+void pass_lt_64(LT_64 s) {}
+
+// CHECK: cir.func {{.*}} @pass_eq_64(%arg0: !u64i
+// CHECK:   %[[#V0:]] = cir.alloca !rec_EQ_64, !cir.ptr<!rec_EQ_64>
+// CHECK:   %[[#V1:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_EQ_64> -> !cir.ptr<!u64i>
+// CHECK:   cir.store{{.*}} %arg0, %[[#V1]] : !u64i, !cir.ptr<!u64i>
+
+// LLVM: void @pass_eq_64(i64 %0)
+// LLVM:   %[[#V1:]] = alloca %struct.EQ_64, i64 1, align 4
+// LLVM:   store i64 %0, ptr %[[#V1]], align 8
+void pass_eq_64(EQ_64 s) {}
+
+// CHECK: cir.func {{.*}} @pass_lt_128(%arg0: !cir.array<!u64i x 2>
+// CHECK:   %[[#V0:]] = cir.alloca !rec_LT_128, !cir.ptr<!rec_LT_128>
+// CHECK:   %[[#V1:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_LT_128> -> !cir.ptr<!cir.array<!u64i x 2>>
+// CHECK:   cir.store{{.*}} %arg0, %[[#V1]] : !cir.array<!u64i x 2>, !cir.ptr<!cir.array<!u64i x 2>>
+
+// LLVM: void @pass_lt_128([2 x i64] %0)
+// LLVM:   %[[#V1:]] = alloca %struct.LT_128, i64 1, align 4
+// LLVM:   store [2 x i64] %0, ptr %[[#V1]], align 8
+void pass_lt_128(LT_128 s) {}
+
+// CHECK: cir.func {{.*}} @pass_eq_128(%arg0: !cir.array<!u64i x 2>
+// CHECK:   %[[#V0:]] = cir.alloca !rec_EQ_128, !cir.ptr<!rec_EQ_128>
+// CHECK:   %[[#V1:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_EQ_128> -> !cir.ptr<!cir.array<!u64i x 2>>
+// CHECK:   cir.store{{.*}} %arg0, %[[#V1]] : !cir.array<!u64i x 2>, !cir.ptr<!cir.array<!u64i x 2>>
+
+// LLVM: void @pass_eq_128([2 x i64] %0)
+// LLVM:   %[[#V1:]] = alloca %struct.EQ_128, i64 1, align 4
+// LLVM:   store [2 x i64] %0, ptr %[[#V1]], align 8
+void pass_eq_128(EQ_128 s) {}
+
+// CHECK: cir.func {{.*}} @pass_gt_128(%arg0: !cir.ptr<!rec_GT_128>
+// CHECK:   %[[#V0:]] = cir.alloca !cir.ptr<!rec_GT_128>, !cir.ptr<!cir.ptr<!rec_GT_128>>, [""] {alignment = 8 : i64}
+// CHECK:   cir.store{{.*}} %arg0, %[[#V0]] : !cir.ptr<!rec_GT_128>, !cir.ptr<!cir.ptr<!rec_GT_128>>
+// CHECK:   %[[#V1:]] = cir.load{{.*}} %[[#V0]] : !cir.ptr<!cir.ptr<!rec_GT_128>>, !cir.ptr<!rec_GT_128>
+
+// LLVM: void @pass_gt_128(ptr %0)
+// LLVM:   %[[#V1:]] = alloca ptr, i64 1, align 8
+// LLVM:   store ptr %0, ptr %[[#V1]], align 8
+// LLVM:   %[[#V2:]] = load ptr, ptr %[[#V1]], align 8
+void pass_gt_128(GT_128 s) {}
+
+// CHECK: cir.func {{.*}} @get_gt_128(%arg0: !cir.ptr<!rec_GT_128> {{.*}}, %arg1: !cir.ptr<!rec_GT_128>
+// CHECK: %[[#V0:]] = cir.alloca !cir.ptr<!rec_GT_128>, !cir.ptr<!cir.ptr<!rec_GT_128>>, [""] {alignment = 8 : i64}
+// CHECK: cir.store{{.*}} %arg1, %[[#V0]] : !cir.ptr<!rec_GT_128>, !cir.ptr<!cir.ptr<!rec_GT_128>>
+// CHECK: %[[#V1:]] = cir.load{{.*}} %[[#V0]] : !cir.ptr<!cir.ptr<!rec_GT_128>>, !cir.ptr<!rec_GT_128>
+// CHECK: cir.copy %[[#V1]] to %arg0 : !cir.ptr<!rec_GT_128>
+// CHECK: cir.return
+
+// LLVM: void @get_gt_128(ptr %[[#V0:]], ptr %[[#V1:]])
+// LLVM: %[[#V3:]] = alloca ptr, i64 1, align 8
+// LLVM: store ptr %[[#V1]], ptr %[[#V3]], align 8
+// LLVM: %[[#V4:]] = load ptr, ptr %[[#V3]], align 8
+// LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[#V0]], ptr %[[#V4]], i32 24, i1 false)
+// LLVM: ret void
+GT_128 get_gt_128(GT_128 s) {
+  return s;
+}
+
+// CHECK: cir.func {{.*}} @call_and_get_gt_128(%arg0: !cir.ptr<!rec_GT_128>
+// CHECK: %[[#V0:]] = cir.alloca !rec_GT_128, !cir.ptr<!rec_GT_128>, ["tmp"] {alignment = 8 : i64}
+// CHECK: %[[#V1:]] = cir.load{{.*}} %arg0 : !cir.ptr<!rec_GT_128>, !rec_GT_128
+// CHECK: %[[#V2:]] = cir.alloca !rec_GT_128, !cir.ptr<!rec_GT_128>, [""] {alignment = 8 : i64}
+// CHECK: %[[#V3:]] = cir.alloca !rec_GT_128, !cir.ptr<!rec_GT_128>, ["tmp"] {alignment = 8 : i64}
+// CHECK: %[[#V4:]] = cir.cast bitcast %arg0 : !cir.ptr<!rec_GT_128> -> !cir.ptr<!void>
+// CHECK: %[[#V5:]] = cir.cast bitcast %[[#V3]] : !cir.ptr<!rec_GT_128> -> !cir.ptr<!void>
+// CHECK: %[[#V6:]] = cir.const #cir.int<24> : !u64i
+// CHECK: cir.libc.memcpy %[[#V6]] bytes from %[[#V4]] to %[[#V5]] : !u64i, !cir.ptr<!void> -> !cir.ptr<!void>
+// CHECK: cir.call @get_gt_128(%[[#V2]], %[[#V3]]) : (!cir.ptr<!rec_GT_128>, !cir.ptr<!rec_GT_128>) -> ()
+// CHECK: cir.return
+
+// LLVM: void @call_and_get_gt_128(ptr %[[#V0:]])
+// LLVM: %[[#V2:]] = alloca %struct.GT_128, i64 1, align 8
+// LLVM: %[[#V3:]] = load %struct.GT_128, ptr %[[#V0]], align 8
+// LLVM: %[[#V4:]] = alloca %struct.GT_128, i64 1, align 8
+// LLVM: %[[#V5:]] = alloca %struct.GT_128, i64 1, align 8
+// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[#V5]], ptr %[[#V0]], i64 24, i1 false)
+// LLVM: call void @get_gt_128(ptr %[[#V4]], ptr %[[#V5]])
+GT_128 call_and_get_gt_128() {
+  GT_128 s;
+  s = get_gt_128(s);
+  return s;
+}
+// CHECK: cir.func {{.*}} @passS(%arg0: !cir.array<!u64i x 2>
+// CHECK:   %[[#V0:]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, [""] {alignment = 4 : i64}
+// CHECK:   %[[#V1:]] = cir.alloca !cir.array<!u64i x 2>, !cir.ptr<!cir.array<!u64i x 2>>, ["tmp"] {alignment = 8 : i64}
+// CHECK:   cir.store{{.*}} %arg0, %[[#V1]] : !cir.array<!u64i x 2>, !cir.ptr<!cir.array<!u64i x 2>>
+// CHECK:   %[[#V2:]] = cir.cast bitcast %[[#V1]] : !cir.ptr<!cir.array<!u64i x 2>> -> !cir.ptr<!void>
+// CHECK:   %[[#V3:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_S> -> !cir.ptr<!void>
+// CHECK:   %[[#V4:]] = cir.const #cir.int<12> : !u64i
+// CHECK:   cir.libc.memcpy %[[#V4]] bytes from %[[#V2]] to %[[#V3]] : !u64i, !cir.ptr<!void> -> !cir.ptr<!void>
+
+// LLVM: void @passS([2 x i64] %[[#ARG:]])
+// LLVM:   %[[#V1:]] = alloca %struct.S, i64 1, align 4
+// LLVM:   %[[#V2:]] = alloca [2 x i64], i64 1, align 8
+// LLVM:   store [2 x i64] %[[#ARG]], ptr %[[#V2]], align 8
+// LLVM:   call void @llvm.memcpy.p0.p0.i64(ptr %[[#V1]], ptr %[[#V2]], i64 12, i1 false)
+void passS(S s) {}
+
+// CHECK: cir.func {{.*}} @callS()
+// CHECK: %[[#V0:]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["s"] {alignment = 4 : i64}
+// CHECK: %[[#V1:]] = cir.alloca !cir.array<!u64i x 2>, !cir.ptr<!cir.array<!u64i x 2>>, ["tmp"] {alignment = 8 : i64}
+// CHECK: %[[#V2:]] = cir.load{{.*}} %[[#V0]] : !cir.ptr<!rec_S>, !rec_S
+// CHECK: %[[#V3:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_S> -> !cir.ptr<!void>
+// CHECK: %[[#V4:]] = cir.cast bitcast %[[#V1]] : !cir.ptr<!cir.array<!u64i x 2>> -> !cir.ptr<!void>
+// CHECK: %[[#V5:]] = cir.const #cir.int<12> : !u64i
+// CHECK: cir.libc.memcpy %[[#V5]] bytes from %[[#V3]] to %[[#V4]] : !u64i, !cir.ptr<!void> -> !cir.ptr<!void>
+// CHECK: %[[#V6:]] = cir.load{{.*}} %[[#V1]] : !cir.ptr<!cir.array<!u64i x 2>>, !cir.array<!u64i x 2>
+// CHECK: cir.call @passS(%[[#V6]]) : (!cir.array<!u64i x 2>) -> ()
+// CHECK: cir.return
+
+// LLVM: @callS()
+// LLVM: %[[#V1:]] = alloca %struct.S, i64 1, align 4
+// LLVM: %[[#V2:]] = alloca [2 x i64], i64 1, align 8
+// LLVM: %[[#V3:]] = load %struct.S, ptr %[[#V1]], align 4
+// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[#V2]], ptr %[[#V1]], i64 12, i1 false)
+// LLVM: %[[#V4:]] = load [2 x i64], ptr %[[#V2]], align 8
+// LLVM: call void @passS([2 x i64] %[[#V4]])
+// LLVM: ret void
+void callS() {
+  S s;
+  passS(s);
+}
+
+typedef struct {
+  uint8_t a;
+  uint16_t b;
+  uint8_t c;
+} S_PAD;
+
+// CHECK: cir.func {{.*}} @ret_s_pad()  -> !u48i
+// CHECK: %[[#V0:]] = cir.alloca !rec_S_PAD, !cir.ptr<!rec_S_PAD>, ["__retval"] {alignment = 2 : i64}
+// CHECK: %[[#V1:]] = cir.load{{.*}} %[[#V0]] : !cir.ptr<!rec_S_PAD>, !rec_S_PAD
+// CHECK: %[[#V2:]] = cir.alloca !u48i, !cir.ptr<!u48i>, [""] {alignment = 2 : i64}
+// CHECK: %[[#V3:]] = cir.cast bitcast %[[#V0]]  : !cir.ptr<!rec_S_PAD>
+// CHECK: %[[#V4:]] = cir.cast bitcast %[[#V2]] : !cir.ptr<!u48i> -> !cir.ptr<!void>
+// CHECK: %[[#V5:]] = cir.const #cir.int<6> : !u64i
+// CHECK: cir.libc.memcpy %[[#V5]] bytes from %[[#V3]] to %[[#V4]] : !u64i, !cir.ptr<!void>
+// CHECK: %[[#V6:]] = cir.load{{.*}} %[[#V2]] : !cir.ptr<!u48i>
+// CHECK: cir.return %[[#V6]]
+
+// LLVM: i48 @ret_s_pad()
+// LLVM: %[[#V1:]] = alloca %struct.S_PAD, i64 1, align 2
+// LLVM: %[[#V2:]] = load %struct.S_PAD, ptr %[[#V1]], align 2
+// LLVM: %[[#V3:]] = alloca i48, i64 1, align 2
+// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[#V3]], ptr %[[#V1]], i64 6, i1 false)
+// LLVM: %[[#V4:]] = load i48, ptr %[[#V3]]
+// LLVM: ret i48 %[[#V4]]
+S_PAD ret_s_pad() {
+  S_PAD s;
+  return s;
+}
+
+typedef struct {
+  int a[42];
+} CAT;
+
+// CHECK: cir.func {{.*}} @pass_cat
+// CHECK: %[[#V0:]]  = cir.alloca !cir.ptr<!rec_CAT>, !cir.ptr<!cir.ptr<!rec_CAT>>, [""] {alignment = 8 : i64}
+// CHECK: cir.store{{.*}} %arg0, %[[#V0]]  : !cir.ptr<!rec_CAT>, !cir.ptr<!cir.ptr<!rec_CAT>>
+// CHECK: %[[#V1:]]  = cir.load{{.*}} %[[#V0]]  : !cir.ptr<!cir.ptr<!rec_CAT>>, !cir.ptr<!rec_CAT>
+// CHECK: cir.return
+
+// LLVM: void @pass_cat(ptr %[[#V0:]])
+// LLVM: %[[#V2:]] = alloca ptr, i64 1, align 8
+// LLVM: store ptr %[[#V0]], ptr %[[#V2]], align 8
+// LLVM: %[[#V3:]] = load ptr, ptr %[[#V2]], align 8
+// LLVM: ret void
+void pass_cat(CAT a) {}
+
+typedef struct {
+  union {
+    struct {
+      char a, b;
+    };
+    char c;
+  };
+} NESTED_U;
+
+// CHECK: cir.func {{.*}} @pass_nested_u(%[[ARG0:[a-z0-9]+]]: !u64i
+// CHECK: %[[#V0:]] = cir.alloca !rec_NESTED_U, !cir.ptr<!rec_NESTED_U>, [""] {alignment = 4 : i64}
+// CHECK: %[[#V1:]] = cir.cast integral %[[ARG0]] : !u64i -> !u16i
+// CHECK: %[[#V2:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_NESTED_U>
+// CHECK: cir.store{{.*}} %[[#V1]], %[[#V2]] : !u16i
+// CHECK: cir.return
+
+// LLVM: @pass_nested_u(i64 %[[#V0:]]
+// LLVM: %[[#V2:]] = alloca %struct.NESTED_U, i64 1, align 4
+// LLVM: %[[#V3:]] = trunc i64 %[[#V0]] to i16
+// LLVM: store i16 %[[#V3]], ptr %[[#V2]], align 2
+// LLVM: ret void
+void pass_nested_u(NESTED_U a) {}
+
+// CHECK: cir.func {{.*}} @call_nested_u()
+// CHECK: %[[#V0:]] = cir.alloca !rec_NESTED_U, !cir.ptr<!rec_NESTED_U>
+// CHECK: %[[#V1:]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["tmp"]
+// CHECK: %[[#V2:]] = cir.load{{.*}} %[[#V0]] : !cir.ptr<!rec_NESTED_U>, !rec_NESTED_U
+// CHECK: %[[#V3:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_NESTED_U> -> !cir.ptr<!rec_anon2E0>
+// CHECK: %[[#V4:]] = cir.load{{.*}} %[[#V3]] : !cir.ptr<!rec_anon2E0>, !rec_anon2E0
+// CHECK: %[[#V5:]] = cir.cast bitcast %[[#V3]] : !cir.ptr<!rec_anon2E0> -> !cir.ptr<!rec_anon2E1>
+// CHECK: %[[#V6:]] = cir.load{{.*}} %[[#V5]] : !cir.ptr<!rec_anon2E1>, !rec_anon2E1
+// CHECK: %[[#V7:]] = cir.cast bitcast %[[#V5]] : !cir.ptr<!rec_anon2E1> -> !cir.ptr<!void>
+// CHECK: %[[#V8:]] = cir.cast bitcast %[[#V1]] : !cir.ptr<!u64i> -> !cir.ptr<!void>
+// CHECK: %[[#V9:]] = cir.const #cir.int<2> : !u64i
+// CHECK: cir.libc.memcpy %[[#V9]] bytes from %[[#V7]] to %[[#V8]] : !u64i, !cir.ptr<!void> -> !cir.ptr<!void>
+// CHECK: %[[#V10:]] = cir.load{{.*}} %[[#V1]] : !cir.ptr<!u64i>, !u64i
+// CHECK: cir.call @pass_nested_u(%[[#V10]]) : (!u64i) -> ()
+
+// LLVM: void @call_nested_u()
+// LLVM: %[[#V1:]] = alloca %struct.NESTED_U, i64 1, align 1
+// LLVM: %[[#V2:]] = alloca i64, i64 1, align 8
+// LLVM: %[[#V3:]] = load %struct.NESTED_U, ptr %[[#V1]], align 1
+// LLVM: %[[#V4:]] = load %union.anon.0, ptr %[[#V1]], align 1
+// LLVM: %[[#V5:]] = load %struct.anon.1, ptr %[[#V1]], align 1
+// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[#V2]], ptr %[[#V1]], i64 2, i1 false)
+// LLVM: %[[#V6:]] = load i64, ptr %[[#V2]], align 8
+// LLVM: call void @pass_nested_u(i64 %[[#V6]])
+// LLVM: ret void
+void call_nested_u() {
+  NESTED_U a;
+  pass_nested_u(a);
+}
+
+
+#pragma pack(push)
+#pragma pack(1)
+typedef struct {
+   int f0 : 18;
+   int f1 : 31;
+   int f2 : 5;
+   int f3 : 29;
+   int f4 : 24;
+} PackedS1;
+#pragma pack(pop)
+
+PackedS1 foo(void) {
+  PackedS1 s;
+  return s;
+}
+
+void bar(void) {
+  PackedS1 y = foo();
+}
+
+// CHECK: cir.func {{.*}} @bar
+// CHECK: %[[#V0:]] = cir.alloca !rec_PackedS1, !cir.ptr<!rec_PackedS1>, ["y", init]
+// CHECK: %[[#V1:]] = cir.alloca !cir.array<!u64i x 2>, !cir.ptr<!cir.array<!u64i x 2>>, ["tmp"]
+// CHECK: %[[#V2:]] = cir.call @foo() : () -> !cir.array<!u64i x 2>
+// CHECK: cir.store{{.*}} %[[#V2]], %[[#V1]] : !cir.array<!u64i x 2>, !cir.ptr<!cir.array<!u64i x 2>>
+// CHECK: %[[#V3:]] = cir.cast bitcast %[[#V1]] : !cir.ptr<!cir.array<!u64i x 2>> -> !cir.ptr<!void>
+// CHECK: %[[#V4:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_PackedS1> -> !cir.ptr<!void>
+// CHECK: %[[#V5:]] = cir.const #cir.int<14> : !u64i
+// CHECK: cir.libc.memcpy %[[#V5]] bytes from %[[#V3]] to %[[#V4]] : !u64i, !cir.ptr<!void> -> !cir.ptr<!void>
+
+// LLVM: void @bar
+// LLVM:  %[[#V1:]] = alloca %struct.PackedS1, i64 1, align 1
+// LLVM:  %[[#V2:]] = alloca [2 x i64], i64 1, align 8
+// LLVM:  %[[#V3:]] = call [2 x i64] @foo()
+// LLVM:  store [2 x i64] %[[#V3]], ptr %[[#V2]], align 8
+// LLVM:  call void @llvm.memcpy.p0.p0.i64(ptr %[[#V1]], ptr %[[#V2]], i64 14, i1 false)
+
+
+#pragma pack(push)
+#pragma pack(1)
+typedef struct {
+   short  f0;
+   int  f1;
+} PackedS2;
+#pragma pack(pop)
+
+PackedS2 g[3] = {{1,2},{3,4},{5,6}};
+
+void baz(PackedS2 a) {
+  short *x = &g[2].f0;
+  (*x) = a.f0;
+}
+
+void qux(void) {
+  const PackedS2 *s1 = &g[1];
+  baz(*s1);
+}
+
+// check source of memcpy
+// CHECK: cir.func {{.*}} @qux
+// CHECK: %[[#V0:]] = cir.alloca !cir.ptr<!rec_PackedS2>, !cir.ptr<!cir.ptr<!rec_PackedS2>>, ["s1", init]
+// CHECK: %[[#V1:]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["tmp"]
+// CHECK: %[[#V2:]] = cir.get_global @g : !cir.ptr<!cir.array<!rec_PackedS2 x 3>>
+// CHECK: %[[#V3:]] = cir.const #cir.int<1> : !s32i
+// CHECK: %[[#V5:]] = cir.get_element %[[#V2]][%[[#V3]]] : (!cir.ptr<!cir.array<!rec_PackedS2 x 3>>, !s32i) -> !cir.ptr<!rec_PackedS2>
+// CHECK: cir.store{{.*}} %[[#V5]], %[[#V0]] : !cir.ptr<!rec_PackedS2>, !cir.ptr<!cir.ptr<!rec_PackedS2>>
+// CHECK: %[[#V6:]] = cir.load deref{{.*}}  %[[#V0]] : !cir.ptr<!cir.ptr<!rec_PackedS2>>, !cir.ptr<!rec_PackedS2>
+// CHECK: %[[#V7:]] = cir.cast bitcast %[[#V6]] : !cir.ptr<!rec_PackedS2> -> !cir.ptr<!void>
+// CHECK: %[[#V8:]] = cir.const #cir.int<6> : !u64i
+// CHECK: cir.libc.memcpy %[[#V8]] bytes from %[[#V7]]
+
+// Note: GEP emitted by cir might not be the same as LLVM, due to constant folding.
+// LLVM: void @qux
+// LLVM: %[[#V1:]] = alloca ptr, i64 1, align 8
+// LLVM: %[[#V2:]] = alloca i64, i64 1, align 8
+// LLVM: store ptr getelementptr inbounds nuw (i8, ptr @g, i64 6), ptr %[[#V1]], align 8
+// LLVM: %[[#V3:]] = load ptr, ptr %[[#V1]], align 8
+// LLVM: %[[#V4:]] = load %struct.PackedS2, ptr %[[#V3]], align 1
+// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[#V2]], ptr %[[#V3]], i64 6, i1 false)
diff --git a/clang/test/CIR/Incubator/CallConvLowering/AArch64/aarch64_be-cc-structs.c b/clang/test/CIR/Incubator/CallConvLowering/AArch64/aarch64_be-cc-structs.c
new file mode 100644
index 0000000000000..e579c749d8daf
--- /dev/null
+++ b/clang/test/CIR/Incubator/CallConvLowering/AArch64/aarch64_be-cc-structs.c
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -triple aarch64_be-unknown-linux-gnu -fclangir -fclangir-call-conv-lowering -emit-cir-flat -mmlir --mlir-print-ir-after=cir-call-conv-lowering %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+typedef struct {
+  int a;
+  int b;
+} __attribute__((aligned (4))) S;
+
+// CHECK: cir.func {{.*@init}}() -> !u64i
+// CHECK:    %[[#V0:]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["__retval"] {alignment = 4 : i64}
+// CHECK:    %[[#V1:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_S> -> !cir.ptr<!u64i>
+// CHECK:    %[[#V2:]] = cir.load %[[#V1]] : !cir.ptr<!u64i>, !u64i
+// CHECK:    cir.return %[[#V2]] : !u64i
+S init() {
+  S s;
+  return s;
+}
diff --git a/clang/test/CIR/Incubator/CallConvLowering/AArch64/basic.cpp b/clang/test/CIR/Incubator/CallConvLowering/AArch64/basic.cpp
new file mode 100644
index 0000000000000..364aee5109622
--- /dev/null
+++ b/clang/test/CIR/Incubator/CallConvLowering/AArch64/basic.cpp
@@ -0,0 +1,85 @@
+// RUN: %clang_cc1 -std=c++20 -triple aarch64-unknown-linux-gnu -fclangir -fclangir-call-conv-lowering -emit-cir-flat -mmlir --mlir-print-ir-after=cir-call-conv-lowering %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// CHECK: @_Z4Voidv()
+void Void(void) {
+// CHECK:   cir.call @_Z4Voidv() : () -> ()
+  Void();
+}
+
+// Test call conv lowering for trivial usinged integer cases.
+
+// CHECK: @_Z4Boolb(%arg0: !cir.bool loc({{.+}})) -> !cir.bool
+bool Bool(bool a) {
+// CHECK:   cir.call @_Z4Boolb({{.+}}) : (!cir.bool) -> !cir.bool
+  return Bool(a);
+}
+
+// CHECK: cir.func {{.*}} @_Z5UCharh(%arg0: !u8i loc({{.+}})) -> !u8i
+unsigned char UChar(unsigned char c) {
+  // CHECK: cir.call @_Z5UCharh(%2) : (!u8i) -> !u8i
+  return UChar(c);
+}
+// CHECK: cir.func {{.*}} @_Z6UShortt(%arg0: !u16i loc({{.+}})) -> !u16i
+unsigned short UShort(unsigned short s) {
+  // CHECK: cir.call @_Z6UShortt(%2) : (!u16i) -> !u16i
+  return UShort(s);
+}
+// CHECK: cir.func {{.*}} @_Z4UIntj(%arg0: !u32i loc({{.+}})) -> !u32i
+unsigned int UInt(unsigned int i) {
+  // CHECK: cir.call @_Z4UIntj(%2) : (!u32i) -> !u32i
+  return UInt(i);
+}
+// CHECK: cir.func {{.*}} @_Z5ULongm(%arg0: !u64i loc({{.+}})) -> !u64i
+unsigned long ULong(unsigned long l) {
+  // CHECK: cir.call @_Z5ULongm(%2) : (!u64i) -> !u64i
+  return ULong(l);
+}
+// CHECK: cir.func {{.*}} @_Z9ULongLongy(%arg0: !u64i loc({{.+}})) -> !u64i
+unsigned long long ULongLong(unsigned long long l) {
+  // CHECK: cir.call @_Z9ULongLongy(%2) : (!u64i) -> !u64i
+  return ULongLong(l);
+}
+
+
+/// Test call conv lowering for trivial signed cases. ///
+
+// CHECK: cir.func {{.*}} @_Z4Chara(%arg0: !s8i loc({{.+}})) -> !s8i
+char Char(signed char c) {
+  // CHECK: cir.call @_Z4Chara(%{{.+}}) : (!s8i) -> !s8i
+  return Char(c);
+}
+// CHECK: cir.func {{.*}} @_Z5Shorts(%arg0: !s16i loc({{.+}})) -> !s16i
+short Short(short s) {
+  // CHECK: cir.call @_Z5Shorts(%{{.+}}) : (!s16i) -> !s16i
+  return Short(s);
+}
+// CHECK: cir.func {{.*}} @_Z3Inti(%arg0: !s32i loc({{.+}})) -> !s32i
+int Int(int i) {
+  // CHECK: cir.call @_Z3Inti(%{{.+}}) : (!s32i) -> !s32i
+  return Int(i);
+}
+// CHECK: cir.func {{.*}} @_Z4Longl(%arg0: !s64i loc({{.+}})) -> !s64i
+long Long(long l) {
+  // CHECK: cir.call @_Z4Longl(%{{.+}}) : (!s64i) -> !s64i
+  return Long(l);
+}
+// CHECK: cir.func {{.*}} @_Z8LongLongx(%arg0: !s64i loc({{.+}})) -> !s64i
+long long LongLong(long long l) {
+  // CHECK: cir.call @_Z8LongLongx(%{{.+}}) : (!s64i) -> !s64i
+  return LongLong(l);
+}
+
+
+/// Test call conv lowering for floating point. ///
+
+// CHECK: cir.func {{.*}} @_Z5Floatf(%arg0: !cir.float loc({{.+}})) -> !cir.float
+float Float(float f) {
+  // cir.call @_Z5Floatf(%{{.+}}) : (!cir.float) -> !cir.float
+  return Float(f);
+}
+// CHECK: cir.func {{.*}} @_Z6Doubled(%arg0: !cir.double loc({{.+}})) -> !cir.double
+double Double(double d) {
+  // cir.call @_Z6Doubled(%{{.+}}) : (!cir.double) -> !cir.double
+  return Double(d);
+}
diff --git a/clang/test/CIR/Incubator/CallConvLowering/AArch64/ptr-fields.c b/clang/test/CIR/Incubator/CallConvLowering/AArch64/ptr-fields.c
new file mode 100644
index 0000000000000..79e4e437d7915
--- /dev/null
+++ b/clang/test/CIR/Incubator/CallConvLowering/AArch64/ptr-fields.c
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu  -fclangir -fclangir-call-conv-lowering -emit-cir-flat -mmlir --mlir-print-ir-after=cir-call-conv-lowering %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu  -fclangir -emit-llvm %s -o %t.ll -fclangir-call-conv-lowering
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+typedef int (*myfptr)(int);
+
+typedef struct {
+  myfptr f;
+} A;
+
+int foo(int x) { return x; }
+
+// CIR: cir.func {{.*}} @passA(%arg0: !u64i
+// CIR: %[[#V0:]] = cir.alloca !rec_A, !cir.ptr<!rec_A>, [""] {alignment = 4 : i64}
+// CIR: %[[#V1:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_A> -> !cir.ptr<!u64i>
+// CIR: cir.store{{.*}} %arg0, %[[#V1]] : !u64i, !cir.ptr<!u64i>
+// CIR: %[[#V2:]] = cir.get_global @foo : !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+// CIR: %[[#V3:]] = cir.get_member %[[#V0]][0] {name = "f"} : !cir.ptr<!rec_A> -> !cir.ptr<!cir.ptr<!cir.func<(!s32i) -> !s32i>>>
+// CIR: cir.store{{.*}} %[[#V2]], %[[#V3]] : !cir.ptr<!cir.func<(!s32i) -> !s32i>>, !cir.ptr<!cir.ptr<!cir.func<(!s32i) -> !s32i>>>
+// CIR: cir.return
+
+// LLVM: void @passA(i64 %[[#V0:]])
+// LLVM: %[[#V2:]] = alloca %struct.A, i64 1, align 4
+// LLVM: store i64 %[[#V0]], ptr %[[#V2]], align 8
+// LLVM: %[[#V3:]] = getelementptr %struct.A, ptr %[[#V2]], i32 0, i32 0
+// LLVM: store ptr @foo, ptr %[[#V3]], align 8
+// LLVM: ret void
+void passA(A a) { a.f = foo; }
+
+typedef struct {
+  int a;
+} S_1;
+
+typedef struct {
+  S_1* s;
+} S_2;
+
+// CIR: cir.func {{.*}} @passB(%arg0: !u64i
+// CIR: %[[#V0:]]  = cir.alloca !rec_S_2, !cir.ptr<!rec_S_2>, [""] {alignment = 4 : i64}
+// CIR: %[[#V1:]]  = cir.cast bitcast %[[#V0]]  : !cir.ptr<!rec_S_2> -> !cir.ptr<!u64i>
+// CIR: cir.store{{.*}} %arg0, %[[#V1]]  : !u64i, !cir.ptr<!u64i>
+// CIR: cir.return
+
+// LLVM: void @passB(i64 %[[#V0:]])
+// LLVM: %[[#V2:]]  = alloca %struct.S_2, i64 1, align 4
+// LLVM: store i64 %[[#V0]], ptr %[[#V2]], align 8
+// LLVM: ret void
+void passB(S_2 s) {}
diff --git a/clang/test/CIR/Incubator/CallConvLowering/AArch64/struct.c b/clang/test/CIR/Incubator/CallConvLowering/AArch64/struct.c
new file mode 100644
index 0000000000000..d542502bf33ab
--- /dev/null
+++ b/clang/test/CIR/Incubator/CallConvLowering/AArch64/struct.c
@@ -0,0 +1,167 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-call-conv-lowering -emit-cir-flat -mmlir --mlir-print-ir-after=cir-call-conv-lowering %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -fclangir-call-conv-lowering
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+typedef struct {
+  int a, b;
+} S;
+
+// CIR: cir.func {{.*}} @init(%arg0: !u64i
+// CIR: %[[#V0:]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, [""] {alignment = 4 : i64}
+// CIR: %[[#V1:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_S> -> !cir.ptr<!u64i>
+// CIR: cir.store{{.*}} %arg0, %[[#V1]] : !u64i, !cir.ptr<!u64i>
+// CIR: %[[#V2:]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["__retval"] {alignment = 4 : i64}
+// CIR: %[[#V3:]] = cir.const #cir.int<1> : !s32i
+// CIR: %[[#V4:]] = cir.get_member %[[#V0]][0] {name = "a"} : !cir.ptr<!rec_S> -> !cir.ptr<!s32i>
+// CIR: cir.store{{.*}} %[[#V3]], %[[#V4]] : !s32i, !cir.ptr<!s32i>
+// CIR: %[[#V5:]] = cir.const #cir.int<2> : !s32i
+// CIR: %[[#V6:]] = cir.get_member %[[#V0]][1] {name = "b"} : !cir.ptr<!rec_S> -> !cir.ptr<!s32i>
+// CIR: cir.store{{.*}} %[[#V5]], %[[#V6]] : !s32i, !cir.ptr<!s32i>
+// CIR: cir.copy %[[#V0]] to %[[#V2]] : !cir.ptr<!rec_S>
+// CIR: %[[#V7:]] = cir.cast bitcast %[[#V2]] : !cir.ptr<!rec_S> -> !cir.ptr<!u64i>
+// CIR: %[[#V8:]] = cir.load %[[#V7]] : !cir.ptr<!u64i>, !u64i
+// CIR: cir.return %[[#V8]] : !u64i
+
+// LLVM: @init(i64 %[[#V0:]])
+// LLVM: %[[#V2:]] = alloca %struct.S, i64 1, align 4
+// LLVM: store i64 %[[#V0]], ptr %[[#V2]], align 8
+// LLVM: %[[#V3:]] = alloca %struct.S, i64 1, align 4
+// LLVM: %[[#V4:]] = getelementptr %struct.S, ptr %[[#V2]], i32 0, i32 0
+// LLVM: store i32 1, ptr %[[#V4]], align 4
+// LLVM: %[[#V5:]] = getelementptr %struct.S, ptr %[[#V2]], i32 0, i32 1
+// LLVM: store i32 2, ptr %[[#V5]], align 4
+// LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[#V3]], ptr %[[#V2]], i32 8, i1 false)
+// LLVM: %[[#V6:]] = load i64, ptr %[[#V3]], align 8
+// LLVM: ret i64 %[[#V6]]
+S init(S s) {
+  s.a = 1;
+  s.b = 2;
+  return s;
+}
+
+// CIR: cir.func {{.*}} @foo1
+// CIR: %[[#V0:]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["s"]
+// CIR: %[[#V1:]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["tmp"] {alignment = 4 : i64}
+// CIR: %[[#V2:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_S> -> !cir.ptr<!u64i>
+// CIR: %[[#V3:]] = cir.load %[[#V2]] : !cir.ptr<!u64i>, !u64i
+// CIR: %[[#V4:]] = cir.call @init(%[[#V3]]) : (!u64i) -> !u64i
+// CIR: %[[#V5:]] = cir.cast bitcast %[[#V1]] : !cir.ptr<!rec_S> -> !cir.ptr<!u64i>
+// CIR: cir.store{{.*}} %[[#V4]], %[[#V5]] : !u64i, !cir.ptr<!u64i>
+// CIR: cir.copy %[[#V1]] to %[[#V0]] : !cir.ptr<!rec_S>
+// CIR: cir.return
+
+// LLVM: @foo1()
+// LLVM: %[[#V1:]] = alloca %struct.S, i64 1, align 4
+// LLVM: %[[#V2:]] = alloca %struct.S, i64 1, align 4
+// LLVM: %[[#V3:]] = load i64, ptr %[[#V1]], align 8
+// LLVM: %[[#V4:]] = call i64 @init(i64 %[[#V3]])
+// LLVM: store i64 %[[#V4]], ptr %[[#V2]], align 8
+// LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[#V1]], ptr %[[#V2]], i32 8, i1 false)
+void foo1() {
+  S s;
+  s = init(s);
+}
+
+// CIR: cir.func {{.*}} @foo2(%arg0: !u64i
+// CIR: %[[#V0:]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, [""] {alignment = 4 : i64}
+// CIR: %[[#V1:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_S> -> !cir.ptr<!u64i>
+// CIR: cir.store{{.*}} %arg0, %[[#V1]] : !u64i, !cir.ptr<!u64i>
+// CIR: %[[#V2:]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["__retval"] {alignment = 4 : i64}
+// CIR: %[[#V3:]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["s2", init]
+// CIR: %[[#V4:]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["tmp"] {alignment = 4 : i64}
+// CIR: %[[#V5:]] = cir.const #cir.const_record<{#cir.int<1> : !s32i, #cir.int<2> : !s32i}> : !rec_S
+// CIR: cir.store{{.*}} %[[#V5]], %[[#V3]] : !rec_S, !cir.ptr<!rec_S>
+// CIR: %[[#V6:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_S> -> !cir.ptr<!u64i>
+// CIR: %[[#V7:]] = cir.load %[[#V6]] : !cir.ptr<!u64i>, !u64i
+// CIR: %[[#V8:]] = cir.call @foo2(%[[#V7]]) : (!u64i) -> !u64i
+// CIR: %[[#V9:]] = cir.cast bitcast %[[#V4]] : !cir.ptr<!rec_S> -> !cir.ptr<!u64i>
+// CIR: cir.store{{.*}} %[[#V8]], %[[#V9]] : !u64i, !cir.ptr<!u64i>
+// CIR: cir.copy %[[#V4]] to %[[#V0]] : !cir.ptr<!rec_S>
+// CIR: cir.copy %[[#V0]] to %[[#V2]] : !cir.ptr<!rec_S>
+// CIR: %[[#V10:]] = cir.cast bitcast %[[#V2]] : !cir.ptr<!rec_S> -> !cir.ptr<!u64i>
+// CIR: %[[#V11:]] = cir.load %[[#V10]] : !cir.ptr<!u64i>, !u64i
+// CIR: cir.return %[[#V11]] : !u64i
+
+// LLVM: @foo2(i64 %[[#V0:]])
+// LLVM: %[[#V2:]] = alloca %struct.S, i64 1, align 4
+// LLVM: store i64 %[[#V0]], ptr %[[#V2]], align 8
+// LLVM: %[[#V3:]] = alloca %struct.S, i64 1, align 4
+// LLVM: %[[#V4:]] = alloca %struct.S, i64 1, align 4
+// LLVM: %[[#V5:]] = alloca %struct.S, i64 1, align 4
+// LLVM: store %struct.S { i32 1, i32 2 }, ptr %[[#V4]], align 4
+// LLVM: %[[#V6:]] = load i64, ptr %[[#V2]], align 8
+// LLVM: %[[#V7:]] = call i64 @foo2(i64 %[[#V6]])
+// LLVM: store i64 %[[#V7]], ptr %[[#V5]], align 8
+// LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[#V2]], ptr %[[#V5]], i32 8, i1 false)
+// LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[#V3]], ptr %[[#V2]], i32 8, i1 false)
+// LLVM: %[[#V8:]] = load i64, ptr %[[#V3]], align 8
+// LLVM: ret i64 %[[#V8]]
+S foo2(S s1) {
+  S s2 = {1, 2};
+  s1 = foo2(s1);
+  return s1;
+}
+
+typedef struct {
+  char a;
+  char b;
+} S2;
+
+// CIR: cir.func {{.*}} @init2(%arg0: !u16i
+// CIR: %[[#V0:]] = cir.alloca !rec_S2, !cir.ptr<!rec_S2>, [""] {alignment = 4 : i64}
+// CIR: %[[#V1:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_S2> -> !cir.ptr<!u16i>
+// CIR: cir.store{{.*}} %arg0, %[[#V1]] : !u16i, !cir.ptr<!u16i>
+// CIR: %[[#V2:]] = cir.alloca !rec_S2, !cir.ptr<!rec_S2>, ["__retval"] {alignment = 1 : i64}
+// CIR: %[[#V3:]] = cir.const #cir.int<1> : !s32i
+// CIR: %[[#V4:]] = cir.cast integral %[[#V3]] : !s32i -> !s8i
+// CIR: %[[#V5:]] = cir.get_member %[[#V0]][0] {name = "a"} : !cir.ptr<!rec_S2> -> !cir.ptr<!s8i>
+// CIR: cir.store{{.*}} %[[#V4]], %[[#V5]] : !s8i, !cir.ptr<!s8i>
+// CIR: %[[#V6:]] = cir.const #cir.int<2> : !s32i
+// CIR: %[[#V7:]] = cir.cast integral %[[#V6]] : !s32i -> !s8i
+// CIR: %[[#V8:]] = cir.get_member %[[#V0]][1] {name = "b"} : !cir.ptr<!rec_S2> -> !cir.ptr<!s8i>
+// CIR: cir.store{{.*}} %[[#V7]], %[[#V8]] : !s8i, !cir.ptr<!s8i>
+// CIR: cir.copy %[[#V0]] to %[[#V2]] : !cir.ptr<!rec_S2>
+// CIR: %[[#V9:]] = cir.cast bitcast %[[#V2]] : !cir.ptr<!rec_S2> -> !cir.ptr<!u16i>
+// CIR: %[[#V10:]] = cir.load %[[#V9]] : !cir.ptr<!u16i>, !u16i
+// CIR: cir.return %[[#V10]] : !u16i
+
+// LLVM: @init2(i16 %[[#V0:]])
+// LLVM: %[[#V2:]] = alloca %struct.S2, i64 1, align 4
+// LLVM: store i16 %[[#V0]], ptr %[[#V2]], align 2
+// LLVM: %[[#V3:]] = alloca %struct.S2, i64 1, align 1
+// LLVM: %[[#V4:]] = getelementptr %struct.S2, ptr %[[#V2]], i32 0, i32 0
+// LLVM: store i8 1, ptr %[[#V4]], align 1
+// LLVM: %[[#V5:]] = getelementptr %struct.S2, ptr %[[#V2]], i32 0, i32 1
+// LLVM: store i8 2, ptr %[[#V5]], align 1
+// LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[#V3]], ptr %[[#V2]], i32 2, i1 false)
+// LLVM: %[[#V6:]] = load i16, ptr %[[#V3]], align 2
+// LLVM: ret i16 %[[#V6]]
+S2 init2(S2 s) {
+  s.a = 1;
+  s.b = 2;
+  return s;
+}
+
+// CIR: cir.func {{.*}} @foo3()
+// CIR: %[[#V0:]] = cir.alloca !rec_S2, !cir.ptr<!rec_S2>, ["s"]
+// CIR: %[[#V1:]] = cir.alloca !rec_S2, !cir.ptr<!rec_S2>, ["tmp"] {alignment = 1 : i64}
+// CIR: %[[#V2:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_S2> -> !cir.ptr<!u16i>
+// CIR: %[[#V3:]] = cir.load %[[#V2]] : !cir.ptr<!u16i>, !u16i
+// CIR: %[[#V4:]] = cir.call @init2(%[[#V3]]) : (!u16i) -> !u16i
+// CIR: %[[#V5:]] = cir.cast bitcast %[[#V1]] : !cir.ptr<!rec_S2> -> !cir.ptr<!u16i>
+// CIR: cir.store{{.*}} %[[#V4]], %[[#V5]] : !u16i, !cir.ptr<!u16i>
+// CIR: cir.copy %[[#V1]] to %[[#V0]] : !cir.ptr<!rec_S2>
+// CIR: cir.return
+
+// LLVM: @foo3()
+// LLVM: %[[#V1:]] = alloca %struct.S2, i64 1, align 1
+// LLVM: %[[#V2:]] = alloca %struct.S2, i64 1, align 1
+// LLVM: %[[#V3:]] = load i16, ptr %[[#V1]], align 2
+// LLVM: %[[#V4:]] = call i16 @init2(i16 %[[#V3]])
+// LLVM: store i16 %[[#V4]], ptr %[[#V2]], align 2
+// LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[#V1]], ptr %[[#V2]], i32 2, i1 false)
+void foo3() {
+  S2 s;
+  s = init2(s);
+}
diff --git a/clang/test/CIR/Incubator/CallConvLowering/AArch64/union.c b/clang/test/CIR/Incubator/CallConvLowering/AArch64/union.c
new file mode 100644
index 0000000000000..957ec1f535b64
--- /dev/null
+++ b/clang/test/CIR/Incubator/CallConvLowering/AArch64/union.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu  -fclangir -fclangir-call-conv-lowering -emit-cir-flat -mmlir --mlir-print-ir-after=cir-call-conv-lowering %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu  -fclangir -emit-llvm %s -o %t.ll -fclangir-call-conv-lowering
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+// CIR: !rec_U = !cir.record<union "U" {!s32i, !s32i, !s32i}>
+// LLVM: %union.U = type { i32 }
+typedef union {
+  int a, b, c;
+} U;
+
+// CIR: cir.func {{.*}} @foo(%arg0: !u64i
+// CIR: %[[#V0:]] = cir.alloca !rec_U, !cir.ptr<!rec_U>, [""] {alignment = 4 : i64}
+// CIR: %[[#V1:]] = cir.cast integral %arg0 : !u64i -> !u32i
+// CIR: %[[#V2:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_U> -> !cir.ptr<!u32i>
+// CIR: cir.store{{.*}} %[[#V1]], %[[#V2]] : !u32i, !cir.ptr<!u32i>
+// CIR: cir.return
+
+// LLVM: void @foo(i64 %[[#V0:]]
+// LLVM: %[[#V2:]] = alloca %union.U, i64 1, align 4
+// LLVM: %[[#V3:]] = trunc i64 %[[#V0]] to i32
+// LLVM: store i32 %[[#V3]], ptr %[[#V2]], align 4
+// LLVM: ret void
+void foo(U u) {}
+
+// CIR: cir.func {{.*}} @init() -> !u32i
+// CIR: %[[#V0:]] = cir.alloca !rec_U, !cir.ptr<!rec_U>, ["__retval"] {alignment = 4 : i64}
+// CIR: %[[#V1:]] = cir.load{{.*}} %[[#V0]] : !cir.ptr<!rec_U>, !rec_U
+// CIR: %[[#V2:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_U> -> !cir.ptr<!u32i>
+// CIR: %[[#V3:]] = cir.load{{.*}} %[[#V2]] : !cir.ptr<!u32i>, !u32i
+// CIR: cir.return %[[#V3]] : !u32i
+
+// LLVM: i32 @init()
+// LLVM: %[[#V1:]] = alloca %union.U, i64 1, align 4
+// LLVM: %[[#V2:]] = load %union.U, ptr %[[#V1]], align 4
+// LLVM: %[[#V3:]] = load i32, ptr %[[#V1]], align 4
+// LLVM: ret i32 %[[#V3]]
+U init() {
+  U u;
+  return u;
+}
+
+typedef union {
+
+  struct {
+    short a;
+    char b;
+    char c;
+  };
+
+  int x;
+} A;
+
+void passA(A x) {}
+
+// CIR: cir.func {{.*@callA}}()
+// CIR:   %[[#V0:]] = cir.alloca !rec_A, !cir.ptr<!rec_A>, ["x"] {alignment = 4 : i64}
+// CIR:   %[[#V1:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_A> -> !cir.ptr<!s32i>
+// CIR:   %[[#V2:]] = cir.load %[[#V1]] : !cir.ptr<!s32i>, !s32i
+// CIR:   %[[#V3:]] = cir.cast integral %[[#V2]] : !s32i -> !u64i
+// CIR:   cir.call @passA(%[[#V3]]) : (!u64i) -> ()
+
+// LLVM: void @callA()
+// LLVM:   %[[#V0:]] = alloca %union.A, i64 1, align 4
+// LLVM:   %[[#V1:]] = load i32, ptr %[[#V0]], align 4
+// LLVM:   %[[#V2:]] = sext i32 %[[#V1]] to i64
+// LLVM:   call void @passA(i64 %[[#V2]])
+void callA() {
+  A x;
+  passA(x);
+}
diff --git a/clang/test/CIR/Incubator/CallConvLowering/AArch64/vector-fp16.c b/clang/test/CIR/Incubator/CallConvLowering/AArch64/vector-fp16.c
new file mode 100644
index 0000000000000..ee334fa0a2989
--- /dev/null
+++ b/clang/test/CIR/Incubator/CallConvLowering/AArch64/vector-fp16.c
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +fullfp16 \
+// RUN:   -fclangir -fclangir-call-conv-lowering -emit-cir-flat -mmlir --mlir-print-ir-after=cir-call-conv-lowering %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +fullfp16 \
+// RUN:   -fclangir -fclangir-call-conv-lowering -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +fullfp16 \
+// RUN:   -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef int int4 __attribute__((ext_vector_type(4)));
+typedef _Float16 half;
+typedef _Float16 half4 __attribute__((ext_vector_type(4)));
+
+// CIR: cir.func {{.*}} @test_half(%arg0: !cir.f16 {{.*}}) -> !cir.f16
+// LLVM: define {{.*}}half @test_half(half %{{.*}})
+// OGCG: define {{.*}}half @test_half(half {{.*}}%{{.*}})
+half test_half(half a) {
+  return a;
+}
+
+// CIR: cir.func {{.*}} @test_half_add(%arg0: !cir.f16 {{.*}}, %arg1: !cir.f16 {{.*}}) -> !cir.f16
+// LLVM: define {{.*}}half @test_half_add(half %{{.*}}, half %{{.*}})
+// OGCG: define {{.*}}half @test_half_add(half {{.*}}%{{.*}}, half {{.*}}%{{.*}})
+half test_half_add(half a, half b) {
+  return a + b;
+}
+
+// CIR: cir.func {{.*}} @test_float4(%arg0: !cir.vector<!cir.float x 4> {{.*}}) -> !cir.vector<!cir.float x 4>
+// LLVM: define {{.*}}<4 x float> @test_float4(<4 x float> %{{.*}})
+// OGCG: define {{.*}}<4 x float> @test_float4(<4 x float> {{.*}}%{{.*}})
+float4 test_float4(float4 a) {
+  return a;
+}
+
+// CIR: cir.func {{.*}} @test_float4_add(%arg0: !cir.vector<!cir.float x 4> {{.*}}, %arg1: !cir.vector<!cir.float x 4> {{.*}}) -> !cir.vector<!cir.float x 4>
+// LLVM: define {{.*}}<4 x float> @test_float4_add(<4 x float> %{{.*}}, <4 x float> %{{.*}})
+// OGCG: define {{.*}}<4 x float> @test_float4_add(<4 x float> {{.*}}%{{.*}}, <4 x float> {{.*}}%{{.*}})
+float4 test_float4_add(float4 a, float4 b) {
+  return a + b;
+}
+
+// CIR: cir.func {{.*}} @test_int4(%arg0: !cir.vector<!s32i x 4> {{.*}}) -> !cir.vector<!s32i x 4>
+// LLVM: define {{.*}}<4 x i32> @test_int4(<4 x i32> %{{.*}})
+// OGCG: define {{.*}}<4 x i32> @test_int4(<4 x i32> {{.*}}%{{.*}})
+int4 test_int4(int4 a) {
+  return a;
+}
+
+// CIR: cir.func {{.*}} @test_int4_add(%arg0: !cir.vector<!s32i x 4> {{.*}}, %arg1: !cir.vector<!s32i x 4> {{.*}}) -> !cir.vector<!s32i x 4>
+// LLVM: define {{.*}}<4 x i32> @test_int4_add(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+// OGCG: define {{.*}}<4 x i32> @test_int4_add(<4 x i32> {{.*}}%{{.*}}, <4 x i32> {{.*}}%{{.*}})
+int4 test_int4_add(int4 a, int4 b) {
+  return a + b;
+}
+
+// CIR: cir.func {{.*}} @test_half4(%arg0: !cir.vector<!cir.f16 x 4> {{.*}}) -> !cir.vector<!cir.f16 x 4>
+// LLVM: define {{.*}}<4 x half> @test_half4(<4 x half> %{{.*}})
+// OGCG: define {{.*}}<4 x half> @test_half4(<4 x half> {{.*}}%{{.*}})
+half4 test_half4(half4 a) {
+  return a;
+}
+
+// CIR: cir.func {{.*}} @test_half4_add(%arg0: !cir.vector<!cir.f16 x 4> {{.*}}, %arg1: !cir.vector<!cir.f16 x 4> {{.*}}) -> !cir.vector<!cir.f16 x 4>
+// LLVM: define {{.*}}<4 x half> @test_half4_add(<4 x half> %{{.*}}, <4 x half> %{{.*}})
+// OGCG: define {{.*}}<4 x half> @test_half4_add(<4 x half> {{.*}}%{{.*}}, <4 x half> {{.*}}%{{.*}})
+half4 test_half4_add(half4 a, half4 b) {
+  return a + b;
+}
+
+// CIR: cir.func {{.*}} @test_void_ptr(%arg0: !cir.ptr<!void> {{.*}}) -> !cir.ptr<!void>
+// LLVM: define {{.*}}ptr @test_void_ptr(ptr %{{.*}})
+// OGCG: define {{.*}}ptr @test_void_ptr(ptr {{.*}}%{{.*}})
+void *test_void_ptr(void *p) {
+  return p;
+}
+
+// CIR: cir.func {{.*}} @test_void_ptr_arith(%arg0: !cir.ptr<!void> {{.*}}, %arg1: !s64i {{.*}}) -> !cir.ptr<!void>
+// LLVM: define {{.*}}ptr @test_void_ptr_arith(ptr %{{.*}}, i64 %{{.*}})
+// OGCG: define {{.*}}ptr @test_void_ptr_arith(ptr {{.*}}%{{.*}}, i64 {{.*}}%{{.*}})
+void *test_void_ptr_arith(void *p, long offset) {
+  return (char*)p + offset;
+}
diff --git a/clang/test/CIR/Incubator/CallConvLowering/NVPTX/basic.cpp b/clang/test/CIR/Incubator/CallConvLowering/NVPTX/basic.cpp
new file mode 100644
index 0000000000000..d4af31c3c880b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CallConvLowering/NVPTX/basic.cpp
@@ -0,0 +1,90 @@
+// RUN: %clang_cc1 -std=c++20 -triple nvptx-nvidia-cuda -fclangir \
+// RUN:            -fclangir-call-conv-lowering -emit-cir-flat -mmlir \
+// RUN:            --mlir-print-ir-after=cir-call-conv-lowering %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Test call conv lowering for trivial cases. //
+
+// CHECK: @_Z4Voidv()
+void Void(void) {
+  // CHECK:   cir.call @_Z4Voidv() : () -> ()
+  Void();
+}
+
+// CHECK: @_Z4Boolb(%arg0: !cir.bool {cir.zeroext} loc({{.+}})) -> (!cir.bool {cir.zeroext})
+bool Bool(bool a) {
+  // CHECK:   cir.call @_Z4Boolb({{.+}}) : (!cir.bool) -> !cir.bool
+  return Bool(a);
+}
+
+// CHECK: cir.func {{.*}} @_Z5UCharh(%arg0: !u8i {cir.zeroext} loc({{.+}})) -> (!u8i {cir.zeroext})
+unsigned char UChar(unsigned char c) {
+  // CHECK: cir.call @_Z5UCharh(%{{.+}}) : (!u8i) -> !u8i
+  return UChar(c);
+}
+
+// CHECK: cir.func {{.*}} @_Z6UShortt(%arg0: !u16i {cir.zeroext} loc({{.+}})) -> (!u16i {cir.zeroext})
+unsigned short UShort(unsigned short s) {
+  // CHECK: cir.call @_Z6UShortt(%{{.+}}) : (!u16i) -> !u16i
+  return UShort(s);
+}
+
+// CHECK: cir.func {{.*}} @_Z4UIntj(%arg0: !u32i loc({{.+}})) -> !u32i
+unsigned int UInt(unsigned int i) {
+  // CHECK: cir.call @_Z4UIntj(%{{.+}}) : (!u32i) -> !u32i
+  return UInt(i);
+}
+
+// CHECK: cir.func {{.*}} @_Z5ULongm(%arg0: !u32i loc({{.+}})) -> !u32i
+unsigned long ULong(unsigned long l) {
+  // CHECK: cir.call @_Z5ULongm(%{{.+}}) : (!u32i) -> !u32i
+  return ULong(l);
+}
+
+// CHECK: cir.func {{.*}} @_Z9ULongLongy(%arg0: !u64i loc({{.+}})) -> !u64i
+unsigned long long ULongLong(unsigned long long l) {
+  // CHECK: cir.call @_Z9ULongLongy(%{{.+}}) : (!u64i) -> !u64i
+  return ULongLong(l);
+}
+
+// CHECK: cir.func {{.*}} @_Z4Chara(%arg0: !s8i {cir.signext} loc({{.+}})) -> (!s8i {cir.signext})
+char Char(signed char c) {
+  // CHECK: cir.call @_Z4Chara(%{{.+}}) : (!s8i) -> !s8i
+  return Char(c);
+}
+
+// CHECK: cir.func {{.*}} @_Z5Shorts(%arg0: !s16i {cir.signext} loc({{.+}})) -> (!s16i {cir.signext})
+short Short(short s) {
+  // CHECK: cir.call @_Z5Shorts(%{{.+}}) : (!s16i) -> !s16i
+  return Short(s);
+}
+
+// CHECK: cir.func {{.*}} @_Z3Inti(%arg0: !s32i loc({{.+}})) -> !s32i
+int Int(int i) {
+  // CHECK: cir.call @_Z3Inti(%{{.+}}) : (!s32i) -> !s32i
+  return Int(i);
+}
+
+// CHECK: cir.func {{.*}} @_Z4Longl(%arg0: !s32i loc({{.+}})) -> !s32i
+long Long(long l) {
+  // CHECK: cir.call @_Z4Longl(%{{.+}}) : (!s32i) -> !s32i
+  return Long(l);
+}
+
+// CHECK: cir.func {{.*}} @_Z8LongLongx(%arg0: !s64i loc({{.+}})) -> !s64i
+long long LongLong(long long l) {
+  // CHECK: cir.call @_Z8LongLongx(%{{.+}}) : (!s64i) -> !s64i
+  return LongLong(l);
+}
+
+
+// Check for structs.
+
+struct Struct {
+  int a, b, c, d, e;
+};
+
+// CHECK: cir.func {{.*}} @_Z10StructFuncv() -> !rec_Struct
+Struct StructFunc() {
+  return { 0, 1, 2, 3, 4 };
+}
diff --git a/clang/test/CIR/Incubator/CallConvLowering/x86_64/basic.cpp b/clang/test/CIR/Incubator/CallConvLowering/x86_64/basic.cpp
new file mode 100644
index 0000000000000..21e2cb4836980
--- /dev/null
+++ b/clang/test/CIR/Incubator/CallConvLowering/x86_64/basic.cpp
@@ -0,0 +1,208 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-call-conv-lowering -emit-cir-flat -mmlir --mlir-print-ir-after=cir-call-conv-lowering %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Test call conv lowering for trivial cases. //
+
+// CHECK: @_Z4Voidv()
+void Void(void) {
+// CHECK:   cir.call @_Z4Voidv() : () -> ()
+  Void();
+}
+
+// Test call conv lowering for trivial zeroext cases.
+
+// Bools are a bit of an odd case in CIR's x86_64 representation: they are considered i8
+// everywhere except in the function return/arguments, where they are considered i1. To
+// match LLVM's behavior, we need to zero-extend them when passing them as arguments.
+
+// CHECK: @_Z4Boolb(%arg0: !cir.bool {cir.zeroext} loc({{.+}})) -> (!cir.bool {cir.zeroext})
+bool Bool(bool a) {
+// CHECK:   cir.call @_Z4Boolb({{.+}}) : (!cir.bool) -> !cir.bool
+  return Bool(a);
+}
+
+// CHECK: cir.func {{.*}} @_Z5UCharh(%arg0: !u8i {cir.zeroext} loc({{.+}})) -> (!u8i {cir.zeroext})
+unsigned char UChar(unsigned char c) {
+  // CHECK: cir.call @_Z5UCharh(%2) : (!u8i) -> !u8i
+  return UChar(c);
+}
+// CHECK: cir.func {{.*}} @_Z6UShortt(%arg0: !u16i {cir.zeroext} loc({{.+}})) -> (!u16i {cir.zeroext})
+unsigned short UShort(unsigned short s) {
+  // CHECK: cir.call @_Z6UShortt(%2) : (!u16i) -> !u16i
+  return UShort(s);
+}
+// CHECK: cir.func {{.*}} @_Z4UIntj(%arg0: !u32i loc({{.+}})) -> !u32i
+unsigned int UInt(unsigned int i) {
+  // CHECK: cir.call @_Z4UIntj(%2) : (!u32i) -> !u32i
+  return UInt(i);
+}
+// CHECK: cir.func {{.*}} @_Z5ULongm(%arg0: !u64i loc({{.+}})) -> !u64i
+unsigned long ULong(unsigned long l) {
+  // CHECK: cir.call @_Z5ULongm(%2) : (!u64i) -> !u64i
+  return ULong(l);
+}
+// CHECK: cir.func {{.*}} @_Z9ULongLongy(%arg0: !u64i loc({{.+}})) -> !u64i
+unsigned long long ULongLong(unsigned long long l) {
+  // CHECK: cir.call @_Z9ULongLongy(%2) : (!u64i) -> !u64i
+  return ULongLong(l);
+}
+
+/// Test call conv lowering for trivial signext cases. ///
+
+// CHECK: cir.func {{.*}} @_Z4Chara(%arg0: !s8i {cir.signext} loc({{.+}})) -> (!s8i {cir.signext})
+char Char(signed char c) {
+  // CHECK: cir.call @_Z4Chara(%{{.+}}) : (!s8i) -> !s8i
+  return Char(c);
+}
+// CHECK: cir.func {{.*}} @_Z5Shorts(%arg0: !s16i {cir.signext} loc({{.+}})) -> (!s16i {cir.signext})
+short Short(short s) {
+  // CHECK: cir.call @_Z5Shorts(%{{.+}}) : (!s16i) -> !s16i
+  return Short(s);
+}
+// CHECK: cir.func {{.*}} @_Z3Inti(%arg0: !s32i loc({{.+}})) -> !s32i
+int Int(int i) {
+  // CHECK: cir.call @_Z3Inti(%{{.+}}) : (!s32i) -> !s32i
+  return Int(i);
+}
+// CHECK: cir.func {{.*}} @_Z4Longl(%arg0: !s64i loc({{.+}})) -> !s64i
+long Long(long l) {
+  // CHECK: cir.call @_Z4Longl(%{{.+}}) : (!s64i) -> !s64i
+  return Long(l);
+}
+// CHECK: cir.func {{.*}} @_Z8LongLongx(%arg0: !s64i loc({{.+}})) -> !s64i
+long long LongLong(long long l) {
+  // CHECK: cir.call @_Z8LongLongx(%{{.+}}) : (!s64i) -> !s64i
+  return LongLong(l);
+}
+
+/// Test call conv lowering for floating point. ///
+
+// CHECK: cir.func {{.*}} @_Z5Floatf(%arg0: !cir.float loc({{.+}})) -> !cir.float
+float Float(float f) {
+  // CHECK: cir.call @_Z5Floatf(%{{.+}}) : (!cir.float) -> !cir.float
+  return Float(f);
+}
+// CHECK: cir.func {{.*}} @_Z6Doubled(%arg0: !cir.double loc({{.+}})) -> !cir.double
+double Double(double d) {
+  // CHECK: cir.call @_Z6Doubled(%{{.+}}) : (!cir.double) -> !cir.double
+  return Double(d);
+}
+
+
+/// Test call conv lowering for struct type coercion scenarios. ///
+
+struct S1 {
+  int a, b;
+};
+
+
+/// Validate coerced argument and cast it to the expected type.
+
+/// Cast arguments to the expected type.
+// CHECK: %[[#V0:]] = cir.alloca !rec_S1, !cir.ptr<!rec_S1>, [""] {alignment = 4 : i64}
+// CHECK: %[[#V1:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_S1> -> !cir.ptr<!u64i>
+// CHECK: cir.store{{.*}} %arg0, %[[#V1]] : !u64i, !cir.ptr<!u64i>
+// CHECK: %[[#V2:]] = cir.alloca !rec_S1, !cir.ptr<!rec_S1>, ["__retval"] {alignment = 4 : i64}
+// CHECK: %[[#V3:]] = cir.alloca !rec_S1, !cir.ptr<!rec_S1>, ["agg.tmp0"] {alignment = 4 : i64}
+// CHECK: %[[#V4:]] = cir.alloca !rec_S1, !cir.ptr<!rec_S1>, ["agg.tmp1"] {alignment = 4 : i64}
+S1 s1(S1 arg) {
+
+  /// Cast argument and result of the function call to the expected types.
+  // CHECK: %[[#V9:]] = cir.cast bitcast %[[#V3]] : !cir.ptr<!rec_S1> -> !cir.ptr<!u64i>
+  // CHECK: %[[#V10:]] = cir.load{{.*}} %[[#V9]] : !cir.ptr<!u64i>, !u64i
+  // CHECK: %[[#V11:]] = cir.call @_Z2s12S1(%[[#V10]]) : (!u64i) -> !u64i
+  // CHECK: %[[#V12:]] = cir.cast bitcast %[[#V4]] : !cir.ptr<!rec_S1> -> !cir.ptr<!u64i>
+  // CHECK: cir.store{{.*}} %[[#V11]], %[[#V12]] : !u64i, !cir.ptr<!u64i>
+  s1({1, 2});
+
+  // CHECK: %[[#V13:]] = cir.get_member %[[#V2]][0] {name = "a"} : !cir.ptr<!rec_S1> -> !cir.ptr<!s32i>
+  // CHECK: %[[#V14:]] = cir.const #cir.int<1> : !s32i
+  // CHECK: cir.store{{.*}} %[[#V14]], %[[#V13]] : !s32i, !cir.ptr<!s32i>
+  // CHECK: %[[#V15:]] = cir.get_member %[[#V2]][1] {name = "b"} : !cir.ptr<!rec_S1> -> !cir.ptr<!s32i>
+  // CHECK: %[[#V16:]] = cir.const #cir.int<2> : !s32i
+  // CHECK: cir.store{{.*}} %[[#V16]], %[[#V15]] : !s32i, !cir.ptr<!s32i>
+  // CHECK: %[[#V17:]] = cir.cast bitcast %[[#V2]] : !cir.ptr<!rec_S1> -> !cir.ptr<!u64i>
+  // CHECK: %[[#V18:]] = cir.load{{.*}} %[[#V17]] : !cir.ptr<!u64i>, !u64i
+  // CHECK: cir.return %[[#V18]] : !u64i
+  return {1, 2};
+}
+
+/// Test call conv lowering for flattened structs. ///
+
+struct S2 {
+  int x, y, z;
+};
+
+// COM: Function prologue
+
+// CHECK: cir.func {{.*}} @_Z2s22S2(%[[ARG0:[a-z0-9]+]]: !u64i {{.*}}, %[[ARG1:[a-z0-9]+]]: !s32i {{.*}}) -> !rec_anon_struct
+// CHECK: %[[#F0:]] = cir.alloca !rec_S2, !cir.ptr<!rec_S2>
+// CHECK: %[[#F1:]] = cir.alloca !rec_anon_struct, !cir.ptr<!rec_anon_struct>
+// CHECK: %[[#F2:]] = cir.get_member %[[#F1]][0]{{.*}} : !cir.ptr<!rec_anon_struct> -> !cir.ptr<!u64i>
+// CHECK: cir.store{{.*}} %[[ARG0]], %[[#F2]] : !u64i, !cir.ptr<!u64i>
+// CHECK: %[[#F3:]] = cir.get_member %[[#F1]][1]{{.*}} : !cir.ptr<!rec_anon_struct> -> !cir.ptr<!s32i>
+// CHECK: cir.store{{.*}} %[[ARG1]], %[[#F3]] : !s32i, !cir.ptr<!s32i>
+// CHECK: %[[#F4:]] = cir.cast bitcast %[[#F1]] : !cir.ptr<!rec_anon_struct> -> !cir.ptr<!void>
+// CHECK: %[[#F5:]] = cir.cast bitcast %[[#F0]] : !cir.ptr<!rec_S2> -> !cir.ptr<!void>
+// CHECK: %[[#F6:]] = cir.const #cir.int<12> : !u64i
+// CHECK: cir.libc.memcpy %[[#F6]] bytes from %[[#F4]] to %[[#F5]]
+S2 s2(S2 arg) {
+  // CHECK: %[[#F7:]] = cir.alloca !rec_S2, !cir.ptr<!rec_S2>, ["__retval"] {alignment = 4 : i64}
+  // CHECK: %[[#F8:]] = cir.alloca !rec_S2, !cir.ptr<!rec_S2>, ["agg.tmp0"] {alignment = 4 : i64}
+  // CHECK: %[[#F9:]] = cir.alloca !rec_S2, !cir.ptr<!rec_S2>, ["agg.tmp1"] {alignment = 4 : i64}
+  // CHECK: %[[#F10:]] = cir.alloca !rec_anon_struct, !cir.ptr<!rec_anon_struct>, ["tmp"] {alignment = 8 : i64}
+  // CHECK: %[[#F11:]] = cir.alloca !rec_S2, !cir.ptr<!rec_S2>, ["tmp"] {alignment = 4 : i64}
+  // CHECK: %[[#F12:]] = cir.alloca !rec_anon_struct, !cir.ptr<!rec_anon_struct>, ["tmp"] {alignment = 8 : i64}
+  // CHECK: %[[#F13:]] = cir.alloca !rec_anon_struct, !cir.ptr<!rec_anon_struct>, ["tmp"] {alignment = 8 : i64}
+
+  // COM: Construction of S2 { 1, 2, 3 }.
+
+  // CHECK: %[[#F14:]] = cir.get_member %[[#F8]][0] {{.*}} : !cir.ptr<!rec_S2> -> !cir.ptr<!s32i>
+  // CHECK: %[[#F15:]] = cir.const #cir.int<1> : !s32i
+  // CHECK: cir.store{{.*}} %[[#F15]], %[[#F14]] : !s32i, !cir.ptr<!s32i>
+  // CHECK: %[[#F16:]] = cir.get_member %[[#F8]][1] {{.*}} : !cir.ptr<!rec_S2> -> !cir.ptr<!s32i>
+  // CHECK: %[[#F17:]] = cir.const #cir.int<2> : !s32i
+  // CHECK: cir.store{{.*}} %[[#F17]], %[[#F16]] : !s32i, !cir.ptr<!s32i>
+  // CHECK: %[[#F18:]] = cir.get_member %[[#F8]][2] {{.*}} : !cir.ptr<!rec_S2> -> !cir.ptr<!s32i>
+  // CHECK: %[[#F19:]] = cir.const #cir.int<3> : !s32i
+  // CHECK: cir.store{{.*}} %[[#F19]], %[[#F18]] : !s32i, !cir.ptr<!s32i>
+
+  // COM: Flattening of the struct.
+  // COM: { i32, i32, i32 } -> { i64, i32 }.
+
+  // CHECK: %[[#F20:]] = cir.load{{.*}} %[[#F8]] : !cir.ptr<!rec_S2>, !rec_S2
+  // CHECK: cir.store{{.*}} %[[#F20]], %[[#F11]] : !rec_S2, !cir.ptr<!rec_S2>
+  // CHECK: %[[#F21:]] = cir.cast bitcast %[[#F11]] : !cir.ptr<!rec_S2> -> !cir.ptr<!void>
+  // CHECK: %[[#F22:]] = cir.cast bitcast %[[#F10]] : !cir.ptr<!rec_anon_struct> -> !cir.ptr<!void>
+  // CHECK: %[[#F23:]] = cir.const #cir.int<12> : !u64i
+  // CHECK: cir.libc.memcpy %[[#F23]] bytes from %[[#F21]] to %[[#F22]]
+
+  // COM: Function call.
+  // COM: Retrieve the two values in { i64, i32 }.
+
+  // CHECK: %[[#F24:]] = cir.get_member %[[#F10]][0] {name = ""} : !cir.ptr<!rec_anon_struct> -> !cir.ptr<!u64i>
+  // CHECK: %[[#F25:]] = cir.load{{.*}} %[[#F24]] : !cir.ptr<!u64i>, !u64i
+  // CHECK: %[[#F26:]] = cir.get_member %[[#F10]][1] {name = ""} : !cir.ptr<!rec_anon_struct> -> !cir.ptr<!s32i>
+  // CHECK: %[[#F27:]] = cir.load{{.*}} %[[#F26]] : !cir.ptr<!s32i>, !s32i
+  // CHECK: %[[#F28:]] = cir.call @_Z2s22S2(%[[#F25]], %[[#F27]]) : (!u64i, !s32i) -> !rec_anon_struct
+  // CHECK: cir.store{{.*}} %[[#F28]], %[[#F12]] : !rec_anon_struct, !cir.ptr<!rec_anon_struct>
+
+  // CHECK: %[[#F29:]] = cir.cast bitcast %[[#F12]] : !cir.ptr<!rec_anon_struct> -> !cir.ptr<!void>
+  // CHECK: %[[#F30:]] = cir.cast bitcast %[[#F9]] : !cir.ptr<!rec_S2> -> !cir.ptr<!void>
+  // CHECK: %[[#F31:]] = cir.const #cir.int<12> : !u64i
+  // CHECK: cir.libc.memcpy %[[#F31]] bytes from %[[#F29]] to %[[#F30]]
+
+  // COM: Construct S2 { 1, 2, 3 } again.
+  // COM: It has been tested above, so no duplication here.
+
+  // COM: For return, the first two fields of S2 is also coerced.
+
+  // CHECK: %[[#F39:]] = cir.cast bitcast %[[#F7]] : !cir.ptr<!rec_S2> -> !cir.ptr<!void>
+  // CHECK: %[[#F40:]] = cir.cast bitcast %[[#F13]] : !cir.ptr<!rec_anon_struct> -> !cir.ptr<!void>
+  // CHECK: %[[#F41:]] = cir.const #cir.int<12> : !u64i
+  // CHECK: cir.libc.memcpy %[[#F41]] bytes from %[[#F39]] to %[[#F40]]
+  // CHECK: %[[#F42:]] = cir.load{{.*}} %[[#F13]] : !cir.ptr<!rec_anon_struct>, !rec_anon_struct
+  // CHECK: cir.return %[[#F42]] : !rec_anon_struct
+  s2({ 1, 2, 3 });
+  return { 1, 2, 3 };
+}
diff --git a/clang/test/CIR/Incubator/CallConvLowering/x86_64/fptrs.c b/clang/test/CIR/Incubator/CallConvLowering/x86_64/fptrs.c
new file mode 100644
index 0000000000000..38891d81b2c83
--- /dev/null
+++ b/clang/test/CIR/Incubator/CallConvLowering/x86_64/fptrs.c
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir-flat -fclangir-call-conv-lowering %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -fclangir-call-conv-lowering %s -o - | FileCheck %s -check-prefix=LLVM
+
+typedef struct {
+  int a;
+} S;
+
+typedef int (*myfptr)(S);
+
+int foo(S s) { return 42 + s.a; }
+
+// CHECK: cir.func {{.*@bar}}
+// CHECK:   %[[#V0:]] = cir.alloca !cir.ptr<!cir.func<(!rec_S) -> !s32i>>, !cir.ptr<!cir.ptr<!cir.func<(!rec_S) -> !s32i>>>, ["a", init]
+// CHECK:   %[[#V1:]] = cir.get_global @foo : !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+// CHECK:   %[[#V2:]] = cir.cast bitcast %[[#V1]] : !cir.ptr<!cir.func<(!s32i) -> !s32i>> -> !cir.ptr<!cir.func<(!rec_S) -> !s32i>>
+// CHECK:   cir.store{{.*}} %[[#V2]], %[[#V0]] : !cir.ptr<!cir.func<(!rec_S) -> !s32i>>, !cir.ptr<!cir.ptr<!cir.func<(!rec_S) -> !s32i>>>
+void bar() {
+  myfptr a = foo;
+}
+
+// CHECK: cir.func {{.*@baz}}(%arg0: !s32i
+// CHECK:   %[[#V0:]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, [""] {alignment = 4 : i64}
+// CHECK:   %[[#V1:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_S> -> !cir.ptr<!s32i>
+// CHECK:   cir.store{{.*}} %arg0, %[[#V1]] : !s32i, !cir.ptr<!s32i>
+// CHECK:   %[[#V2:]] = cir.alloca !cir.ptr<!cir.func<(!rec_S) -> !s32i>>, !cir.ptr<!cir.ptr<!cir.func<(!rec_S) -> !s32i>>>, ["a", init]
+// CHECK:   %[[#V3:]] = cir.get_global @foo : !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+// CHECK:   %[[#V4:]] = cir.cast bitcast %[[#V3]] : !cir.ptr<!cir.func<(!s32i) -> !s32i>> -> !cir.ptr<!cir.func<(!rec_S) -> !s32i>>
+// CHECK:   cir.store{{.*}} %[[#V4]], %[[#V2]] : !cir.ptr<!cir.func<(!rec_S) -> !s32i>>, !cir.ptr<!cir.ptr<!cir.func<(!rec_S) -> !s32i>>>
+// CHECK:   %[[#V5:]] = cir.load{{.*}} %[[#V2]] : !cir.ptr<!cir.ptr<!cir.func<(!rec_S) -> !s32i>>>, !cir.ptr<!cir.func<(!rec_S) -> !s32i>>
+// CHECK:   %[[#V6:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_S> -> !cir.ptr<!s32i>
+// CHECK:   %[[#V7:]] = cir.load{{.*}} %[[#V6]] : !cir.ptr<!s32i>, !s32i
+// CHECK:   %[[#V8:]] = cir.cast bitcast %[[#V5]] : !cir.ptr<!cir.func<(!rec_S) -> !s32i>> -> !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+// CHECK:   %[[#V9:]] = cir.call %[[#V8]](%[[#V7]]) : (!cir.ptr<!cir.func<(!s32i) -> !s32i>>, !s32i) -> !s32i
+
+// LLVM: define dso_local void @baz(i32 %0)
+// LLVM:   %[[#V1:]] = alloca %struct.S, i64 1
+// LLVM:   store i32 %0, ptr %[[#V1]]
+// LLVM:   %[[#V2:]] = alloca ptr, i64 1
+// LLVM:   store ptr @foo, ptr %[[#V2]]
+// LLVM:   %[[#V3:]] = load ptr, ptr %[[#V2]]
+// LLVM:   %[[#V4:]] = load i32, ptr %[[#V1]]
+// LLVM:   %[[#V5:]] = call i32 %[[#V3]](i32 %[[#V4]])
+
+void baz(S s) {
+  myfptr a = foo;
+  a(s);
+}
diff --git a/clang/test/CIR/Incubator/CallConvLowering/x86_64/int128.cpp b/clang/test/CIR/Incubator/CallConvLowering/x86_64/int128.cpp
new file mode 100644
index 0000000000000..ceb098f47981c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CallConvLowering/x86_64/int128.cpp
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-call-conv-lowering -emit-cir-flat -mmlir --mlir-print-ir-after=cir-call-conv-lowering %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-call-conv-lowering -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+
+// CHECK: ![[I128_STRUCT:.+]] = !cir.record<struct  {!s64i, !s64i}>
+
+// CHECK: @_Z5test1nn(%[[ARG0:.+]]: !s64i loc({{.+}}), %[[ARG1:.+]]: !s64i loc({{.+}}), %[[ARG2:.+]]: !s64i loc({{.+}}), %[[ARG3:.+]]: !s64i loc({{.+}})) -> ![[I128_STRUCT]]
+// LLVM: define dso_local { i64, i64 } @_Z5test1nn(i64 %[[#A_LO:]], i64 %[[#A_HI:]], i64 %[[#B_LO:]], i64 %[[#B_HI:]])
+__int128 test1(__int128 a, __int128 b) {
+  //      CHECK: %[[#SLOT_A:]] = cir.alloca !s128i, !cir.ptr<!s128i>
+  // CHECK-NEXT: %[[#SLOT_A2:]] = cir.cast bitcast %[[#SLOT_A]] : !cir.ptr<!s128i> -> !cir.ptr<![[I128_STRUCT]]>
+  // CHECK-NEXT: %[[#SLOT_A_LO:]] = cir.get_member %[[#SLOT_A2]][0] {name = ""} : !cir.ptr<![[I128_STRUCT]]> -> !cir.ptr<!s64i>
+  // CHECK-NEXT: cir.store{{.*}} %[[ARG0]], %[[#SLOT_A_LO]] : !s64i, !cir.ptr<!s64i>
+  // CHECK-NEXT: %[[#SLOT_A_HI:]] = cir.get_member %[[#SLOT_A2]][1] {name = ""} : !cir.ptr<![[I128_STRUCT]]> -> !cir.ptr<!s64i>
+  // CHECK-NEXT: cir.store{{.*}} %arg1, %[[#SLOT_A_HI]] : !s64i, !cir.ptr<!s64i>
+  // CHECK-NEXT: %[[#SLOT_B:]] = cir.alloca !s128i, !cir.ptr<!s128i>
+  // CHECK-NEXT: %[[#SLOT_B2:]] = cir.cast bitcast %[[#SLOT_B]] : !cir.ptr<!s128i> -> !cir.ptr<![[I128_STRUCT]]>
+  // CHECK-NEXT: %[[#SLOT_B_LO:]] = cir.get_member %[[#SLOT_B2]][0] {name = ""} : !cir.ptr<![[I128_STRUCT]]> -> !cir.ptr<!s64i>
+  // CHECK-NEXT: cir.store{{.*}} %arg2, %[[#SLOT_B_LO]] : !s64i, !cir.ptr<!s64i>
+  // CHECK-NEXT: %[[#SLOT_B_HI:]] = cir.get_member %[[#SLOT_B2]][1] {name = ""} : !cir.ptr<![[I128_STRUCT]]> -> !cir.ptr<!s64i>
+  // CHECK-NEXT: cir.store{{.*}} %arg3, %[[#SLOT_B_HI]] : !s64i, !cir.ptr<!s64i>
+  // CHECK-NEXT: %[[#SLOT_RET:]] = cir.alloca !s128i, !cir.ptr<!s128i>, ["__retval"]
+
+  //      LLVM: %[[#A_SLOT:]] = alloca i128, i64 1, align 4
+  // LLVM-NEXT: %[[#A_SLOT_LO:]] = getelementptr { i64, i64 }, ptr %[[#A_SLOT]], i32 0, i32 0
+  // LLVM-NEXT: store i64 %[[#A_LO]], ptr %[[#A_SLOT_LO]], align 8
+  // LLVM-NEXT: %[[#A_SLOT_HI:]] = getelementptr { i64, i64 }, ptr %[[#A_SLOT]], i32 0, i32 1
+  // LLVM-NEXT: store i64 %[[#A_HI]], ptr %[[#A_SLOT_HI]], align 8
+  // LLVM-NEXT: %[[#B_SLOT:]] = alloca i128, i64 1, align 4
+  // LLVM-NEXT: %[[#B_SLOT_LO:]] = getelementptr { i64, i64 }, ptr %[[#B_SLOT]], i32 0, i32 0
+  // LLVM-NEXT: store i64 %[[#B_LO]], ptr %[[#B_SLOT_LO]], align 8
+  // LLVM-NEXT: %[[#B_SLOT_HI:]] = getelementptr { i64, i64 }, ptr %[[#B_SLOT]], i32 0, i32 1
+  // LLVM-NEXT: store i64 %[[#B_HI]], ptr %[[#B_SLOT_HI]], align 8
+  // LLVM-NEXT: %[[#RET_SLOT:]] = alloca i128, i64 1, align 16
+
+  return a + b;
+  //      CHECK: %[[#A:]] = cir.load{{.*}} %[[#SLOT_A]] : !cir.ptr<!s128i>, !s128i
+  // CHECK-NEXT: %[[#B:]] = cir.load{{.*}} %[[#SLOT_B]] : !cir.ptr<!s128i>, !s128i
+  // CHECK-NEXT: %[[#SUM:]] = cir.binop(add, %[[#A]], %[[#B]]) nsw : !s128i
+  // CHECK-NEXT: cir.store{{.*}} %[[#SUM]], %[[#SLOT_RET]] : !s128i, !cir.ptr<!s128i>
+
+  //      LLVM: %[[#A:]] = load i128, ptr %[[#A_SLOT]], align 16
+  // LLVM-NEXT: %[[#B:]] = load i128, ptr %[[#B_SLOT]], align 16
+  // LLVM-NEXT: %[[#SUM:]] = add nsw i128 %[[#A]], %[[#B]]
+  // LLVM-NEXT: store i128 %[[#SUM]], ptr %[[#RET_SLOT]], align 16
+
+  //      CHECK: %[[#SLOT_RET2:]] = cir.cast bitcast %[[#SLOT_RET]] : !cir.ptr<!s128i> -> !cir.ptr<![[I128_STRUCT]]>
+  // CHECK-NEXT: %[[#RET:]] = cir.load{{.*}} %[[#SLOT_RET2]] : !cir.ptr<![[I128_STRUCT]]>, ![[I128_STRUCT]]
+  // CHECK-NEXT: cir.return %[[#RET]] : ![[I128_STRUCT]]
+
+  //      LLVM: %[[#RET:]] = load { i64, i64 }, ptr %[[#RET_SLOT]], align 8
+  // LLVM-NEXT: ret { i64, i64 } %[[#RET]]
+}
diff --git a/clang/test/CIR/Incubator/CallConvLowering/x86_64/varargs.c b/clang/test/CIR/Incubator/CallConvLowering/x86_64/varargs.c
new file mode 100644
index 0000000000000..97522aed8b55d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CallConvLowering/x86_64/varargs.c
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-call-conv-lowering -emit-cir-flat -mmlir --mlir-print-ir-after=cir-call-conv-lowering %s -o %t.cir
+
+int printf(const char *str, ...);
+
+// CHECK: cir.func {{.*@bar}}
+// CHECK:   %[[#V0:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init]
+// CHECK:   %[[#V1:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init]
+// CHECK:   cir.store %arg0, %[[#V0]] : !s32i, !cir.ptr<!s32i>
+// CHECK:   cir.store %arg1, %[[#V1]] : !s32i, !cir.ptr<!s32i>
+// CHECK:   %[[#V2:]] = cir.get_global @".str" : !cir.ptr<!cir.array<!s8i x 7>>
+// CHECK:   %[[#V3:]] = cir.cast array_to_ptrdecay %[[#V2]] : !cir.ptr<!cir.array<!s8i x 7>> -> !cir.ptr<!s8i>
+// CHECK:   %[[#V4:]] = cir.load %[[#V0]] : !cir.ptr<!s32i>, !s32i
+// CHECK:   %[[#V5:]] = cir.load %[[#V1]] : !cir.ptr<!s32i>, !s32i
+// CHECK:   %[[#V6:]] = cir.call @printf(%[[#V3]], %[[#V4]], %[[#V5]]) : (!cir.ptr<!s8i>, !s32i, !s32i) -> !s32i
+void bar(int a, int b) {
+  printf("%d %d\n", a, b);
+}
diff --git a/clang/test/CIR/Incubator/CallConvLowering/x86_64/void-ptr.c b/clang/test/CIR/Incubator/CallConvLowering/x86_64/void-ptr.c
new file mode 100644
index 0000000000000..b7470cc132931
--- /dev/null
+++ b/clang/test/CIR/Incubator/CallConvLowering/x86_64/void-ptr.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-call-conv-lowering \
+// RUN:   -emit-cir-flat -mmlir --mlir-print-ir-after=cir-call-conv-lowering %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-call-conv-lowering \
+// RUN:   -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+// CIR: cir.func {{.*}} @test_void_ptr(%arg0: !cir.ptr<!void> {{.*}}) -> !cir.ptr<!void>
+// LLVM: define {{.*}}ptr @test_void_ptr(ptr %{{.*}})
+// OGCG: define {{.*}}ptr @test_void_ptr(ptr {{.*}}%{{.*}})
+void *test_void_ptr(void *p) {
+  return p;
+}
+
+// CIR: cir.func {{.*}} @test_void_ptr_offset(%arg0: !cir.ptr<!void> {{.*}}, %arg1: !s64i {{.*}}) -> !cir.ptr<!void>
+// LLVM: define {{.*}}ptr @test_void_ptr_offset(ptr %{{.*}}, i64 %{{.*}})
+// OGCG: define {{.*}}ptr @test_void_ptr_offset(ptr {{.*}}%{{.*}}, i64 {{.*}}%{{.*}})
+void *test_void_ptr_offset(void *p, long offset) {
+  return (char*)p + offset;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/AArch64/bf16-getset-intrinsics.c b/clang/test/CIR/Incubator/CodeGen/AArch64/bf16-getset-intrinsics.c
new file mode 100644
index 0000000000000..103a88dbcfc8a
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/AArch64/bf16-getset-intrinsics.c
@@ -0,0 +1,185 @@
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +bf16 \
+// RUN:    -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +bf16 \
+// RUN:    -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -emit-llvm -fno-clangir-call-conv-lowering -o - %s \
+// RUN: | opt -S -passes=mem2reg,simplifycfg -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// REQUIRES: aarch64-registered-target || arm-registered-target
+
+// This test mimics clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c, which eventually
+// CIR shall be able to support fully. Since this is going to take some time to converge,
+// the unsupported/NYI code is commented out, so that we can incrementally improve this.
+// The NYI filecheck used contains the LLVM output from OG codegen that should guide the
+// correct result when implementing this into the CIR pipeline.
+
+#include <arm_neon.h>
+
+// CHECK-LABEL: @test_vcreate_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64 [[A:%.*]] to <4 x bfloat>
+// CHECK-NEXT:    ret <4 x bfloat> [[TMP0]]
+//
+// bfloat16x4_t test_vcreate_bf16(uint64_t a) {
+//   return vcreate_bf16(a);
+// }
+
+// CHECK-LABEL: @test_vdup_n_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x bfloat> poison, bfloat [[V:%.*]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <4 x bfloat> [[VECINIT_I]], bfloat [[V]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <4 x bfloat> [[VECINIT1_I]], bfloat [[V]], i32 2
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <4 x bfloat> [[VECINIT2_I]], bfloat [[V]], i32 3
+// CHECK-NEXT:    ret <4 x bfloat> [[VECINIT3_I]]
+//
+// bfloat16x4_t test_vdup_n_bf16(bfloat16_t v) {
+//   return vdup_n_bf16(v);
+// }
+
+// CHECK-LABEL: @test_vdupq_n_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x bfloat> poison, bfloat [[V:%.*]], i32 0
+// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x bfloat> [[VECINIT_I]], bfloat [[V]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x bfloat> [[VECINIT1_I]], bfloat [[V]], i32 2
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x bfloat> [[VECINIT2_I]], bfloat [[V]], i32 3
+// CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x bfloat> [[VECINIT3_I]], bfloat [[V]], i32 4
+// CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x bfloat> [[VECINIT4_I]], bfloat [[V]], i32 5
+// CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x bfloat> [[VECINIT5_I]], bfloat [[V]], i32 6
+// CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x bfloat> [[VECINIT6_I]], bfloat [[V]], i32 7
+// CHECK-NEXT:    ret <8 x bfloat> [[VECINIT7_I]]
+//
+// bfloat16x8_t test_vdupq_n_bf16(bfloat16_t v) {
+//   return vdupq_n_bf16(v);
+// }
+
+// CHECK-LABEL: @test_vdup_lane_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <8 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x bfloat>
+// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT:    ret <4 x bfloat> [[LANE]]
+//
+// bfloat16x4_t test_vdup_lane_bf16(bfloat16x4_t v) {
+//   return vdup_lane_bf16(v, 1);
+// }
+
+// CHECK-LABEL: @test_vdupq_lane_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <8 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x bfloat>
+// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP1]], <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT:    ret <8 x bfloat> [[LANE]]
+//
+// bfloat16x8_t test_vdupq_lane_bf16(bfloat16x4_t v) {
+//   return vdupq_lane_bf16(v, 1);
+// }
+
+// CHECK-LABEL: @test_vdup_laneq_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x bfloat>
+// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT:    ret <4 x bfloat> [[LANE]]
+//
+// bfloat16x4_t test_vdup_laneq_bf16(bfloat16x8_t v) {
+//   return vdup_laneq_bf16(v, 7);
+// }
+
+// CHECK-LABEL: @test_vdupq_laneq_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x bfloat>
+// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT:    ret <8 x bfloat> [[LANE]]
+//
+// bfloat16x8_t test_vdupq_laneq_bf16(bfloat16x8_t v) {
+//   return vdupq_laneq_bf16(v, 7);
+// }
+
+// CHECK-LABEL: @test_vcombine_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x bfloat> [[LOW:%.*]], <4 x bfloat> [[HIGH:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    ret <8 x bfloat> [[SHUFFLE_I]]
+//
+// bfloat16x8_t test_vcombine_bf16(bfloat16x4_t low, bfloat16x4_t high) {
+//   return vcombine_bf16(low, high);
+// }
+
+// CHECK-LABEL: @test_vget_high_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    ret <4 x bfloat> [[SHUFFLE_I]]
+//
+// bfloat16x4_t test_vget_high_bf16(bfloat16x8_t a) {
+//   return vget_high_bf16(a);
+// }
+
+// CHECK-LABEL: @test_vget_low_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    ret <4 x bfloat> [[SHUFFLE_I]]
+//
+// bfloat16x4_t test_vget_low_bf16(bfloat16x8_t a) {
+//   return vget_low_bf16(a);
+// }
+
+bfloat16_t test_vget_lane_bf16(bfloat16x4_t v) {
+ return vget_lane_bf16(v, 1);
+
+  // CIR-LABEL: vget_lane_bf16
+  // CIR: [[TMP0:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[TMP1:%.*]] = cir.vec.extract {{.*}}[{{.*}} : !s32i] : !cir.vector<!cir.bf16 x 4>
+
+  // LLVM-LABEL: test_vget_lane_bf16
+  // LLVM-SAME: (<4 x bfloat> [[VEC:%.*]])
+  // LLVM: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[VEC]], i32 1
+  // LLVM: ret bfloat [[VGET_LANE]]
+}
+
+bfloat16_t test_vgetq_lane_bf16(bfloat16x8_t v) {
+  return vgetq_lane_bf16(v, 7);
+
+  // CIR-LABEL: vgetq_lane_bf16
+  // CIR: [[TMP0:%.*]] = cir.const #cir.int<7> : !s32i
+  // CIR: [[TMP1:%.*]] = cir.vec.extract {{.*}}[{{.*}} : !s32i] : !cir.vector<!cir.bf16 x 8>
+
+  // LLVM-LABEL: test_vgetq_lane_bf16
+  // LLVM-SAME: (<8 x bfloat> [[VEC:%.*]])
+  // LLVM: [[VGET_LANE:%.*]] = extractelement <8 x bfloat> [[VEC]], i32 7
+  // LLVM: ret bfloat [[VGET_LANE]]
+}
+
+// CHECK-LABEL: @test_vset_lane_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[V:%.*]], bfloat [[A:%.*]], i32 1
+// CHECK-NEXT:    ret <4 x bfloat> [[VSET_LANE]]
+//
+// bfloat16x4_t test_vset_lane_bf16(bfloat16_t a, bfloat16x4_t v) {
+//   return vset_lane_bf16(a, v, 1);
+// }
+
+// CHECK-LABEL: @test_vsetq_lane_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[V:%.*]], bfloat [[A:%.*]], i32 7
+// CHECK-NEXT:    ret <8 x bfloat> [[VSET_LANE]]
+//
+// bfloat16x8_t test_vsetq_lane_bf16(bfloat16_t a, bfloat16x8_t v) {
+//  return vsetq_lane_bf16(a, v, 7);
+// }
+
+bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) {
+ return vduph_lane_bf16(v, 1);
+
+  // CIR-LABEL: vduph_lane_bf16
+  // CIR: [[TMP0:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[TMP1:%.*]] = cir.vec.extract {{.*}}[{{.*}} : !s32i] : !cir.vector<!cir.bf16 x 4>
+
+  // LLVM-LABEL: test_vduph_lane_bf16
+  // LLVM-SAME: (<4 x bfloat> [[VEC:%.*]])
+  // LLVM: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[VEC]], i32 1
+  // LLVM: ret bfloat [[VGET_LANE]]
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/AArch64/neon-arith.c b/clang/test/CIR/Incubator/CodeGen/AArch64/neon-arith.c
new file mode 100644
index 0000000000000..db785c01c9631
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/AArch64/neon-arith.c
@@ -0,0 +1,1017 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -target-feature +neon \
+// RUN:    -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -fno-clangir-call-conv-lowering \
+// RUN:   -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -target-feature +neon \
+// RUN:    -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -fno-clangir-call-conv-lowering \
+// RUN:  -emit-llvm -o - %s \
+// RUN: | opt -S -passes=instcombine,mem2reg,simplifycfg -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// REQUIRES: aarch64-registered-target || arm-registered-target
+#include <arm_neon.h>
+
+// This test file contains tests for aarch64 NEON arithmetic intrinsics
+// that are not vector type related.
+
+float32_t test_vrndns_f32(float32_t a) {
+  return vrndns_f32(a);
+}
+// CIR: cir.func {{.*}} @vrndns_f32(%arg0: !cir.float {{.*}}) -> !cir.float
+// CIR: cir.store %arg0, [[ARG_SAVE:%.*]] : !cir.float, !cir.ptr<!cir.float>
+// CIR: [[INTRIN_ARG:%.*]] = cir.load{{.*}} [[ARG_SAVE]] : !cir.ptr<!cir.float>, !cir.float
+// CIR: {{%.*}} = cir.roundeven [[INTRIN_ARG]] : !cir.float
+// CIR: cir.return {{%.*}} : !cir.float
+
+// CIR-LABEL: test_vrndns_f32
+// CIR: cir.store %arg0, [[ARG_SAVE0:%.*]] : !cir.float, !cir.ptr<!cir.float>
+// CIR: [[FUNC_ARG:%.*]] = cir.load{{.*}} [[ARG_SAVE0]] : !cir.ptr<!cir.float>, !cir.float
+// CIR: [[FUNC_RES:%.*]] = cir.call @vrndns_f32([[FUNC_ARG]]) : (!cir.float) -> !cir.float
+// CIR: cir.store{{.*}} [[FUNC_RES]], [[RET_P:%.*]] : !cir.float, !cir.ptr<!cir.float>
+// CIR: [[RET_VAL:%.*]] = cir.load{{.*}} [[RET_P]] : !cir.ptr<!cir.float>, !cir.float
+// CIR: cir.return [[RET_VAL]] : !cir.float loc
+
+// LLVM: {{.*}}test_vrndns_f32(float{{.*}}[[ARG:%.*]])
+// LLVM: [[INTRIN_RES:%.*]] = call float @llvm.roundeven.f32(float [[ARG]])
+// LLVM: ret float [[INTRIN_RES]]
+
+float32x2_t test_vrnda_f32(float32x2_t a) {
+  return vrnda_f32(a);
+}
+
+// CIR: cir.func {{.*}} @vrnda_f32(%arg0: !cir.vector<!cir.float x 2>
+// CIR: {{%.*}} = cir.round {{.*}} : !cir.vector<!cir.float x 2>
+// CIR: cir.return {{%.*}} : !cir.vector<!cir.float x 2>
+
+// CIR-LABEL: test_vrnda_f32
+// CIR: cir.store %arg0, [[ARG_SAVE0:%.*]] :  !cir.vector<!cir.float x 2>, !cir.ptr<!cir.vector<!cir.float x 2>>
+// CIR: [[FUNC_ARG:%.*]] = cir.load{{.*}} [[ARG_SAVE0]] : !cir.ptr<!cir.vector<!cir.float x 2>>, !cir.vector<!cir.float x 2>
+// CIR: [[FUNC_RES:%.*]] = cir.call @vrnda_f32([[FUNC_ARG]]) : (!cir.vector<!cir.float x 2>) -> !cir.vector<!cir.float x 2>
+// CIR: cir.store{{.*}} [[FUNC_RES]], [[RET_P:%.*]] : !cir.vector<!cir.float x 2>, !cir.ptr<!cir.vector<!cir.float x 2>>
+// CIR: [[RET_VAL:%.*]] = cir.load{{.*}} [[RET_P]] : !cir.ptr<!cir.vector<!cir.float x 2>>, !cir.vector<!cir.float x 2>
+// CIR: cir.return [[RET_VAL]] : !cir.vector<!cir.float x 2>
+
+// LLVM: {{.*}}test_vrnda_f32(<2 x float>{{.*}}[[ARG:%.*]])
+// LLVM: [[INTRIN_RES:%.*]] = call <2 x float> @llvm.round.v2f32(<2 x float> [[ARG]])
+// LLVM: ret <2 x float> [[INTRIN_RES]]
+
+float32x4_t test_vrndaq_f32(float32x4_t a) {
+  return vrndaq_f32(a);
+}
+
+// CIR: cir.func {{.*}} @vrndaq_f32(%arg0: !cir.vector<!cir.float x 4>
+// CIR: cir.store %arg0, [[ARG_SAVE:%.*]] : !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>
+// CIR: {{%.*}} = cir.round {{.*}} : !cir.vector<!cir.float x 4>
+// CIR: cir.return {{%.*}} : !cir.vector<!cir.float x 4>
+
+// LLVM: {{.*}}test_vrndaq_f32(<4 x float>{{.*}}[[ARG:%.*]])
+// LLVM: [[INTRIN_RES:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[ARG]])
+// LLVM: ret <4 x float> [[INTRIN_RES]]
+
+int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
+  return vpadd_s8(a, b);
+}
+
+// CIR-LABEL: vpadd_s8
+// CIR: [[RES:%.*]] = cir.llvm.intrinsic "aarch64.neon.addp" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>
+
+// LLVM: {{.*}}test_vpadd_s8(<8 x i8>{{.*}}[[A:%.*]], <8 x i8>{{.*}}[[B:%.*]])
+// LLVM: [[RES:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// LLVM: ret <8 x i8> [[RES]]
+
+
+int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) {
+  return vpaddq_s8(a, b);
+}
+
+// CIR-LABEL: vpaddq_s8
+// CIR: [[RES:%.*]] = cir.llvm.intrinsic "aarch64.neon.addp" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
+
+// LLVM: {{.*}}test_vpaddq_s8(<16 x i8>{{.*}}[[A:%.*]], <16 x i8>{{.*}}[[B:%.*]])
+// LLVM: [[RES:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// LLVM: ret <16 x i8> [[RES]]
+
+uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
+  return vpadd_u8(a, b);
+}
+
+// CIR-LABEL: vpadd_u8
+// CIR: [[RES:%.*]] = cir.llvm.intrinsic "aarch64.neon.addp" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>) -> !cir.vector<!u8i x 8>
+
+// LLVM: {{.*}}test_vpadd_u8(<8 x i8>{{.*}}[[A:%.*]], <8 x i8>{{.*}}[[B:%.*]])
+// LLVM: [[RES:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// LLVM: ret <8 x i8> [[RES]]
+
+int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
+  return vpadd_s16(a, b);
+}
+
+// CIR-LABEL: vpadd_s16
+// CIR: [[RES:%.*]] = cir.llvm.intrinsic "aarch64.neon.addp" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
+// CIR: {{%.*}} = cir.cast bitcast [[RES]] : !cir.vector<!s16i x 4> -> !cir.vector<!s8i x 8>
+
+// LLVM: {{.*}}test_vpadd_s16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
+// LLVM: [[RES:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[A]], <4 x i16> [[B]])
+// LLVM: ret <4 x i16> [[RES]]
+
+int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) {
+  return vpaddq_s16(a, b);
+}
+
+// CIR-LABEL: vpaddq_s16
+// CIR: [[RES:%.*]] = cir.llvm.intrinsic "aarch64.neon.addp" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+// CIR: {{%.*}} = cir.cast bitcast [[RES]] : !cir.vector<!s16i x 8> -> !cir.vector<!s8i x 16>
+
+// LLVM: {{.*}}test_vpaddq_s16(<8 x i16>{{.*}}[[A:%.*]], <8 x i16>{{.*}}[[B:%.*]])
+// LLVM: [[RES:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[A]], <8 x i16> [[B]])
+// LLVM: ret <8 x i16> [[RES]]
+
+uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
+  return vpadd_u16(a, b);
+}
+
+// CIR-LABEL: vpadd_u16
+// CIR: [[RES:%.*]] = cir.llvm.intrinsic "aarch64.neon.addp" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>) -> !cir.vector<!u16i x 4>
+// CIR: {{%.*}} = cir.cast bitcast [[RES]] : !cir.vector<!u16i x 4> -> !cir.vector<!s8i x 8>
+
+// LLVM: {{.*}}test_vpadd_u16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
+// LLVM: [[RES:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[A]], <4 x i16> [[B]])
+// LLVM: ret <4 x i16> [[RES]]
+
+int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
+  return vpadd_s32(a, b);
+}
+
+// CIR-LABEL: vpadd_s32
+// CIR: [[RES:%.*]] = cir.llvm.intrinsic "aarch64.neon.addp" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+// CIR: {{%.*}} = cir.cast bitcast [[RES]] : !cir.vector<!s32i x 2> -> !cir.vector<!s8i x 8>
+
+// LLVM: {{.*}}test_vpadd_s32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[B:%.*]])
+// LLVM: [[RES:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[A]], <2 x i32> [[B]])
+// LLVM: ret <2 x i32> [[RES]]
+
+int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) {
+  return vpaddq_s32(a, b);
+}
+
+// CIR-LABEL: vpaddq_s32
+// CIR: [[RES:%.*]] = cir.llvm.intrinsic "aarch64.neon.addp" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+// CIR: {{%.*}} = cir.cast bitcast [[RES]] : !cir.vector<!s32i x 4> -> !cir.vector<!s8i x 16>
+
+// LLVM: {{.*}}test_vpaddq_s32(<4 x i32>{{.*}}[[A:%.*]], <4 x i32>{{.*}}[[B:%.*]])
+// LLVM: [[RES:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[A]], <4 x i32> [[B]])
+// LLVM: ret <4 x i32> [[RES]]
+
+float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
+  return vpadd_f32(a, b);
+}
+
+// CIR-LABEL: vpadd_f32
+// CIR: [[RES:%.*]] = cir.llvm.intrinsic "aarch64.neon.faddp" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!cir.float x 2>, !cir.vector<!cir.float x 2>) -> !cir.vector<!cir.float x 2>
+// CIR: {{%.*}} = cir.cast bitcast [[RES]] : !cir.vector<!cir.float x 2> -> !cir.vector<!s8i x 8>
+
+// LLVM: {{.*}}test_vpadd_f32(<2 x float>{{.*}}[[A:%.*]], <2 x float>{{.*}}[[B:%.*]])
+// LLVM: [[RES:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> [[A]], <2 x float> [[B]])
+// LLVM: ret <2 x float> [[RES]]
+
+float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
+  return vpaddq_f32(a, b);
+}
+
+// CIR-LABEL: vpaddq_f32
+// CIR: [[RES:%.*]] = cir.llvm.intrinsic "aarch64.neon.faddp" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!cir.float x 4>, !cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
+// CIR: {{%.*}} = cir.cast bitcast [[RES]] : !cir.vector<!cir.float x 4> -> !cir.vector<!s8i x 16>
+
+// LLVM: {{.*}}test_vpaddq_f32(<4 x float>{{.*}}[[A:%.*]], <4 x float>{{.*}}[[B:%.*]])
+// LLVM: [[RES:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> [[A]], <4 x float> [[B]])
+// LLVM: ret <4 x float> [[RES]]
+
+float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
+  return vpaddq_f64(a, b);
+}
+
+// CIR-LABEL: vpaddq_f64
+// CIR: [[RES:%.*]] = cir.llvm.intrinsic "aarch64.neon.faddp" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!cir.double x 2>, !cir.vector<!cir.double x 2>) -> !cir.vector<!cir.double x 2>
+// CIR: {{%.*}} = cir.cast bitcast [[RES]] : !cir.vector<!cir.double x 2> -> !cir.vector<!s8i x 16>
+
+// LLVM: {{.*}}test_vpaddq_f64(<2 x double>{{.*}}[[A:%.*]], <2 x double>{{.*}}[[B:%.*]])
+// LLVM: [[RES:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> [[A]], <2 x double> [[B]])
+// LLVM: ret <2 x double> [[RES]]
+
+int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t v) {
+  return vqdmulh_lane_s16(a, v, 3);
+}
+
+// CIR-LABEL: vqdmulh_lane_s16
+// CIR: [[LANE:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqdmulh.lane" {{%.*}}, {{%.*}}, [[LANE]] :
+// CIR: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>, !s32i) -> !cir.vector<!s16i x 4>
+
+// LLVM: {{.*}}test_vqdmulh_lane_s16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[V:%.*]])
+// LLVM: [[RES:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.lane.v4i16.v4i16
+// LLVM-SAME: (<4 x i16> [[A]], <4 x i16> [[V]], i32 3)
+// LLVM:  ret <4 x i16> [[RES]]
+
+
+int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t v) {
+  return vqdmulh_lane_s32(a, v, 1);
+}
+
+// CIR-LABEL: vqdmulh_lane_s32
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqdmulh.lane" {{%.*}}, {{%.*}}, [[LANE]] :
+// CIR: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>, !s32i) -> !cir.vector<!s32i x 2>
+
+// LLVM: {{.*}}test_vqdmulh_lane_s32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[V:%.*]])
+// LLVM: [[RES:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.lane.v2i32.v2i32
+// LLVM-SAME: (<2 x i32> [[A]], <2 x i32> [[V]], i32 1)
+// LLVM:  ret <2 x i32> [[RES]]
+
+int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t v) {
+  return vqdmulhq_lane_s16(a, v, 3);
+}
+
+// CIR-LABEL: vqdmulhq_lane_s16
+// CIR: [[LANE:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqdmulh.lane" {{%.*}}, {{%.*}}, [[LANE]] :
+// CIR: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 4>, !s32i) -> !cir.vector<!s16i x 8>
+
+// LLVM: {{.*}}test_vqdmulhq_lane_s16(<8 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[V:%.*]])
+// LLVM: [[RES:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.lane.v8i16.v4i16
+// LLVM-SAME: (<8 x i16> [[A]], <4 x i16> [[V]], i32 3)
+// LLVM:  ret <8 x i16> [[RES]]
+
+int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t v) {
+  return vqdmulhq_lane_s32(a, v, 1);
+}
+
+// CIR-LABEL: vqdmulhq_lane_s32
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqdmulh.lane" {{%.*}}, {{%.*}}, [[LANE]] :
+// CIR: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 2>, !s32i) -> !cir.vector<!s32i x 4>
+
+// LLVM: {{.*}}test_vqdmulhq_lane_s32(<4 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[V:%.*]])
+// LLVM: [[RES:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.lane.v4i32.v2i32
+// LLVM-SAME: (<4 x i32> [[A]], <2 x i32> [[V]], i32 1)
+// LLVM:  ret <4 x i32> [[RES]]
+
+int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t v) {
+  return vqrdmulh_lane_s16(a, v, 3);
+}
+
+// CIR-LABEL: vqrdmulh_lane_s16
+// CIR: [[LANE:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqrdmulh.lane" {{%.*}}, {{%.*}}, [[LANE]] :
+// CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>, !s32i) -> !cir.vector<!s16i x 4>
+
+// LLVM: {{.*}}test_vqrdmulh_lane_s16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[V:%.*]])
+// LLVM: [[RES:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v4i16.v4i16
+// LLVM-SAME: (<4 x i16> [[A]], <4 x i16> [[V]], i32 3)
+// LLVM:  ret <4 x i16> [[RES]]
+
+int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t v) {
+  return vqrdmulhq_lane_s16(a, v, 3);
+}
+
+// CIR-LABEL: vqrdmulhq_lane_s16
+// CIR: [[LANE:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqrdmulh.lane" {{%.*}}, {{%.*}}, [[LANE]] :
+// CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 4>, !s32i) -> !cir.vector<!s16i x 8>
+
+// LLVM: {{.*}}test_vqrdmulhq_lane_s16(<8 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[V:%.*]])
+// LLVM: [[RES:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v8i16.v4i16
+// LLVM-SAME: (<8 x i16> [[A]], <4 x i16> [[V]], i32 3)
+// LLVM:  ret <8 x i16> [[RES]]
+
+int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t v) {
+  return vqrdmulh_lane_s32(a, v, 1);
+}
+
+// CIR-LABEL: vqrdmulh_lane_s32
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqrdmulh.lane" {{%.*}}, {{%.*}}, [[LANE]] :
+// CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>, !s32i) -> !cir.vector<!s32i x 2>
+
+// LLVM: {{.*}}test_vqrdmulh_lane_s32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[V:%.*]])
+// LLVM: [[RES:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v2i32.v2i32
+// LLVM-SAME: (<2 x i32> [[A]], <2 x i32> [[V]], i32 1)
+// LLVM:  ret <2 x i32> [[RES]]
+
+int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t v) {
+  return vqrdmulhq_lane_s32(a, v, 1);
+}
+
+// CIR-LABEL: vqrdmulhq_lane_s32
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqrdmulh.lane" {{%.*}}, {{%.*}}, [[LANE]] :
+// CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 2>, !s32i) -> !cir.vector<!s32i x 4>
+
+// LLVM: {{.*}}test_vqrdmulhq_lane_s32(<4 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[V:%.*]])
+// LLVM: [[RES:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v4i32.v2i32
+// LLVM-SAME: (<4 x i32> [[A]], <2 x i32> [[V]], i32 1)
+// LLVM:  ret <4 x i32> [[RES]]
+
+int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
+  return vqaddq_s8(a, b);
+}
+
+// CIR-LABEL: vqaddq_s8
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqadd" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
+
+// LLVM: {{.*}}test_vqaddq_s8(<16 x i8>{{.*}} [[A:%.*]], <16 x i8>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// LLVM: ret <16 x i8> [[RES]]
+
+uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
+  return vqaddq_u8(a, b);
+}
+
+// CIR-LABEL: vqaddq_u8
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uqadd" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>) -> !cir.vector<!u8i x 16>
+
+// LLVM: {{.*}}test_vqaddq_u8(<16 x i8>{{.*}} [[A:%.*]], <16 x i8>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// LLVM: ret <16 x i8> [[RES]]
+
+int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
+  return vqaddq_s16(a, b);
+}
+
+// CIR-LABEL: vqaddq_s16
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqadd" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+// LLVM: {{.*}}test_vqaddq_s16(<8 x i16>{{.*}} [[A:%.*]], <8 x i16>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> [[A]], <8 x i16> [[B]])
+// LLVM: ret <8 x i16> [[RES]]
+
+uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
+  return vqaddq_u16(a, b);
+}
+
+// CIR-LABEL: vqaddq_u16
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uqadd" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>
+
+// LLVM: {{.*}}test_vqaddq_u16(<8 x i16>{{.*}} [[A:%.*]], <8 x i16>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> [[A]], <8 x i16> [[B]])
+// LLVM: ret <8 x i16> [[RES]]
+
+int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
+  return vqaddq_s32(a, b);
+}
+
+// CIR-LABEL: vqaddq_s32
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqadd" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+// LLVM: {{.*}}test_vqaddq_s32(<4 x i32>{{.*}} [[A:%.*]], <4 x i32>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[B]])
+// LLVM: ret <4 x i32> [[RES]]
+
+int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
+  return vqaddq_s64(a, b);
+}
+
+// CIR-LABEL: vqaddq_s64
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqadd" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+
+// LLVM: {{.*}}test_vqaddq_s64(<2 x i64>{{.*}} [[A:%.*]], <2 x i64>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
+// LLVM: ret <2 x i64> [[RES]]
+
+uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
+  return vqaddq_u64(a, b);
+}
+
+// CIR-LABEL: vqaddq_u64
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uqadd" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!u64i x 2>, !cir.vector<!u64i x 2>) -> !cir.vector<!u64i x 2>
+
+// LLVM: {{.*}}test_vqaddq_u64(<2 x i64>{{.*}} [[A:%.*]], <2 x i64>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
+// LLVM: ret <2 x i64> [[RES]]
+
+int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
+  return vqsub_s8(a, b);
+}
+
+// CIR-LABEL: vqsub_s8
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqsub" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>
+
+// LLVM: {{.*}}test_vqsub_s8(<8 x i8>{{.*}} [[A:%.*]], <8 x i8>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// LLVM: ret <8 x i8> [[RES]]
+
+uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
+  return vqsub_u8(a, b);
+}
+
+// CIR-LABEL: vqsub_u8
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uqsub" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>) -> !cir.vector<!u8i x 8>
+
+// LLVM: {{.*}}test_vqsub_u8(<8 x i8>{{.*}} [[A:%.*]], <8 x i8>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
+// LLVM: ret <8 x i8> [[RES]]
+
+int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
+  return vqsub_s16(a, b);
+}
+
+// CIR-LABEL: vqsub_s16
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqsub" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
+
+// LLVM: {{.*}}test_vqsub_s16(<4 x i16>{{.*}} [[A:%.*]], <4 x i16>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[A]], <4 x i16> [[B]])
+// LLVM: ret <4 x i16> [[RES]]
+
+uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
+  return vqsub_u16(a, b);
+}
+
+// CIR-LABEL: vqsub_u16
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uqsub" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>) -> !cir.vector<!u16i x 4>
+
+// LLVM: {{.*}}test_vqsub_u16(<4 x i16>{{.*}} [[A:%.*]], <4 x i16>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[A]], <4 x i16> [[B]])
+// LLVM: ret <4 x i16> [[RES]]
+
+int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
+  return vqsub_s32(a, b);
+}
+
+// CIR-LABEL: vqsub_s32
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqsub" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+
+// LLVM: {{.*}}test_vqsub_s32(<2 x i32>{{.*}} [[A:%.*]], <2 x i32>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> [[A]], <2 x i32> [[B]])
+// LLVM: ret <2 x i32> [[RES]]
+
+uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
+  return vqsub_u32(a, b);
+}
+
+// CIR-LABEL: vqsub_u32
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uqsub" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!u32i x 2>, !cir.vector<!u32i x 2>) -> !cir.vector<!u32i x 2>
+
+// LLVM: {{.*}}test_vqsub_u32(<2 x i32>{{.*}} [[A:%.*]], <2 x i32>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> [[A]], <2 x i32> [[B]])
+// LLVM: ret <2 x i32> [[RES]]
+
+int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
+  return vqsub_s64(a, b);
+}
+
+// CIR-LABEL: vqsub_s64
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqsub" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!s64i x 1>, !cir.vector<!s64i x 1>) -> !cir.vector<!s64i x 1>
+
+// LLVM: {{.*}}test_vqsub_s64(<1 x i64>{{.*}} [[A:%.*]], <1 x i64>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> [[A]], <1 x i64> [[B]])
+// LLVM: ret <1 x i64> [[RES]]
+
+uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
+  return vqsub_u64(a, b);
+}
+
+// CIR-LABEL: vqsub_u64
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uqsub" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!u64i x 1>, !cir.vector<!u64i x 1>) -> !cir.vector<!u64i x 1>
+
+// LLVM: {{.*}}test_vqsub_u64(<1 x i64>{{.*}} [[A:%.*]], <1 x i64>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> [[A]], <1 x i64> [[B]])
+// LLVM: ret <1 x i64> [[RES]]
+
+int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
+  return vqsubq_s8(a, b);
+}
+
+// CIR-LABEL: vqsubq_s8
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqsub" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
+
+// LLVM: {{.*}}test_vqsubq_s8(<16 x i8>{{.*}} [[A:%.*]], <16 x i8>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// LLVM: ret <16 x i8> [[RES]]
+
+uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
+  return vqsubq_u8(a, b);
+}
+
+// CIR-LABEL: vqsubq_u8
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uqsub" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>) -> !cir.vector<!u8i x 16>
+
+// LLVM: {{.*}}test_vqsubq_u8(<16 x i8>{{.*}} [[A:%.*]], <16 x i8>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// LLVM: ret <16 x i8> [[RES]]
+
+int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
+  return vqsubq_s16(a, b);
+}
+
+// CIR-LABEL: vqsubq_s16
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqsub" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+// LLVM: {{.*}}test_vqsubq_s16(<8 x i16>{{.*}} [[A:%.*]], <8 x i16>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> [[A]], <8 x i16> [[B]])
+// LLVM: ret <8 x i16> [[RES]]
+
+uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
+  return vqsubq_u16(a, b);
+}
+
+// CIR-LABEL: vqsubq_u16
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uqsub" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>
+
+// LLVM: {{.*}}test_vqsubq_u16(<8 x i16>{{.*}} [[A:%.*]], <8 x i16>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> [[A]], <8 x i16> [[B]])
+// LLVM: ret <8 x i16> [[RES]]
+
+int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
+  return vqsubq_s32(a, b);
+}
+
+// CIR-LABEL: vqsubq_s32
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqsub" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+// LLVM: {{.*}}test_vqsubq_s32(<4 x i32>{{.*}} [[A:%.*]], <4 x i32>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[B]])
+// LLVM: ret <4 x i32> [[RES]]
+
+uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
+  return vqsubq_u32(a, b);
+}
+
+// CIR-LABEL: vqsubq_u32
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uqsub" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!u32i x 4>, !cir.vector<!u32i x 4>) -> !cir.vector<!u32i x 4>
+
+// LLVM: {{.*}}test_vqsubq_u32(<4 x i32>{{.*}} [[A:%.*]], <4 x i32>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[B]])
+// LLVM: ret <4 x i32> [[RES]]
+
+int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
+  return vqsubq_s64(a, b);
+}
+
+// CIR-LABEL: vqsubq_s64
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqsub" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+
+// LLVM: {{.*}}test_vqsubq_s64(<2 x i64>{{.*}} [[A:%.*]], <2 x i64>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
+// LLVM: ret <2 x i64> [[RES]]
+
+uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
+  return vqsubq_u64(a, b);
+}
+
+// CIR-LABEL: vqsubq_u64
+// CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uqsub" {{%.*}}, {{%.*}} :
+// CIR-SAME: (!cir.vector<!u64i x 2>, !cir.vector<!u64i x 2>) -> !cir.vector<!u64i x 2>
+
+// LLVM: {{.*}}test_vqsubq_u64(<2 x i64>{{.*}} [[A:%.*]], <2 x i64>{{.*}} [[B:%.*]])
+// LLVM: [[RES:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
+// LLVM: ret <2 x i64> [[RES]]
+
+int16x4_t test_vpaddl_s8(int8x8_t a) {
+  return vpaddl_s8(a);
+
+  // CIR-LABEL: vpaddl_s8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.saddlp" {{%.*}} : (!cir.vector<!s8i x 8>) -> !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}test_vpaddl_s8(<8 x i8>{{.*}}[[A:%[a-z0-9]+]])
+  // LLVM:   [[VPADDL1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> [[A]])
+  // LLVM:   ret <4 x i16> [[VPADDL1_I]]
+}
+
+int32x2_t test_vpaddl_s16(int16x4_t a) {
+  return vpaddl_s16(a);
+
+  // CIR-LABEL: vpaddl_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.saddlp" {{%.*}} : (!cir.vector<!s16i x 4>) -> !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}test_vpaddl_s16(<4 x i16>{{.*}}[[A:%[a-z0-9]+]])
+  // LLVM:   [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> [[A]])
+  // LLVM:   ret <2 x i32> [[VPADDL1_I]]
+}
+
+int64x1_t test_vpaddl_s32(int32x2_t a) {
+  return vpaddl_s32(a);
+
+  // CIR-LABEL: vpaddl_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.saddlp" {{%.*}} : (!cir.vector<!s32i x 2>) -> !cir.vector<!s64i x 1>
+
+  // LLVM: {{.*}}test_vpaddl_s32(<2 x i32>{{.*}}[[A:%[a-z0-9]+]])
+  // LLVM:   [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> [[A]])
+  // LLVM:   ret <1 x i64> [[VPADDL1_I]]
+}
+
+uint16x4_t test_vpaddl_u8(uint8x8_t a) {
+  return vpaddl_u8(a);
+
+  // CIR-LABEL: vpaddl_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uaddlp" {{%.*}} : (!cir.vector<!u8i x 8>) -> !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}test_vpaddl_u8(<8 x i8>{{.*}}[[A:%[a-z0-9]+]])
+  // LLVM:   [[VPADDL1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> [[A]])
+  // LLVM:   ret <4 x i16> [[VPADDL1_I]]
+}
+
+uint32x2_t test_vpaddl_u16(uint16x4_t a) {
+  return vpaddl_u16(a);
+
+  // CIR-LABEL: vpaddl_u16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uaddlp" {{%.*}} : (!cir.vector<!u16i x 4>) -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}test_vpaddl_u16(<4 x i16>{{.*}}[[A:%[a-z0-9]+]])
+  // LLVM:   [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> [[A]])
+  // LLVM:   ret <2 x i32> [[VPADDL1_I]]
+}
+
+uint64x1_t test_vpaddl_u32(uint32x2_t a) {
+  return vpaddl_u32(a);
+
+  // CIR-LABEL: vpaddl_u32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uaddlp" {{%.*}} : (!cir.vector<!u32i x 2>) -> !cir.vector<!u64i x 1>
+
+  // LLVM: {{.*}}test_vpaddl_u32(<2 x i32>{{.*}}[[A:%[a-z0-9]+]])
+  // LLVM:   [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> [[A]])
+  // LLVM:   ret <1 x i64> [[VPADDL1_I]]
+}
+
+int16x8_t test_vpaddlq_s8(int8x16_t a) {
+  return vpaddlq_s8(a);
+
+  // CIR-LABEL: vpaddlq_s8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.saddlp" {{%.*}} : (!cir.vector<!s8i x 16>) -> !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}test_vpaddlq_s8(<16 x i8>{{.*}}[[A:%[a-z0-9]+]])
+  // LLVM:   [[VPADDL1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> [[A]])
+  // LLVM:   ret <8 x i16> [[VPADDL1_I]]
+}
+
+int32x4_t test_vpaddlq_s16(int16x8_t a) {
+  return vpaddlq_s16(a);
+
+  // CIR-LABEL: vpaddlq_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.saddlp" {{%.*}} : (!cir.vector<!s16i x 8>) -> !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}test_vpaddlq_s16(<8 x i16>{{.*}}[[A:%[a-z0-9]+]])
+  // LLVM:   [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> [[A]])
+  // LLVM:   ret <4 x i32> [[VPADDL1_I]]
+}
+
+int64x2_t test_vpaddlq_s32(int32x4_t a) {
+  return vpaddlq_s32(a);
+
+  // CIR-LABEL: vpaddlq_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.saddlp" {{%.*}} : (!cir.vector<!s32i x 4>) -> !cir.vector<!s64i x 2>
+
+  // LLVM: {{.*}}test_vpaddlq_s32(<4 x i32>{{.*}}[[A:%[a-z0-9]+]])
+  // LLVM:   [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> [[A]])
+  // LLVM:   ret <2 x i64> [[VPADDL1_I]]
+}
+
+uint16x8_t test_vpaddlq_u8(uint8x16_t a) {
+  return vpaddlq_u8(a);
+
+  // CIR-LABEL: vpaddlq_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uaddlp" {{%.*}} : (!cir.vector<!u8i x 16>) -> !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}test_vpaddlq_u8(<16 x i8>{{.*}}[[A:%[a-z0-9]+]])
+  // LLVM:   [[VPADDL1_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> [[A]])
+  // LLVM:   ret <8 x i16> [[VPADDL1_I]]
+}
+
+uint32x4_t test_vpaddlq_u16(uint16x8_t a) {
+  return vpaddlq_u16(a);
+
+  // CIR-LABEL: vpaddlq_u16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uaddlp" {{%.*}} : (!cir.vector<!u16i x 8>) -> !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}test_vpaddlq_u16(<8 x i16>{{.*}}[[A:%[a-z0-9]+]])
+  // LLVM:   [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[A]])
+  // LLVM:   ret <4 x i32> [[VPADDL1_I]]
+}
+
+uint64x2_t test_vpaddlq_u32(uint32x4_t a) {
+  return vpaddlq_u32(a);
+
+  // CIR-LABEL: vpaddlq_u32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uaddlp" {{%.*}} : (!cir.vector<!u32i x 4>) -> !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}test_vpaddlq_u32(<4 x i32>{{.*}}[[A:%[a-z0-9]+]])
+  // LLVM:   [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[A]])
+  // LLVM:   ret <2 x i64> [[VPADDL1_I]]
+}
+
+int8x8_t test_vabs_s8(int8x8_t a) {
+  return vabs_s8(a);
+
+  // CIR-LABEL: vabs_s8
+  // CIR: cir.abs {{%.*}} : !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}test_vabs_s8(<8 x i8>{{.*}}[[a:%.*]])
+  // LLVM: [[VABS_I:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[a]], i1 false)
+  // LLVM: ret <8 x i8> [[VABS_I]]
+}
+
+int8x16_t test_vabsq_s8(int8x16_t a) {
+  return vabsq_s8(a);
+
+  // CIR-LABEL: vabsq_s8
+  // CIR: cir.abs {{%.*}} : !cir.vector<!s8i x 16>
+
+  // LLVM: {{.*}}test_vabsq_s8(<16 x i8>{{.*}}[[a:%.*]])
+  // LLVM: [[VABS_I:%.*]] = call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[a]], i1 false)
+  // LLVM: ret <16 x i8> [[VABS_I]]
+}
+
+int16x4_t test_vabs_s16(int16x4_t a) {
+  return vabs_s16(a);
+
+  // CIR-LABEL: vabs_s16
+  // CIR: [[TMP0:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s16i x 4>
+  // CIR: cir.abs [[TMP0]] : !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}test_vabs_s16(<4 x i16>{{.*}}[[a:%.*]])
+  // LLVM:   [[VABS1_I:%.*]] = call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[a]], i1 false)
+  // LLVM:   ret <4 x i16> [[VABS1_I]]
+}
+
+int16x8_t test_vabsq_s16(int16x8_t a) {
+  return vabsq_s16(a);
+
+  // CIR-LABEL: vabsq_s16
+  // CIR: [[TMP0:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s16i x 8>
+  // CIR: cir.abs [[TMP0]] : !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}test_vabsq_s16(<8 x i16>{{.*}}[[a:%.*]])
+  // LLVM:   [[VABS1_I:%.*]] = call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[a]], i1 false)
+  // LLVM:   ret <8 x i16> [[VABS1_I]]
+}
+
+int32x2_t test_vabs_s32(int32x2_t a) {
+  return vabs_s32(a);
+
+  // CIR-LABEL: vabs_s32
+  // CIR: [[TMP0:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s32i x 2>
+  // CIR: cir.abs [[TMP0]] : !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}test_vabs_s32(<2 x i32>{{.*}}[[a:%.*]])
+  // LLVM:   [[VABS1_I:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[a]], i1 false)
+  // LLVM:   ret <2 x i32> [[VABS1_I]]
+}
+
+int32x4_t test_vabsq_s32(int32x4_t a) {
+  return vabsq_s32(a);
+
+  // CIR-LABEL: vabsq_s32
+  // CIR: [[TMP0:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s32i x 4>
+  // CIR: cir.abs [[TMP0]] : !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}test_vabsq_s32(<4 x i32>{{.*}}[[a:%.*]])
+  // LLVM:   [[VABS1_I:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[a]], i1 false)
+  // LLVM:   ret <4 x i32> [[VABS1_I]]
+}
+
+int64x1_t test_vabs_s64(int64x1_t a) {
+  return vabs_s64(a);
+
+  // CIR-LABEL: vabs_s64
+  // CIR: [[TMP0:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s64i x 1>
+  // CIR: cir.abs [[TMP0]] : !cir.vector<!s64i x 1>
+
+  // LLVM: {{.*}}test_vabs_s64(<1 x i64>{{.*}}[[a:%.*]])
+  // LLVM:   [[VABS1_I:%.*]] = call <1 x i64> @llvm.abs.v1i64(<1 x i64> [[a]], i1 false)
+  // LLVM:   ret <1 x i64> [[VABS1_I]]
+}
+
+int64x2_t test_vabsq_s64(int64x2_t a) {
+  return vabsq_s64(a);
+
+  // CIR-LABEL: vabsq_s64
+  // CIR: [[TMP0:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s64i x 2>
+  // CIR: cir.abs [[TMP0]] : !cir.vector<!s64i x 2>
+
+  // LLVM: {{.*}}test_vabsq_s64(<2 x i64>{{.*}}[[a:%.*]])
+  // LLVM:   [[VABS1_I:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[a]], i1 false)
+  // LLVM:   ret <2 x i64> [[VABS1_I]]
+}
+
+
+float32x2_t test_vabs_f32(float32x2_t a) {
+  return vabs_f32(a);
+
+  // CIR-LABEL: vabs_f32
+  // CIR: [[TMP0:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!cir.float x 2>
+  // CIR: cir.fabs [[TMP0]] : !cir.vector<!cir.float x 2>
+
+  // LLVM: {{.*}}test_vabs_f32(<2 x float>{{.*}}[[a:%.*]])
+  // LLVM: [[VABS_F:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[a]])
+  // LLVM: ret <2 x float> [[VABS_F]]
+}
+
+float32x4_t test_vabsq_f32(float32x4_t a) {
+  return vabsq_f32(a);
+
+  // CIR-LABEL: vabsq_f32
+  // CIR: [[TMP0:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!cir.float x 4>
+  // CIR: cir.fabs [[TMP0]] : !cir.vector<!cir.float x 4>
+
+  // LLVM: {{.*}}test_vabsq_f32(<4 x float>{{.*}}[[a:%.*]])
+  // LLVM: [[VABS_F:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[a]])
+  // LLVM: ret <4 x float> [[VABS_F]]
+}
+
+float64x1_t test_vabs_f64(float64x1_t a) {
+  return vabs_f64(a);
+
+  // CIR-LABEL: vabs_f64
+  // CIR: [[TMP0:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!cir.double x 1>
+  // CIR: cir.fabs [[TMP0]] : !cir.vector<!cir.double x 1>
+
+  // LLVM: {{.*}}test_vabs_f64(<1 x double>{{.*}}[[a:%.*]])
+  // LLVM: [[VABS_F:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> [[a]])
+  // LLVM: ret <1 x double> [[VABS_F]]
+}
+
+float64x2_t test_vabsq_f64(float64x2_t a) {
+  return vabsq_f64(a);
+
+  // CIR-LABEL: vabsq_f64
+  // CIR: [[TMP0:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!cir.double x 2>
+  // CIR: cir.fabs [[TMP0]] : !cir.vector<!cir.double x 2>
+
+  // LLVM: {{.*}}test_vabsq_f64(<2 x double>{{.*}}[[a:%.*]])
+  // LLVM: [[VABS_F:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[a]])
+  // LLVM: ret <2 x double> [[VABS_F]]
+}
+
+uint32_t test_vaddlvq_u16(uint16x8_t a) {
+  return vaddlvq_u16(a);
+
+  // CIR-LABEL: vaddlvq_u16
+  // CIR: cir.llvm.intrinsic "aarch64.neon.uaddlv" {{%.*}}: (!cir.vector<!u16i x 8>) -> !u32i
+
+  // LLVM: {{.*}}test_vaddlvq_u16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> [[A]])
+  // LLVM: ret i32 [[VADDLV_I]]
+}
+
+int32_t test_vaddlvq_s16(int16x8_t a) {
+  return vaddlvq_s16(a);
+
+  // CIR-LABEL: vaddlvq_s16
+  // CIR: cir.llvm.intrinsic "aarch64.neon.saddlv" {{%.*}}: (!cir.vector<!s16i x 8>) -> !s32i
+
+  // LLVM: {{.*}}test_vaddlvq_s16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> [[A]])
+  // LLVM: ret i32 [[VADDLV_I]]
+}
+
+int16_t test_vaddlv_s8(int8x8_t a) {
+  return vaddlv_s8(a);
+
+  // CIR-LABEL: vaddlv_s8
+  // CIR: cir.llvm.intrinsic "aarch64.neon.saddlv" {{%.*}}: (!cir.vector<!s8i x 8>) -> !s32i
+
+  // LLVM: {{.*}}test_vaddlv_s8(<8 x i8>{{.*}}[[A:%.*]])
+  // LLVM: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> [[A]])
+  // LLVM-NEXT: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
+  // LLVM-NEXT: ret i16 [[TMP0]]
+}
+
+uint16_t test_vaddlv_u8(uint8x8_t a) {
+  return vaddlv_u8(a);
+
+  // CIR-LABEL: vaddlv_u8
+  // CIR: cir.llvm.intrinsic "aarch64.neon.uaddlv" {{%.*}}: (!cir.vector<!u8i x 8>) -> !u32i
+
+  // LLVM: {{.*}}test_vaddlv_u8(<8 x i8>{{.*}}[[A:%.*]])
+  // LLVM: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> [[A]])
+  // LLVM-NEXT: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
+  // LLVM-NEXT: ret i16 [[TMP0]]
+}
+
+int32_t test_vaddlv_s16(int16x4_t a) {
+  return vaddlv_s16(a);
+
+  // CIR-LABEL: vaddlv_s16
+  // CIR: cir.llvm.intrinsic "aarch64.neon.saddlv" {{%.*}}: (!cir.vector<!s16i x 4>) -> !s32i
+
+  // LLVM: {{.*}}test_vaddlv_s16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> [[A]])
+  // LLVM: ret i32 [[VADDLV_I]]
+}
+
+int32_t test_vaddlv_u16(uint16x4_t a) {
+  return vaddlv_u16(a);
+
+  // CIR-LABEL: vaddlv_u16
+  // CIR: cir.llvm.intrinsic "aarch64.neon.uaddlv" {{%.*}}: (!cir.vector<!u16i x 4>) -> !u32i
+
+  // LLVM: {{.*}}test_vaddlv_u16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> [[A]])
+  // LLVM: ret i32 [[VADDLV_I]]
+}
+
+uint16_t test_vaddv_u16(uint16x4_t a) {
+  return vaddv_u16(a);
+
+  // CIR-LABEL: vaddv_u16
+  // CIR: [[VADDV_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.uaddv" {{%.*}} : (!cir.vector<!u16i x 4>) -> !s32i
+  // CIR: cir.cast integral [[VADDV_I]] : !s32i -> !u16i
+
+  // LLVM: {{.*}}test_vaddv_u16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> [[A]])
+  // LLVM-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16
+  // LLVM-NEXT:    ret i16 [[TMP0]]
+}
+
+int16_t test_vaddv_s16(int16x4_t a) {
+  return vaddv_s16(a);
+
+  // CIR-LABEL: vaddv_s16
+  // CIR: [[VADDV_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.saddv" {{%.*}} : (!cir.vector<!s16i x 4>) -> !s32i
+  // CIR: cir.cast integral [[VADDV_I]] : !s32i -> !s16i
+
+  // LLVM: {{.*}}test_vaddv_s16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM:    [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> [[A]])
+  // LLVM-NEXT:    [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i16
+  // LLVM-NEXT:    ret i16 [[TMP0]]
+}
+
+uint32_t test_vaddvq_u32(uint32x4_t a) {
+  return vaddvq_u32(a);
+
+  // CIR-LABEL: vaddvq_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.uaddv" {{%.*}} : (!cir.vector<!u32i x 4>) -> !u32i
+
+  // LLVM-LABEL: test_vaddvq_u32
+  // LLVM:   [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> {{%.*}})
+  // LLVM:   ret i32 [[VADDVQ_U32_I]]
+}
+
+uint64_t test_vaddvq_u64(uint64x2_t a) {
+  return vaddvq_u64(a);
+
+  // CIR-LABEL: vaddvq_u64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.uaddv" {{%.*}} : (!cir.vector<!u64i x 2>) -> !u64i
+
+  // LLVM-LABEL: test_vaddvq_u64
+  // LLVM:   [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> {{%.*}})
+  // LLVM:   ret i64 [[VADDVQ_U64_I]]
+}
+
+int32_t test_vaddvq_s32(int32x4_t a) {
+  return vaddvq_s32(a);
+
+  // CIR-LABEL: vaddvq_s32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.saddv" {{%.*}} : (!cir.vector<!s32i x 4>) -> !s32i
+
+  // LLVM-LABEL: test_vaddvq_s32
+  // LLVM-SAME: (<4 x i32> [[a:%.*]])
+  // LLVM:   [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> [[a]])
+  // LLVM:   ret i32 [[VADDVQ_S32_I]]
+}
+
+int64_t test_vaddvq_s64(int64x2_t a) {
+  return vaddvq_s64(a);
+
+  // CIR-LABEL: vaddvq_s64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.saddv" {{%.*}} : (!cir.vector<!s64i x 2>) -> !s64i
+
+  // LLVM-LABEL: test_vaddvq_s64
+  // LLVM-SAME: (<2 x i64> [[a:%.*]])
+  // LLVM:   [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> [[a]])
+  // LLVM:   ret i64 [[VADDVQ_S64_I]]
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/AArch64/neon-crypto.c b/clang/test/CIR/Incubator/CodeGen/AArch64/neon-crypto.c
new file mode 100644
index 0000000000000..8f83d0cc47392
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/AArch64/neon-crypto.c
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
+// RUN:   -target-feature +sha2 -target-feature +aes \
+// RUN:   -disable-O0-optnone -fclangir -emit-cir -o %t.cir %s 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
+// RUN:  -fclangir -target-feature +sha2 -target-feature +aes \
+// RUN:   -disable-O0-optnone  -emit-llvm -o - %s \
+// RUN: | opt -S -passes=mem2reg,simplifycfg -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// REQUIRES: aarch64-registered-target || arm-registered-target
+
+#include <arm_neon.h>
+
+uint8x16_t test_vaesmcq_u8(uint8x16_t data) {
+  return vaesmcq_u8(data);
+
+  // CIR-LABEL: vaesmcq_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.crypto.aesmc" {{%.*}} : (!cir.vector<!u8i x 16>) -> !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}vaesmcq_u8(<16 x i8>{{.*}}[[DATA:%.*]])
+  // LLVM: [[RES:%.*]] = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> [[DATA]])
+  // LLVM: ret <16 x i8> [[RES]]
+}
+
+uint8x16_t test_vaeseq_u8(uint8x16_t data, uint8x16_t key) {
+  return vaeseq_u8(data, key);
+
+  // CIR-LABEL: vaeseq_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.crypto.aese" {{%.*}}, {{%.*}} : (!cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>) -> !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}vaeseq_u8(<16 x i8>{{.*}}[[DATA:%.*]], <16 x i8>{{.*}}[[KEY:%.*]])
+  // LLVM: [[RES:%.*]] = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> [[DATA]], <16 x i8> [[KEY]])
+  // LLVM: ret <16 x i8> [[RES]]
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/AArch64/neon-ext-mov.c b/clang/test/CIR/Incubator/CodeGen/AArch64/neon-ext-mov.c
new file mode 100644
index 0000000000000..525b0d46defac
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/AArch64/neon-ext-mov.c
@@ -0,0 +1,215 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -target-feature +neon \
+// RUN:    -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -target-feature +neon \
+// RUN:    -fclangir -disable-O0-optnone -fno-clangir-call-conv-lowering \
+// RUN:  -flax-vector-conversions=none -emit-llvm -o - %s \
+// RUN: | opt -S -passes=instcombine,mem2reg,simplifycfg -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// REQUIRES: aarch64-registered-target || arm-registered-target
+
+// This test file contains test cases for the intrinsics that move data between
+// registers and vectors, such as mov, get, set, and ext. We dedicate this file 
+// to them because they are many. The file neon.c covers some such intrinsics
+// that are not in this file.
+
+#include <arm_neon.h>
+
+int8x8_t test_vext_s8(int8x8_t a, int8x8_t b) {
+  return vext_s8(a, b, 2);
+
+  // CIR-LABEL: vext_s8
+  // CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s8i x 8>) 
+  // CIR-SAME: [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, 
+  // CIR-SAME:  #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, 
+  // CIR-SAME:  #cir.int<8> : !s32i, #cir.int<9> : !s32i] : !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}test_vext_s8(<8 x i8>{{.*}}[[A:%.*]], <8 x i8>{{.*}}[[B:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], 
+  // LLVM-SAME: <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+  // LLVM: ret <8 x i8> [[RES]]
+}
+
+int8x16_t test_vextq_s8(int8x16_t a, int8x16_t b) {
+  return vextq_s8(a, b, 2);
+
+  // CIR-LABEL: vextq_s8
+  // CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s8i x 16>) 
+  // CIR-SAME: [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, 
+  // CIR-SAME:  #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, 
+  // CIR-SAME:  #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, 
+  // CIR-SAME:  #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i,
+  // CIR-SAME:  #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<16> : !s32i,  
+  // CIR-SAME:  #cir.int<17> : !s32i] : !cir.vector<!s8i x 16>
+
+  // LLVM: {{.*}}test_vextq_s8(<16 x i8>{{.*}}[[A:%.*]], <16 x i8>{{.*}}[[B:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], 
+  // LLVM-SAME: <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9
+  // LLVM-SAME: i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
+  // LLVM: ret <16 x i8> [[RES]]
+}
+
+int16x4_t test_vext_s16(int16x4_t a, int16x4_t b) {
+  return vext_s16(a, b, 3);
+
+  // CIR-LABEL: vext_s16
+  // CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s16i x 4>) 
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i,
+  // CIR-SAME:  #cir.int<6> : !s32i] : !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}test_vext_s16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], 
+  // LLVM-SAME: <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+  // LLVM: ret <4 x i16> [[RES]]
+}
+
+int16x8_t test_vextq_s16(int16x8_t a, int16x8_t b) {
+  return vextq_s16(a, b, 3);
+
+  // CIR-LABEL: vextq_s16
+  // CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s16i x 8>) 
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, 
+  // CIR-SAME:  #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i, 
+  // CIR-SAME:  #cir.int<9> : !s32i, #cir.int<10> : !s32i] : !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}test_vextq_s16(<8 x i16>{{.*}}[[A:%.*]], <8 x i16>{{.*}}[[B:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], 
+  // LLVM-SAME: <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+  // LLVM: ret <8 x i16> [[RES]]
+}
+
+
+uint16x4_t test_vext_u16(uint16x4_t a, uint16x4_t b) {
+  return vext_u16(a, b, 3);
+
+  // CIR-LABEL: vext_u16
+  // CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!u16i x 4>) 
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i,
+  // CIR-SAME:  #cir.int<6> : !s32i] : !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}test_vext_u16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], 
+  // LLVM-SAME: <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+  // LLVM: ret <4 x i16> [[RES]]
+}
+
+uint16x8_t test_vextq_u16(uint16x8_t a, uint16x8_t b) {
+  return vextq_u16(a, b, 3);
+
+  // CIR-LABEL: vextq_u16
+  // CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!u16i x 8>) 
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, 
+  // CIR-SAME:  #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i, 
+  // CIR-SAME:  #cir.int<9> : !s32i, #cir.int<10> : !s32i] : !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}test_vextq_u16(<8 x i16>{{.*}}[[A:%.*]], <8 x i16>{{.*}}[[B:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], 
+  // LLVM-SAME: <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+  // LLVM: ret <8 x i16> [[RES]]
+}
+
+int32x2_t test_vext_s32(int32x2_t a, int32x2_t b) {
+  return vext_s32(a, b, 1);
+
+  // CIR-LABEL: vext_s32
+  // CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s32i x 2>) 
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}test_vext_s32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[B:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], 
+  // LLVM-SAME: <2 x i32> <i32 1, i32 2>
+  // LLVM: ret <2 x i32> [[RES]]
+}
+
+int32x4_t test_vextq_s32(int32x4_t a, int32x4_t b) {
+  return vextq_s32(a, b, 1);
+
+  // CIR-LABEL: vextq_s32
+  // CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s32i x 4>) 
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<2> : !s32i,
+  // CIR-SAME:  #cir.int<3> : !s32i, #cir.int<4> : !s32i] : !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}test_vextq_s32(<4 x i32>{{.*}}[[A:%.*]], <4 x i32>{{.*}}[[B:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], 
+  // LLVM-SAME: <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  // LLVM: ret <4 x i32> [[RES]]
+}
+
+int64x1_t test_vext_s64(int64x1_t a, int64x1_t b) {
+  return vext_s64(a, b, 0);
+  
+  // CIR-LABEL: vext_s64
+  // CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s64i x 1>) 
+  // CIR-SAME: [#cir.int<0> : !s32i] : !cir.vector<!s64i x 1>
+
+  // LLVM: {{.*}}test_vext_s64(<1 x i64>{{.*}}[[A:%.*]], <1 x i64>{{.*}}[[B:%.*]])
+  // LLVM: ret <1 x i64> [[A]]
+}
+
+int64x2_t test_vextq_s64(int64x2_t a, int64x2_t b) {
+  return vextq_s64(a, b, 1);
+
+  // CIR-LABEL: vextq_s64
+  // CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s64i x 2>) 
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!s64i x 2>
+
+  // LLVM: {{.*}}test_vextq_s64(<2 x i64>{{.*}}[[A:%.*]], <2 x i64>{{.*}}[[B:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <2 x i64> [[A]], <2 x i64> [[B]], 
+  // LLVM-SAME: <2 x i32> <i32 1, i32 2>
+  // LLVM: ret <2 x i64> [[RES]]
+}
+
+float32x2_t test_vext_f32(float32x2_t a, float32x2_t b) {
+  return vext_f32(a, b, 1);
+
+  // CIR-LABEL: vext_f32
+  // CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!cir.float x 2>) 
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!cir.float x 2>
+
+  // LLVM: {{.*}}test_vext_f32(<2 x float>{{.*}}[[A:%.*]], <2 x float>{{.*}}[[B:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <2 x float> [[A]], <2 x float> [[B]], 
+  // LLVM-SAME: <2 x i32> <i32 1, i32 2>
+  // LLVM: ret <2 x float> [[RES]]
+}
+
+float32x4_t test_vextq_f32(float32x4_t a, float32x4_t b) {
+  return vextq_f32(a, b, 1);
+
+  // CIR-LABEL: vextq_f32
+  // CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!cir.float x 4>) 
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, 
+  // CIR-SAME:  #cir.int<4> : !s32i] : !cir.vector<!cir.float x 4>
+
+  // LLVM: {{.*}}test_vextq_f32(<4 x float>{{.*}}[[A:%.*]], <4 x float>{{.*}}[[B:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], 
+  // LLVM-SAME: <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  // LLVM: ret <4 x float> [[RES]]
+}
+
+
+float64x1_t test_vext_f64(float64x1_t a, float64x1_t b) {
+  return vext_f64(a, b, 0);
+  
+  // CIR-LABEL: vext_f64
+  // CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!cir.double x 1>) 
+  // CIR-SAME: [#cir.int<0> : !s32i] : !cir.vector<!cir.double x 1>
+
+  // LLVM: {{.*}}test_vext_f64(<1 x double>{{.*}}[[A:%.*]], <1 x double>{{.*}}[[B:%.*]])
+  // LLVM: ret <1 x double> [[A]]
+}
+
+float64x2_t test_vextq_f64(float64x2_t a, float64x2_t b) {
+  return vextq_f64(a, b, 1);
+
+  // CIR-LABEL: vextq_f64
+  // CIR: {{%.*}}= cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!cir.double x 2>) 
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!cir.double x 2>
+
+  // LLVM: {{.*}}test_vextq_f64(<2 x double>{{.*}}[[A:%.*]], <2 x double>{{.*}}[[B:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <2 x double> [[A]], <2 x double> [[B]], 
+  // LLVM-SAME: <2 x i32> <i32 1, i32 2>
+  // LLVM: ret <2 x double> [[RES]]
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/AArch64/neon-fp16.c b/clang/test/CIR/Incubator/CodeGen/AArch64/neon-fp16.c
new file mode 100644
index 0000000000000..44738109eccd8
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/AArch64/neon-fp16.c
@@ -0,0 +1,693 @@
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 \
+// RUN:    -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 \
+// RUN:    -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -emit-llvm -fno-clangir-call-conv-lowering -o - %s \
+// RUN: | opt -S -passes=mem2reg,simplifycfg -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// REQUIRES: aarch64-registered-target || arm-registered-target
+
+// This test mimics clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c, which eventually
+// CIR shall be able to support fully. Since this is going to take some time to converge,
+// the unsupported/NYI code is commented out, so that we can incrementally improve this.
+// The NYI filecheck used contains the LLVM output from OG codegen that should guide the
+// correct result when implementing this into the CIR pipeline.
+
+#include <arm_fp16.h>
+
+// CIR-LABEL: vabsh_f16
+// CIR: {{%.*}}  = cir.fabs {{%.*}} : !cir.f16
+//
+// LLVM-LABEL: test_vabsh_f16
+// LLVM-SAME: (half [[a:%.]])
+// LLVM:  [[ABS:%.*]] =  call half @llvm.fabs.f16(half [[a]])
+// LLVM:  ret half [[ABS]]
+float16_t test_vabsh_f16(float16_t a) {
+  return vabsh_f16(a);
+}
+
+// NYI-LABEL: test_vceqzh_f16
+// NYI:  [[TMP1:%.*]] = fcmp oeq half %a, 0xH0000
+// NYI:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// NYI:  ret i16 [[TMP2]]
+// uint16_t test_vceqzh_f16(float16_t a) {
+//   return vceqzh_f16(a);
+// }
+
+// NYI-LABEL: test_vcgezh_f16
+// NYI:  [[TMP1:%.*]] = fcmp oge half %a, 0xH0000
+// NYI:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// NYI:  ret i16 [[TMP2]]
+// uint16_t test_vcgezh_f16(float16_t a) {
+//   return vcgezh_f16(a);
+// }
+
+// NYI-LABEL: test_vcgtzh_f16
+// NYI:  [[TMP1:%.*]] = fcmp ogt half %a, 0xH0000
+// NYI:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// NYI:  ret i16 [[TMP2]]
+// uint16_t test_vcgtzh_f16(float16_t a) {
+//   return vcgtzh_f16(a);
+// }
+
+// NYI-LABEL: test_vclezh_f16
+// NYI:  [[TMP1:%.*]] = fcmp ole half %a, 0xH0000
+// NYI:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// NYI:  ret i16 [[TMP2]]
+// uint16_t test_vclezh_f16(float16_t a) {
+//   return vclezh_f16(a);
+// }
+
+// NYI-LABEL: test_vcltzh_f16
+// NYI:  [[TMP1:%.*]] = fcmp olt half %a, 0xH0000
+// NYI:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// NYI:  ret i16 [[TMP2]]
+// uint16_t test_vcltzh_f16(float16_t a) {
+//   return vcltzh_f16(a);
+// }
+
+// NYI-LABEL: test_vcvth_f16_s16
+// NYI:  [[VCVT:%.*]] = sitofp i16 %a to half
+// NYI:  ret half [[VCVT]]
+// float16_t test_vcvth_f16_s16 (int16_t a) {
+//   return vcvth_f16_s16(a);
+// }
+
+// NYI-LABEL: test_vcvth_f16_s32
+// NYI:  [[VCVT:%.*]] = sitofp i32 %a to half
+// NYI:  ret half [[VCVT]]
+// float16_t test_vcvth_f16_s32 (int32_t a) {
+//   return vcvth_f16_s32(a);
+// }
+
+// NYI-LABEL: test_vcvth_f16_s64
+// NYI:  [[VCVT:%.*]] = sitofp i64 %a to half
+// NYI:  ret half [[VCVT]]
+// float16_t test_vcvth_f16_s64 (int64_t a) {
+//   return vcvth_f16_s64(a);
+// }
+
+// NYI-LABEL: test_vcvth_f16_u16
+// NYI:  [[VCVT:%.*]] = uitofp i16 %a to half
+// NYI:  ret half [[VCVT]]
+// float16_t test_vcvth_f16_u16 (uint16_t a) {
+//   return vcvth_f16_u16(a);
+// }
+
+// NYI-LABEL: test_vcvth_f16_u32
+// NYI:  [[VCVT:%.*]] = uitofp i32 %a to half
+// NYI:  ret half [[VCVT]]
+// float16_t test_vcvth_f16_u32 (uint32_t a) {
+//   return vcvth_f16_u32(a);
+// }
+
+// NYI-LABEL: test_vcvth_f16_u64
+// NYI:  [[VCVT:%.*]] = uitofp i64 %a to half
+// NYI:  ret half [[VCVT]]
+// float16_t test_vcvth_f16_u64 (uint64_t a) {
+//   return vcvth_f16_u64(a);
+// }
+
+// NYI-LABEL: test_vcvth_s16_f16
+// NYI:  [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
+// NYI:  [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
+// NYI:  ret i16 [[TRUNC]]
+// int16_t test_vcvth_s16_f16 (float16_t a) {
+//   return vcvth_s16_f16(a);
+// }
+
+// NYI-LABEL: test_vcvth_s32_f16
+// NYI:  [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
+// NYI:  ret i32 [[VCVT]]
+// int32_t test_vcvth_s32_f16 (float16_t a) {
+//   return vcvth_s32_f16(a);
+// }
+
+// NYI-LABEL: test_vcvth_s64_f16
+// NYI:  [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a)
+// NYI:  ret i64 [[VCVT]]
+// int64_t test_vcvth_s64_f16 (float16_t a) {
+//   return vcvth_s64_f16(a);
+// }
+
+// NYI-LABEL: test_vcvth_u16_f16
+// NYI:  [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
+// NYI:  [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
+// NYI:  ret i16 [[TRUNC]]
+// uint16_t test_vcvth_u16_f16 (float16_t a) {
+//   return vcvth_u16_f16(a);
+// }
+
+// NYI-LABEL: test_vcvth_u32_f16
+// NYI:  [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
+// NYI:  ret i32 [[VCVT]]
+// uint32_t test_vcvth_u32_f16 (float16_t a) {
+//   return vcvth_u32_f16(a);
+// }
+
+// NYI-LABEL: test_vcvth_u64_f16
+// NYI:  [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half %a)
+// NYI:  ret i64 [[VCVT]]
+// uint64_t test_vcvth_u64_f16 (float16_t a) {
+//   return vcvth_u64_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtah_s16_f16
+// NYI: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a)
+// NYI: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
+// NYI: ret i16 [[RET]]
+// int16_t test_vcvtah_s16_f16 (float16_t a) {
+//   return vcvtah_s16_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtah_s32_f16
+// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a)
+// NYI: ret i32 [[VCVT]]
+// int32_t test_vcvtah_s32_f16 (float16_t a) {
+//   return vcvtah_s32_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtah_s64_f16
+// NYI: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtas.i64.f16(half %a)
+// NYI: ret i64 [[VCVT]]
+// int64_t test_vcvtah_s64_f16 (float16_t a) {
+//   return vcvtah_s64_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtah_u16_f16
+// NYI: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a)
+// NYI: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
+// NYI: ret i16 [[RET]]
+// uint16_t test_vcvtah_u16_f16 (float16_t a) {
+//   return vcvtah_u16_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtah_u32_f16
+// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a)
+// NYI: ret i32 [[VCVT]]
+// uint32_t test_vcvtah_u32_f16 (float16_t a) {
+//   return vcvtah_u32_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtah_u64_f16
+// NYI: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtau.i64.f16(half %a)
+// NYI: ret i64 [[VCVT]]
+// uint64_t test_vcvtah_u64_f16 (float16_t a) {
+//   return vcvtah_u64_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtmh_s16_f16
+// NYI: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a)
+// NYI: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
+// NYI: ret i16 [[RET]]
+// int16_t test_vcvtmh_s16_f16 (float16_t a) {
+//   return vcvtmh_s16_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtmh_s32_f16
+// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a)
+// NYI: ret i32 [[VCVT]]
+// int32_t test_vcvtmh_s32_f16 (float16_t a) {
+//   return vcvtmh_s32_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtmh_s64_f16
+// NYI: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a)
+// NYI: ret i64 [[VCVT]]
+// int64_t test_vcvtmh_s64_f16 (float16_t a) {
+//   return vcvtmh_s64_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtmh_u16_f16
+// NYI: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a)
+// NYI: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
+// NYI: ret i16 [[RET]]
+// uint16_t test_vcvtmh_u16_f16 (float16_t a) {
+//   return vcvtmh_u16_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtmh_u32_f16
+// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a)
+// NYI: ret i32 [[VCVT]]
+// uint32_t test_vcvtmh_u32_f16 (float16_t a) {
+//   return vcvtmh_u32_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtmh_u64_f16
+// NYI: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half %a)
+// NYI: ret i64 [[VCVT]]
+// uint64_t test_vcvtmh_u64_f16 (float16_t a) {
+//   return vcvtmh_u64_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtnh_s16_f16
+// NYI: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a)
+// NYI: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
+// NYI: ret i16 [[RET]]
+// int16_t test_vcvtnh_s16_f16 (float16_t a) {
+//   return vcvtnh_s16_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtnh_s32_f16
+// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a)
+// NYI: ret i32 [[VCVT]]
+// int32_t test_vcvtnh_s32_f16 (float16_t a) {
+//   return vcvtnh_s32_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtnh_s64_f16
+// NYI: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtns.i64.f16(half %a)
+// NYI: ret i64 [[VCVT]]
+// int64_t test_vcvtnh_s64_f16 (float16_t a) {
+//   return vcvtnh_s64_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtnh_u16_f16
+// NYI: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a)
+// NYI: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
+// NYI: ret i16 [[RET]]
+// uint16_t test_vcvtnh_u16_f16 (float16_t a) {
+//   return vcvtnh_u16_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtnh_u32_f16
+// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a)
+// NYI: ret i32 [[VCVT]]
+// uint32_t test_vcvtnh_u32_f16 (float16_t a) {
+//   return vcvtnh_u32_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtnh_u64_f16
+// NYI: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half %a)
+// NYI: ret i64 [[VCVT]]
+// uint64_t test_vcvtnh_u64_f16 (float16_t a) {
+//   return vcvtnh_u64_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtph_s16_f16
+// NYI: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a)
+// NYI: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
+// NYI: ret i16 [[RET]]
+// int16_t test_vcvtph_s16_f16 (float16_t a) {
+//   return vcvtph_s16_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtph_s32_f16
+// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a)
+// NYI: ret i32 [[VCVT]]
+// int32_t test_vcvtph_s32_f16 (float16_t a) {
+//   return vcvtph_s32_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtph_s64_f16
+// NYI: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtps.i64.f16(half %a)
+// NYI: ret i64 [[VCVT]]
+// int64_t test_vcvtph_s64_f16 (float16_t a) {
+//   return vcvtph_s64_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtph_u16_f16
+// NYI: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a)
+// NYI: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
+// NYI: ret i16 [[RET]]
+// uint16_t test_vcvtph_u16_f16 (float16_t a) {
+//   return vcvtph_u16_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtph_u32_f16
+// NYI: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a)
+// NYI: ret i32 [[VCVT]]
+// uint32_t test_vcvtph_u32_f16 (float16_t a) {
+//   return vcvtph_u32_f16(a);
+// }
+
+// NYI-LABEL: test_vcvtph_u64_f16
+// NYI: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half %a)
+// NYI: ret i64 [[VCVT]]
+// uint64_t test_vcvtph_u64_f16 (float16_t a) {
+//   return vcvtph_u64_f16(a);
+// }
+
+// NYI-LABEL: test_vnegh_f16
+// NYI: [[NEG:%.*]] = fneg half %a
+// NYI: ret half [[NEG]]
+// float16_t test_vnegh_f16(float16_t a) {
+//   return vnegh_f16(a);
+// }
+
+// NYI-LABEL: test_vrecpeh_f16
+// NYI: [[VREC:%.*]] = call half @llvm.aarch64.neon.frecpe.f16(half %a)
+// NYI: ret half [[VREC]]
+// float16_t test_vrecpeh_f16(float16_t a) {
+//   return vrecpeh_f16(a);
+// }
+
+// NYI-LABEL: test_vrecpxh_f16
+// NYI: [[VREC:%.*]] = call half @llvm.aarch64.neon.frecpx.f16(half %a)
+// NYI: ret half [[VREC]]
+// float16_t test_vrecpxh_f16(float16_t a) {
+//   return vrecpxh_f16(a);
+// }
+
+// NYI-LABEL: test_vrndh_f16
+// NYI:  [[RND:%.*]] =  call half @llvm.trunc.f16(half %a)
+// NYI:  ret half [[RND]]
+// float16_t test_vrndh_f16(float16_t a) {
+//   return vrndh_f16(a);
+// }
+
+// NYI-LABEL: test_vrndah_f16
+// NYI:  [[RND:%.*]] =  call half @llvm.round.f16(half %a)
+// NYI:  ret half [[RND]]
+// float16_t test_vrndah_f16(float16_t a) {
+//   return vrndah_f16(a);
+// }
+
+// NYI-LABEL: test_vrndih_f16
+// NYI:  [[RND:%.*]] =  call half @llvm.nearbyint.f16(half %a)
+// NYI:  ret half [[RND]]
+// float16_t test_vrndih_f16(float16_t a) {
+//   return vrndih_f16(a);
+// }
+
+// NYI-LABEL: test_vrndmh_f16
+// NYI:  [[RND:%.*]] =  call half @llvm.floor.f16(half %a)
+// NYI:  ret half [[RND]]
+// float16_t test_vrndmh_f16(float16_t a) {
+//   return vrndmh_f16(a);
+// }
+
+// NYI-LABEL: test_vrndnh_f16
+// NYI:  [[RND:%.*]] =  call half @llvm.roundeven.f16(half %a)
+// NYI:  ret half [[RND]]
+// float16_t test_vrndnh_f16(float16_t a) {
+//   return vrndnh_f16(a);
+// }
+
+// NYI-LABEL: test_vrndph_f16
+// NYI:  [[RND:%.*]] =  call half @llvm.ceil.f16(half %a)
+// NYI:  ret half [[RND]]
+// float16_t test_vrndph_f16(float16_t a) {
+//   return vrndph_f16(a);
+// }
+
+// NYI-LABEL: test_vrndxh_f16
+// NYI:  [[RND:%.*]] =  call half @llvm.rint.f16(half %a)
+// NYI:  ret half [[RND]]
+// float16_t test_vrndxh_f16(float16_t a) {
+//   return vrndxh_f16(a);
+// }
+
+// NYI-LABEL: test_vrsqrteh_f16
+// NYI:  [[RND:%.*]] = call half @llvm.aarch64.neon.frsqrte.f16(half %a)
+// NYI:  ret half [[RND]]
+// float16_t test_vrsqrteh_f16(float16_t a) {
+//   return vrsqrteh_f16(a);
+// }
+
+// NYI-LABEL: test_vsqrth_f16
+// NYI:  [[SQR:%.*]] = call half @llvm.sqrt.f16(half %a)
+// NYI:  ret half [[SQR]]
+// float16_t test_vsqrth_f16(float16_t a) {
+//   return vsqrth_f16(a);
+// }
+
+// CIR-LABEL: vaddh_f16
+// CIR: {{%.*}} = cir.binop(add, {{%.*}}, {{%.*}}) : !cir.f16
+//
+// LLVM-LABEL: test_vaddh_f16 
+// LLVM-SAME: (half [[a:%.]], half [[b:%.]])
+// LLVM:  [[ADD:%.*]] = fadd half [[a]], [[b]]
+// LLVM:  ret half [[ADD]]
+float16_t test_vaddh_f16(float16_t a, float16_t b) {
+  return vaddh_f16(a, b);
+}
+
+// NYI-LABEL: test_vabdh_f16
+// NYI:  [[ABD:%.*]] = call half @llvm.aarch64.sisd.fabd.f16(half %a, half %b)
+// NYI:  ret half [[ABD]]
+// float16_t test_vabdh_f16(float16_t a, float16_t b) {
+//   return vabdh_f16(a, b);
+// }
+
+// NYI-LABEL: test_vcageh_f16
+// NYI:  [[FACG:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f16(half %a, half %b)
+// NYI: [[RET:%.*]] = trunc i32 [[FACG]] to i16
+// NYI: ret i16 [[RET]]
+// uint16_t test_vcageh_f16(float16_t a, float16_t b) {
+//   return vcageh_f16(a, b);
+// }
+
+// NYI-LABEL: test_vcagth_f16
+// NYI:  [[FACG:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f16(half %a, half %b)
+// NYI: [[RET:%.*]] = trunc i32 [[FACG]] to i16
+// NYI: ret i16 [[RET]]
+// uint16_t test_vcagth_f16(float16_t a, float16_t b) {
+//   return vcagth_f16(a, b);
+// }
+
+// NYI-LABEL: test_vcaleh_f16
+// NYI:  [[FACG:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f16(half %b, half %a)
+// NYI: [[RET:%.*]] = trunc i32 [[FACG]] to i16
+// NYI: ret i16 [[RET]]
+// uint16_t test_vcaleh_f16(float16_t a, float16_t b) {
+//   return vcaleh_f16(a, b);
+// }
+
+// NYI-LABEL: test_vcalth_f16
+// NYI:  [[FACG:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f16(half %b, half %a)
+// NYI: [[RET:%.*]] = trunc i32 [[FACG]] to i16
+// NYI: ret i16 [[RET]]
+// uint16_t test_vcalth_f16(float16_t a, float16_t b) {
+//   return vcalth_f16(a, b);
+// }
+
+// NYI-LABEL: test_vceqh_f16
+// NYI:  [[TMP1:%.*]] = fcmp oeq half %a, %b
+// NYI:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// NYI:  ret i16 [[TMP2]]
+// uint16_t test_vceqh_f16(float16_t a, float16_t b) {
+//   return vceqh_f16(a, b);
+// }
+
+// NYI-LABEL: test_vcgeh_f16
+// NYI:  [[TMP1:%.*]] = fcmp oge half %a, %b
+// NYI:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// NYI:  ret i16 [[TMP2]]
+// uint16_t test_vcgeh_f16(float16_t a, float16_t b) {
+//  return vcgeh_f16(a, b);
+// }
+
+// NYI-LABEL: test_vcgth_f16
+//NYI:  [[TMP1:%.*]] = fcmp ogt half %a, %b
+// NYI:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// NYI:  ret i16 [[TMP2]]
+// uint16_t test_vcgth_f16(float16_t a, float16_t b) {
+//   return vcgth_f16(a, b);
+// }
+
+// NYI-LABEL: test_vcleh_f16
+// NYI:  [[TMP1:%.*]] = fcmp ole half %a, %b
+// NYI:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// NYI:  ret i16 [[TMP2]]
+// uint16_t test_vcleh_f16(float16_t a, float16_t b) {
+//   return vcleh_f16(a, b);
+// }
+
+// NYI-LABEL: test_vclth_f16
+// NYI:  [[TMP1:%.*]] = fcmp olt half %a, %b
+// NYI:  [[TMP2:%.*]] = sext i1 [[TMP1]] to i16
+// NYI:  ret i16 [[TMP2]]
+// uint16_t test_vclth_f16(float16_t a, float16_t b) {
+//   return vclth_f16(a, b);
+// }
+
+// NYI-LABEL: test_vcvth_n_f16_s16
+// NYI: [[SEXT:%.*]] = sext i16 %a to i32
+// NYI:  [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 [[SEXT]], i32 1)
+// NYI:  ret half [[CVT]]
+// float16_t test_vcvth_n_f16_s16(int16_t a) {
+//   return vcvth_n_f16_s16(a, 1);
+// }
+
+// NYI-LABEL: test_vcvth_n_f16_s32
+// NYI:  [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 1)
+// NYI:  ret half [[CVT]]
+// float16_t test_vcvth_n_f16_s32(int32_t a) {
+//   return vcvth_n_f16_s32(a, 1);
+// }
+
+// NYI-LABEL: test_vcvth_n_f16_s64
+// NYI:  [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 1)
+// NYI:  ret half [[CVT]]
+// float16_t test_vcvth_n_f16_s64(int64_t a) {
+//   return vcvth_n_f16_s64(a, 1);
+// }
+
+// NYI-LABEL: test_vcvth_n_s16_f16
+// NYI:  [[CVT:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 1)
+// NYI: [[RET:%.*]] = trunc i32 [[CVT]] to i16
+// NYI: ret i16 [[RET]]
+// int16_t test_vcvth_n_s16_f16(float16_t a) {
+//   return vcvth_n_s16_f16(a, 1);
+// }
+
+// NYI-LABEL: test_vcvth_n_s32_f16
+// NYI:  [[CVT:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 1)
+// NYI:  ret i32 [[CVT]]
+// int32_t test_vcvth_n_s32_f16(float16_t a) {
+//   return vcvth_n_s32_f16(a, 1);
+// }
+
+// NYI-LABEL: test_vcvth_n_s64_f16
+// NYI:  [[CVT:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half %a, i32 1)
+// NYI:  ret i64 [[CVT]]
+// int64_t test_vcvth_n_s64_f16(float16_t a) {
+//   return vcvth_n_s64_f16(a, 1);
+// }
+
+// NYI-LABEL: test_vcvth_n_f16_u16
+// NYI: [[SEXT:%.*]] = zext i16 %a to i32
+// NYI:  [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 [[SEXT]], i32 1)
+// NYI:  ret half [[CVT]]
+// float16_t test_vcvth_n_f16_u16(int16_t a) {
+//   return vcvth_n_f16_u16(a, 1);
+// }
+
+// NYI-LABEL: test_vcvth_n_f16_u32
+// NYI:  [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 1)
+// NYI:  ret half [[CVT]]
+// float16_t test_vcvth_n_f16_u32(int32_t a) {
+//   return vcvth_n_f16_u32(a, 1);
+// }
+
+// NYI-LABEL: test_vcvth_n_f16_u64
+// NYI:  [[CVT:%.*]] = call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i64(i64 %a, i32 1)
+// NYI:  ret half [[CVT]]
+// float16_t test_vcvth_n_f16_u64(int64_t a) {
+//   return vcvth_n_f16_u64(a, 1);
+// }
+
+// NYI-LABEL: test_vcvth_n_u16_f16
+// NYI:  [[CVT:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 1)
+// NYI: [[RET:%.*]] = trunc i32 [[CVT]] to i16
+// NYI: ret i16 [[RET]]
+// int16_t test_vcvth_n_u16_f16(float16_t a) {
+//   return vcvth_n_u16_f16(a, 1);
+// }
+
+// NYI-LABEL: test_vcvth_n_u32_f16
+// NYI:  [[CVT:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 1)
+// NYI:  ret i32 [[CVT]]
+// int32_t test_vcvth_n_u32_f16(float16_t a) {
+//   return vcvth_n_u32_f16(a, 1);
+// }
+
+// NYI-LABEL: test_vcvth_n_u64_f16
+// NYI:  [[CVT:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f16(half %a, i32 1)
+// NYI:  ret i64 [[CVT]]
+// int64_t test_vcvth_n_u64_f16(float16_t a) {
+//   return vcvth_n_u64_f16(a, 1);
+// }
+
+// CIR-LABEL: vdivh_f16
+// CIR: {{%.*}} = cir.binop(div, {{%.*}}, {{%.*}}) : !cir.f16
+//
+// LLVM-LABEL: test_vdivh_f16
+// LLVM-SAME: (half [[a:%.]], half [[b:%.]])
+// LLVM:  [[DIV:%.*]] = fdiv half [[a]], [[b]]
+// LLVM:  ret half [[DIV]]
+float16_t test_vdivh_f16(float16_t a, float16_t b) {
+  return vdivh_f16(a, b);
+}
+
+// NYI-LABEL: test_vmaxh_f16
+// NYI:  [[MAX:%.*]] = call half @llvm.aarch64.neon.fmax.f16(half %a, half %b)
+// NYI:  ret half [[MAX]]
+// float16_t test_vmaxh_f16(float16_t a, float16_t b) {
+//   return vmaxh_f16(a, b);
+// }
+
+// NYI-LABEL: test_vmaxnmh_f16
+// NYI:  [[MAX:%.*]] = call half @llvm.aarch64.neon.fmaxnm.f16(half %a, half %b)
+// NYI:  ret half [[MAX]]
+// float16_t test_vmaxnmh_f16(float16_t a, float16_t b) {
+//   return vmaxnmh_f16(a, b);
+// }
+
+// NYI-LABEL: test_vminh_f16
+// NYI:  [[MIN:%.*]] = call half @llvm.aarch64.neon.fmin.f16(half %a, half %b)
+// NYI:  ret half [[MIN]]
+// float16_t test_vminh_f16(float16_t a, float16_t b) {
+//   return vminh_f16(a, b);
+// }
+
+// NYI-LABEL: test_vminnmh_f16
+// NYI:  [[MIN:%.*]] = call half @llvm.aarch64.neon.fminnm.f16(half %a, half %b)
+// NYI:  ret half [[MIN]]
+// float16_t test_vminnmh_f16(float16_t a, float16_t b) {
+//   return vminnmh_f16(a, b);
+// }
+
+// CIR-LABEL: vmulh_f16
+// CIR: {{%.*}} = cir.binop(mul, {{%.*}}, {{%.*}}) : !cir.f16
+//
+// LLVM-LABEL: test_vmulh_f16
+// LLVM-SAME: (half [[a:%.]], half [[b:%.]])
+// LLVM:  [[MUL:%.*]] = fmul half [[a]], [[b]]
+// LLVM:  ret half [[MUL]]
+float16_t test_vmulh_f16(float16_t a, float16_t b) {
+  return vmulh_f16(a, b);
+}
+
+// NYI-LABEL: test_vmulxh_f16
+// NYI:  [[MUL:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half %b)
+// NYI:  ret half [[MUL]]
+// float16_t test_vmulxh_f16(float16_t a, float16_t b) {
+//   return vmulxh_f16(a, b);
+// }
+
+// NYI-LABEL: test_vrecpsh_f16
+// NYI: [[RECPS:%.*]] = call half @llvm.aarch64.neon.frecps.f16(half %a, half %b)
+// NYI: ret half [[RECPS]]
+// float16_t test_vrecpsh_f16(float16_t a, float16_t b) {
+//   return vrecpsh_f16(a, b);
+// }
+
+// NYI-LABEL: test_vrsqrtsh_f16
+// NYI:  [[RSQRTS:%.*]] = call half @llvm.aarch64.neon.frsqrts.f16(half %a, half %b)
+// NYI:  ret half [[RSQRTS]]
+// float16_t test_vrsqrtsh_f16(float16_t a, float16_t b) {
+//   return vrsqrtsh_f16(a, b);
+// }
+
+// CIR-LABEL: vsubh_f16
+// CIR: {{%.*}} = cir.binop(sub, {{%.*}}, {{%.*}}) : !cir.f16
+//
+// LLVM-LABEL: test_vsubh_f16
+// LLVM-SAME: (half [[a:%.]], half [[b:%.]])
+// LLVM:  [[SUB:%.*]] = fsub half [[a]], [[b]]
+// LLVM:  ret half [[SUB]]
+float16_t test_vsubh_f16(float16_t a, float16_t b) {
+  return vsubh_f16(a, b);
+}
+
+// NYI-LABEL: test_vfmah_f16
+// NYI:  [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half %c, half %a)
+// NYI:  ret half [[FMA]]
+// float16_t test_vfmah_f16(float16_t a, float16_t b, float16_t c) {
+//   return vfmah_f16(a, b, c);
+// }
+
+// NYI-LABEL: test_vfmsh_f16
+// NYI:  [[SUB:%.*]] = fneg half %b
+// NYI:  [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half %a)
+// NYI:  ret half [[ADD]]
+// float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
+//   return vfmsh_f16(a, b, c);
+// }
+
diff --git a/clang/test/CIR/Incubator/CodeGen/AArch64/neon-ldst.c b/clang/test/CIR/Incubator/CodeGen/AArch64/neon-ldst.c
new file mode 100644
index 0000000000000..aac34e2be6ec8
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/AArch64/neon-ldst.c
@@ -0,0 +1,768 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -target-feature +neon \
+// RUN:    -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -target-feature +neon \
+// RUN:    -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -emit-llvm -fno-clangir-call-conv-lowering -o - %s \
+// RUN: | opt -S -passes=mem2reg,simplifycfg -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// REQUIRES: aarch64-registered-target || arm-registered-target
+
+// This test file contains tests for the AArch64 NEON load/store intrinsics.
+
+#include <arm_neon.h>
+
+int8x8_t test_vld1_lane_s8(int8_t const * ptr, int8x8_t src) {
+    return vld1_lane_s8(ptr, src, 7);
+}
+
+// CIR-LABEL: test_vld1_lane_s8
+// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s8i>
+// CIR: [[VAL:%.*]] = cir.load align(1) [[PTR]] : !cir.ptr<!s8i>, !s8i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s8i x 8>
+
+// LLVM: {{.*}}test_vld1_lane_s8(ptr{{.*}}[[PTR:%.*]], <8 x i8>{{.*}}[[SRC:%.*]])
+// LLVM: [[INTRN_VAL:%.*]] = load i8, ptr [[PTR]], align 1
+// LLVM: {{.*}} = insertelement <8 x i8> [[SRC]], i8 [[INTRN_VAL]], i32 7
+// LLVM: ret <8 x i8> {{.*}}
+
+int8x16_t test_vld1q_lane_s8(int8_t const * ptr, int8x16_t src) {
+    return vld1q_lane_s8(ptr, src, 15);
+}
+
+// CIR-LABEL: test_vld1q_lane_s8
+// CIR: [[IDX:%.*]] = cir.const #cir.int<15> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s8i>
+// CIR: [[VAL:%.*]] = cir.load align(1) [[PTR]] : !cir.ptr<!s8i>, !s8i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s8i x 16>
+
+// LLVM: {{.*}}test_vld1q_lane_s8(ptr{{.*}}[[PTR:%.*]], <16 x i8>{{.*}}[[SRC:%.*]])
+// LLVM: [[INTRN_VAL:%.*]] = load i8, ptr [[PTR]], align 1
+// LLVM: {{.*}} = insertelement <16 x i8> [[SRC]], i8 [[INTRN_VAL]], i32 15
+// LLVM: ret <16 x i8> {{.*}}
+
+uint8x16_t test_vld1q_lane_u8(uint8_t const * ptr, uint8x16_t src) {
+    return vld1q_lane_u8(ptr, src, 15);
+}
+
+// CIR-LABEL: test_vld1q_lane_u8
+// CIR: [[IDX:%.*]] = cir.const #cir.int<15> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!u8i>
+// CIR: [[VAL:%.*]] = cir.load align(1) [[PTR]] : !cir.ptr<!u8i>, !u8i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 16>
+
+// LLVM: {{.*}}test_vld1q_lane_u8(ptr{{.*}}[[PTR:%.*]], <16 x i8>{{.*}}[[SRC:%.*]])
+// LLVM: [[INTRN_VAL:%.*]] = load i8, ptr [[PTR]], align 1
+// LLVM: {{.*}} = insertelement <16 x i8> [[SRC]], i8 [[INTRN_VAL]], i32 15
+// LLVM: ret <16 x i8> {{.*}}
+
+uint8x8_t test_vld1_lane_u8(uint8_t const * ptr, uint8x8_t src) {
+    return vld1_lane_u8(ptr, src, 7);
+}
+
+// CIR-LABEL: test_vld1_lane_u8
+// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!u8i>
+// CIR: [[VAL:%.*]] = cir.load align(1) [[PTR]] : !cir.ptr<!u8i>, !u8i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 8>
+
+// LLVM: {{.*}}test_vld1_lane_u8(ptr{{.*}}[[PTR:%.*]], <8 x i8>{{.*}}[[SRC:%.*]])
+// LLVM: [[INTRN_VAL:%.*]] = load i8, ptr [[PTR]], align 1
+// LLVM: {{.*}} = insertelement <8 x i8> [[SRC]], i8 [[INTRN_VAL]], i32 7
+// LLVM: ret <8 x i8> {{.*}}
+
+int16x4_t test_vld1_lane_s16(int16_t const * ptr, int16x4_t src) {
+    return vld1_lane_s16(ptr, src, 3);
+}
+
+// CIR-LABEL: test_vld1_lane_s16
+// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s16i>
+// CIR: [[VAL:%.*]] = cir.load align(2) [[PTR]] : !cir.ptr<!s16i>, !s16i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s16i x 4>
+
+// LLVM: {{.*}}test_vld1_lane_s16(ptr{{.*}}[[PTR:%.*]], <4 x i16>{{.*}}[[SRC:%.*]])
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <8 x i8> [[INTRN_VEC_CAST0]] to <4 x i16>
+// LLVM: [[INTRN_VAL:%.*]] = load i16, ptr [[PTR]], align 2
+// LLVM: {{.*}} = insertelement <4 x i16> [[INTRN_VEC_CAST1]], i16 [[INTRN_VAL]], i32 3
+// LLVM: ret <4 x i16> {{.*}}
+
+uint16x4_t test_vld1_lane_u16(uint16_t const * ptr, uint16x4_t src) {
+    return vld1_lane_u16(ptr, src, 3);
+}
+
+// CIR-LABEL: test_vld1_lane_u16
+// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!u16i>
+// CIR: [[VAL:%.*]] = cir.load align(2) [[PTR]] : !cir.ptr<!u16i>, !u16i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 4>
+
+// LLVM: {{.*}}test_vld1_lane_u16(ptr{{.*}}[[PTR:%.*]], <4 x i16>{{.*}}[[SRC:%.*]])
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <8 x i8> [[INTRN_VEC_CAST0]] to <4 x i16>
+// LLVM: [[INTRN_VAL:%.*]] = load i16, ptr [[PTR]], align 2
+// LLVM: {{.*}} = insertelement <4 x i16> [[INTRN_VEC_CAST1]], i16 [[INTRN_VAL]], i32 3
+// LLVM: ret <4 x i16> {{.*}}
+
+int16x8_t test_vld1q_lane_s16(int16_t const * ptr, int16x8_t src) {
+    return vld1q_lane_s16(ptr, src, 7);
+}
+
+// CIR-LABEL: test_vld1q_lane_s16
+// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s16i>
+// CIR: [[VAL:%.*]] = cir.load align(2) [[PTR]] : !cir.ptr<!s16i>, !s16i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s16i x 8>
+
+// LLVM: {{.*}}test_vld1q_lane_s16(ptr{{.*}}[[PTR:%.*]], <8 x i16>{{.*}}[[SRC:%.*]])
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <16 x i8> [[INTRN_VEC_CAST0]] to <8 x i16>
+// LLVM: [[INTRN_VAL:%.*]] = load i16, ptr [[PTR]], align 2
+// LLVM: {{.*}} = insertelement <8 x i16> [[INTRN_VEC_CAST1]], i16 [[INTRN_VAL]], i32 7
+// LLVM: ret <8 x i16> {{.*}}
+
+uint16x8_t test_vld1q_lane_u16(uint16_t const * ptr, uint16x8_t src) {
+    return vld1q_lane_u16(ptr, src, 7);
+}
+
+// CIR-LABEL: test_vld1q_lane_u16
+// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!u16i>
+// CIR: [[VAL:%.*]] = cir.load align(2) [[PTR]] : !cir.ptr<!u16i>, !u16i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 8>
+
+// LLVM: {{.*}}test_vld1q_lane_u16(ptr{{.*}}[[PTR:%.*]], <8 x i16>{{.*}}[[SRC:%.*]])
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <16 x i8> [[INTRN_VEC_CAST0]] to <8 x i16>
+// LLVM: [[INTRN_VAL:%.*]] = load i16, ptr [[PTR]], align 2
+// LLVM: {{.*}} = insertelement <8 x i16> [[INTRN_VEC_CAST1]], i16 [[INTRN_VAL]], i32 7
+// LLVM: ret <8 x i16> {{.*}}
+
+int32x2_t test_vld1_lane_s32(int32_t const * ptr, int32x2_t src) {
+    return vld1_lane_s32(ptr, src, 1);
+}
+
+// CIR-LABEL: test_vld1_lane_s32
+// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s32i>
+// CIR: [[VAL:%.*]] = cir.load align(4) [[PTR]] : !cir.ptr<!s32i>, !s32i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s32i x 2>
+
+// LLVM: {{.*}}test_vld1_lane_s32(ptr{{.*}}[[PTR:%.*]], <2 x i32>{{.*}}[[SRC:%.*]])
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <8 x i8> [[INTRN_VEC_CAST0]] to <2 x i32>
+// LLVM: [[INTRN_VAL:%.*]] = load i32, ptr [[PTR]], align 4
+// LLVM: {{.*}} = insertelement <2 x i32> [[INTRN_VEC_CAST1]], i32 [[INTRN_VAL]], i32 1
+// LLVM: ret <2 x i32> {{.*}}
+
+uint32x2_t test_vld1_lane_u32(uint32_t const * ptr, uint32x2_t src) {
+    return vld1_lane_u32(ptr, src, 1);
+}
+
+// CIR-LABEL: test_vld1_lane_u32
+// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!u32i>
+// CIR: [[VAL:%.*]] = cir.load align(4) [[PTR]] : !cir.ptr<!u32i>, !u32i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 2>
+
+// LLVM: {{.*}}test_vld1_lane_u32(ptr{{.*}}[[PTR:%.*]], <2 x i32>{{.*}}[[SRC:%.*]])
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <8 x i8> [[INTRN_VEC_CAST0]] to <2 x i32>
+// LLVM: [[INTRN_VAL:%.*]] = load i32, ptr [[PTR]], align 4
+// LLVM: {{.*}} = insertelement <2 x i32> [[INTRN_VEC_CAST1]], i32 [[INTRN_VAL]], i32 1
+// LLVM: ret <2 x i32> {{.*}}
+
+
+int32x4_t test_vld1q_lane_s32(int32_t const * ptr, int32x4_t src) {
+    return vld1q_lane_s32(ptr, src, 3);
+}
+
+// CIR-LABEL: test_vld1q_lane_s32
+// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s32i>
+// CIR: [[VAL:%.*]] = cir.load align(4) [[PTR]] : !cir.ptr<!s32i>, !s32i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s32i x 4>
+
+// LLVM: {{.*}}test_vld1q_lane_s32(ptr{{.*}}[[PTR:%.*]], <4 x i32>{{.*}}[[SRC:%.*]])
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <16 x i8> [[INTRN_VEC_CAST0]] to <4 x i32>
+// LLVM: [[INTRN_VAL:%.*]] = load i32, ptr [[PTR]], align 4
+// LLVM: {{.*}} = insertelement <4 x i32> [[INTRN_VEC_CAST1]], i32 [[INTRN_VAL]], i32 3
+// LLVM: ret <4 x i32> {{.*}}
+
+
+uint32x4_t test_vld1q_lane_u32(uint32_t const * ptr, uint32x4_t src) {
+    return vld1q_lane_u32(ptr, src, 3);
+}
+
+// CIR-LABEL: test_vld1q_lane_u32
+// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!u32i>
+// CIR: [[VAL:%.*]] = cir.load align(4) [[PTR]] : !cir.ptr<!u32i>, !u32i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 4>
+
+// LLVM: {{.*}}test_vld1q_lane_u32(ptr{{.*}}[[PTR:%.*]], <4 x i32>{{.*}}[[SRC:%.*]])
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <16 x i8> [[INTRN_VEC_CAST0]] to <4 x i32>
+// LLVM: [[INTRN_VAL:%.*]] = load i32, ptr [[PTR]], align 4
+// LLVM: {{.*}} = insertelement <4 x i32> [[INTRN_VEC_CAST1]], i32 [[INTRN_VAL]], i32 3
+// LLVM: ret <4 x i32> {{.*}}
+
+int64x1_t test_vld1_lane_s64(int64_t const * ptr, int64x1_t src) {
+    return vld1_lane_s64(ptr, src, 0);
+}
+
+// CIR-LABEL: test_vld1_lane_s64
+// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) [[PTR]] : !cir.ptr<!s64i>, !s64i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s64i x 1>
+
+// LLVM: {{.*}}test_vld1_lane_s64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <8 x i8> [[INTRN_VEC_CAST0]] to <1 x i64>
+// LLVM: [[INTRN_VAL:%.*]] = load i64, ptr [[PTR]], align 8
+// LLVM: {{.*}} = insertelement <1 x i64> [[INTRN_VEC_CAST1]], i64 [[INTRN_VAL]], i32 0
+// LLVM: ret <1 x i64> {{.*}}
+
+uint64x1_t test_vld1_lane_u64(uint64_t const * ptr, uint64x1_t src) {
+    return vld1_lane_u64(ptr, src, 0);
+}
+
+// CIR-LABEL: test_vld1_lane_u64
+// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!u64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) [[PTR]] : !cir.ptr<!u64i>, !u64i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 1>
+
+// LLVM: {{.*}}test_vld1_lane_u64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <8 x i8> [[INTRN_VEC_CAST0]] to <1 x i64>
+// LLVM: [[INTRN_VAL:%.*]] = load i64, ptr [[PTR]], align 8
+// LLVM: {{.*}} = insertelement <1 x i64> [[INTRN_VEC_CAST1]], i64 [[INTRN_VAL]], i32 0
+// LLVM: ret <1 x i64> {{.*}}
+
+int64x2_t test_vld1q_lane_s64(int64_t const * ptr, int64x2_t src) {
+    return vld1q_lane_s64(ptr, src, 1);
+}
+
+// CIR-LABEL: test_vld1q_lane_s64
+// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) [[PTR]] : !cir.ptr<!s64i>, !s64i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s64i x 2>
+
+// LLVM: {{.*}}test_vld1q_lane_s64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <16 x i8> [[INTRN_VEC_CAST0]] to <2 x i64>
+// LLVM: [[INTRN_VAL:%.*]] = load i64, ptr [[PTR]], align 8
+// LLVM: {{.*}} = insertelement <2 x i64> [[INTRN_VEC_CAST1]], i64 [[INTRN_VAL]], i32 1
+// LLVM: ret <2 x i64> {{.*}}
+
+uint64x2_t test_vld1q_lane_u64(uint64_t const * ptr, uint64x2_t src) {
+    return vld1q_lane_u64(ptr, src, 1);
+}
+
+// CIR-LABEL: test_vld1q_lane_u64
+// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!u64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) [[PTR]] : !cir.ptr<!u64i>, !u64i
+// CIR: {{%.*}} = cir.vec.insert [[VAL]], {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 2>
+
+// LLVM: {{.*}}test_vld1q_lane_u64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[INTRN_VEC_CAST0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[INTRN_VEC_CAST1:%.*]] = bitcast <16 x i8> [[INTRN_VEC_CAST0]] to <2 x i64>
+// LLVM: [[INTRN_VAL:%.*]] = load i64, ptr [[PTR]], align 8
+// LLVM: {{.*}} = insertelement <2 x i64> [[INTRN_VEC_CAST1]], i64 [[INTRN_VAL]], i32 1
+// LLVM: ret <2 x i64> {{.*}}
+
+void test_vst1_lane_s8(int8_t * ptr, int8x8_t src) {
+    vst1_lane_s8(ptr, src, 7);
+}
+
+// CIR-LABEL: test_vst1_lane_s8
+// CIR: [[LANE:%.*]] = cir.const #cir.int<7> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s8i x 8>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s8i>
+// CIR: cir.store align(1) [[VAL]], [[PTR]] : !s8i, !cir.ptr<!s8i>
+
+// LLVM: {{.*}}test_vst1_lane_s8(ptr{{.*}}[[PTR:%.*]], <8 x i8>{{.*}}[[SRC:%.*]])
+// LLVM: [[RES:%.*]] = extractelement <8 x i8> [[SRC]], i32 7
+// LLVM: store i8 [[RES]], ptr [[PTR]], align 1
+
+void test_vst1_lane_s16(int16_t * ptr, int16x4_t src) {
+    vst1_lane_s16(ptr, src, 3);
+}
+
+// CIR-LABEL: test_vst1_lane_s16
+// CIR: [[LANE:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s16i x 4>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s16i>
+// CIR: cir.store align(2) [[VAL]], [[PTR]] : !s16i, !cir.ptr<!s16i>
+
+// LLVM: {{.*}}test_vst1_lane_s16(ptr{{.*}}[[PTR:%.*]], <4 x i16>{{.*}}[[SRC:%.*]])
+// LLVM: [[VEC_CAST0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[VEC_CAST1:%.*]] = bitcast <8 x i8> [[VEC_CAST0]] to <4 x i16>
+// LLVM: [[RES:%.*]] = extractelement <4 x i16> [[VEC_CAST1]], i32 3
+// LLVM: store i16 [[RES]], ptr [[PTR]], align 2
+
+void test_vst1_lane_u16(uint16_t * ptr, uint16x4_t src) {
+    vst1_lane_u16(ptr, src, 3);
+}
+
+// CIR-LABEL: test_vst1_lane_u16
+// CIR: [[LANE:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!u16i x 4>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!u16i>
+// CIR: cir.store align(2) [[VAL]], [[PTR]] : !u16i, !cir.ptr<!u16i>
+
+// LLVM: {{.*}}test_vst1_lane_u16(ptr{{.*}}[[PTR:%.*]], <4 x i16>{{.*}}[[SRC:%.*]])
+// LLVM: [[VEC_CAST0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[VEC_CAST1:%.*]] = bitcast <8 x i8> [[VEC_CAST0]] to <4 x i16>
+// LLVM: [[RES:%.*]] = extractelement <4 x i16> [[VEC_CAST1]], i32 3
+// LLVM: store i16 [[RES]], ptr [[PTR]], align 2
+
+void test_vst1_lane_s32(int32_t * ptr, int32x2_t src) {
+    vst1_lane_s32(ptr, src, 1);
+}
+
+// CIR-LABEL: test_vst1_lane_s32
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s32i x 2>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s32i>
+// CIR: cir.store align(4) [[VAL]], [[PTR]] : !s32i, !cir.ptr<!s32i>
+
+// LLVM: {{.*}}test_vst1_lane_s32(ptr{{.*}}[[PTR:%.*]], <2 x i32>{{.*}}[[SRC:%.*]])
+// LLVM: [[VEC_CAST0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[VEC_CAST1:%.*]] = bitcast <8 x i8> [[VEC_CAST0]] to <2 x i32>
+// LLVM: [[RES:%.*]] = extractelement <2 x i32> [[VEC_CAST1]], i32 1
+// LLVM: store i32 [[RES]], ptr [[PTR]], align 4
+
+void test_vst1_lane_f32(float32_t * ptr, float32x2_t src) {
+    vst1_lane_f32(ptr, src, 1);
+}
+
+// CIR-LABEL: test_vst1_lane_f32
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!cir.float x 2>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.float>
+// CIR: cir.store align(4) [[VAL]], [[PTR]] : !cir.float, !cir.ptr<!cir.float>
+
+// LLVM: {{.*}}test_vst1_lane_f32(ptr{{.*}}[[PTR:%.*]], <2 x float>{{.*}}[[SRC:%.*]])
+// LLVM: [[VEC_CAST0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[VEC_CAST1:%.*]] = bitcast <8 x i8> [[VEC_CAST0]] to <2 x float>
+// LLVM: [[RES:%.*]] = extractelement <2 x float> [[VEC_CAST1]], i32 1
+// LLVM: store float [[RES]], ptr [[PTR]], align 4
+
+void test_vst1_lane_s64(int64_t * ptr, int64x1_t src) {
+    vst1_lane_s64(ptr, src, 0);
+}
+
+// CIR-LABEL: test_vst1_lane_s64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 1>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s64i>
+// CIR: cir.store align(8) [[VAL]], [[PTR]] : !s64i, !cir.ptr<!s64i>
+
+// LLVM: {{.*}}test_vst1_lane_s64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[VEC_CAST0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[VEC_CAST1:%.*]] = bitcast <8 x i8> [[VEC_CAST0]] to <1 x i64>
+// LLVM: [[RES:%.*]] = extractelement <1 x i64> [[VEC_CAST1]], i32 0
+// LLVM: store i64 [[RES]], ptr [[PTR]], align 8
+
+void test_vst1_lane_f64(float64_t * ptr, float64x1_t src) {
+    vst1_lane_f64(ptr, src, 0);
+}
+
+// CIR-LABEL: test_vst1_lane_f64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!cir.double x 1>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.double>
+// CIR: cir.store align(8) [[VAL]], [[PTR]] : !cir.double, !cir.ptr<!cir.double>
+
+// LLVM: {{.*}}test_vst1_lane_f64(ptr{{.*}}[[PTR:%.*]], <1 x double>{{.*}}[[SRC:%.*]])
+// LLVM: [[VEC_CAST0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[VEC_CAST1:%.*]] = bitcast <8 x i8> [[VEC_CAST0]] to <1 x double>
+// LLVM: [[RES:%.*]] = extractelement <1 x double> [[VEC_CAST1]], i32 0
+// LLVM: store double [[RES]], ptr [[PTR]], align 8
+
+void test_vst1q_lane_s8(int8_t * ptr, int8x16_t src) {
+    vst1q_lane_s8(ptr, src, 15);
+}
+
+// CIR-LABEL: test_vst1q_lane_s8
+// CIR: [[LANE:%.*]] = cir.const #cir.int<15> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s8i x 16>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s8i>
+// CIR: cir.store align(1) [[VAL]], [[PTR]] : !s8i, !cir.ptr<!s8i>
+
+// LLVM: {{.*}}test_vst1q_lane_s8(ptr{{.*}}[[PTR:%.*]], <16 x i8>{{.*}}[[SRC:%.*]])
+// LLVM: [[RES:%.*]] = extractelement <16 x i8> [[SRC]], i32 15
+// LLVM: store i8 [[RES]], ptr [[PTR]], align 1
+
+
+void test_vst1q_lane_s16(int16_t * ptr, int16x8_t src) {
+    vst1q_lane_s16(ptr, src, 7);
+}
+
+// CIR-LABEL: test_vst1q_lane_s16
+// CIR: [[LANE:%.*]] = cir.const #cir.int<7> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s16i x 8>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s16i>
+// CIR: cir.store align(2) [[VAL]], [[PTR]] : !s16i, !cir.ptr<!s16i>
+
+// LLVM: {{.*}}test_vst1q_lane_s16(ptr{{.*}}[[PTR:%.*]], <8 x i16>{{.*}}[[SRC:%.*]])
+// LLVM: [[VEC_CAST0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[VEC_CAST1:%.*]] = bitcast <16 x i8> [[VEC_CAST0]] to <8 x i16>
+// LLVM: [[RES:%.*]] = extractelement <8 x i16> [[VEC_CAST1]], i32 7
+// LLVM: store i16 [[RES]], ptr [[PTR]], align 2
+
+void test_vst1q_lane_u16(uint16_t * ptr, uint16x8_t src) {
+    vst1q_lane_u16(ptr, src, 7);
+}
+
+// CIR-LABEL: test_vst1q_lane_u16
+// CIR: [[LANE:%.*]] = cir.const #cir.int<7> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!u16i x 8>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!u16i>
+// CIR: cir.store align(2) [[VAL]], [[PTR]] : !u16i, !cir.ptr<!u16i>
+
+// LLVM: {{.*}}test_vst1q_lane_u16(ptr{{.*}}[[PTR:%.*]], <8 x i16>{{.*}}[[SRC:%.*]])
+// LLVM: [[VEC_CAST0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[VEC_CAST1:%.*]] = bitcast <16 x i8> [[VEC_CAST0]] to <8 x i16>
+// LLVM: [[RES:%.*]] = extractelement <8 x i16> [[VEC_CAST1]], i32 7
+// LLVM: store i16 [[RES]], ptr [[PTR]], align 2
+
+void test_vst1q_lane_s32(int32_t * ptr, int32x4_t src) {
+    vst1q_lane_s32(ptr, src, 3);
+}
+
+// CIR-LABEL: test_vst1q_lane_s32
+// CIR: [[LANE:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s32i x 4>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s32i>
+// CIR: cir.store align(4) [[VAL]], [[PTR]] : !s32i, !cir.ptr<!s32i>
+
+// LLVM: {{.*}}test_vst1q_lane_s32(ptr{{.*}}[[PTR:%.*]], <4 x i32>{{.*}}[[SRC:%.*]])
+// LLVM: [[VEC_CAST0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[VEC_CAST1:%.*]] = bitcast <16 x i8> [[VEC_CAST0]] to <4 x i32>
+// LLVM: [[RES:%.*]] = extractelement <4 x i32> [[VEC_CAST1]], i32 3
+// LLVM: store i32 [[RES]], ptr [[PTR]], align 4
+
+void test_vst1q_lane_s64(int64_t * ptr, int64x2_t src) {
+    vst1q_lane_s64(ptr, src, 1);
+}
+
+// CIR-LABEL: test_vst1q_lane_s64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 2>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s64i>
+// CIR: cir.store align(8) [[VAL]], [[PTR]] : !s64i, !cir.ptr<!s64i>
+
+// LLVM: {{.*}}test_vst1q_lane_s64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[VEC_CAST0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[VEC_CAST1:%.*]] = bitcast <16 x i8> [[VEC_CAST0]] to <2 x i64>
+// LLVM: [[RES:%.*]] = extractelement <2 x i64> [[VEC_CAST1]], i32 1
+// LLVM: store i64 [[RES]], ptr [[PTR]], align 8
+
+void test_vst1q_lane_f32(float32_t * ptr, float32x4_t src) {
+    vst1q_lane_f32(ptr, src, 3);
+}
+
+// CIR-LABEL: test_vst1q_lane_f32
+// CIR: [[LANE:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!cir.float x 4>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.float>
+// CIR: cir.store align(4) [[VAL]], [[PTR]] : !cir.float, !cir.ptr<!cir.float>
+
+// LLVM: {{.*}}test_vst1q_lane_f32(ptr{{.*}}[[PTR:%.*]], <4 x float>{{.*}}[[SRC:%.*]])
+// LLVM: [[VEC_CAST0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[VEC_CAST1:%.*]] = bitcast <16 x i8> [[VEC_CAST0]] to <4 x float>
+// LLVM: [[RES:%.*]] = extractelement <4 x float> [[VEC_CAST1]], i32 3
+// LLVM: store float [[RES]], ptr [[PTR]], align 4
+
+void test_vst1q_lane_f64(float64_t * ptr, float64x2_t src) {
+    vst1q_lane_f64(ptr, src, 1);
+}
+
+// CIR-LABEL: test_vst1q_lane_f64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!cir.double x 2>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.double>
+// CIR: cir.store align(8) [[VAL]], [[PTR]] : !cir.double, !cir.ptr<!cir.double>
+
+// LLVM: {{.*}}test_vst1q_lane_f64(ptr{{.*}}[[PTR:%.*]], <2 x double>{{.*}}[[SRC:%.*]])
+// LLVM: [[VEC_CAST0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[VEC_CAST1:%.*]] = bitcast <16 x i8> [[VEC_CAST0]] to <2 x double>
+// LLVM: [[RES:%.*]] = extractelement <2 x double> [[VEC_CAST1]], i32 1
+// LLVM: store double [[RES]], ptr [[PTR]], align 8
+
+void test_vstl1q_lane_u64(uint64_t  *a, uint64x2_t b) {
+  vstl1q_lane_u64(a, b, 1);
+}
+
+// CIR-LABEL: test_vstl1q_lane_u64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!u64i x 2>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!u64i>
+// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !u64i, !cir.ptr<!u64i>
+
+// LLVM: {{.*}}test_vstl1q_lane_u64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+// LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
+
+void test_vstl1q_lane_s64(int64_t  *a, int64x2_t b) {
+  vstl1q_lane_s64(a, b, 1);
+}
+
+// CIR-LABEL: test_vstl1q_lane_s64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 2>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s64i>
+// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !s64i, !cir.ptr<!s64i>
+
+// LLVM: {{.*}}test_vstl1q_lane_s64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+// LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
+
+void test_vstl1q_lane_f64(float64_t  *a, float64x2_t b) {
+  vstl1q_lane_f64(a, b, 1);
+}
+
+// CIR-LABEL: test_vstl1q_lane_f64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!cir.double x 2>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.double>
+// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !cir.double, !cir.ptr<!cir.double>
+
+// LLVM: {{.*}}test_vstl1q_lane_f64(ptr{{.*}}[[PTR:%.*]], <2 x double>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// LLVM: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
+// LLVM: store atomic double [[TMP2]], ptr [[PTR]] release, align 8
+
+void test_vstl1q_lane_p64(poly64_t  *a, poly64x2_t b) {
+  vstl1q_lane_p64(a, b, 1);
+}
+
+// CIR-LABEL: test_vstl1q_lane_p64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 2>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s64i>
+// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !s64i, !cir.ptr<!s64i>
+
+// LLVM: {{.*}}test_vstl1q_lane_p64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+// LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
+
+void test_vstl1_lane_u64(uint64_t  *a, uint64x1_t b) {
+  vstl1_lane_u64(a, b, 0);
+}
+
+// CIR-LABEL: test_vstl1_lane_u64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!u64i x 1>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!u64i>
+// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !u64i, !cir.ptr<!u64i>
+
+// LLVM: {{.*}}test_vstl1_lane_u64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
+
+void test_vstl1_lane_s64(int64_t  *a, int64x1_t b) {
+  vstl1_lane_s64(a, b, 0);
+}
+
+// CIR-LABEL: test_vstl1_lane_s64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 1>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s64i>
+// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !s64i, !cir.ptr<!s64i>
+
+// LLVM: {{.*}}test_vstl1_lane_s64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
+
+void test_vstl1_lane_f64(float64_t  *a, float64x1_t b) {
+  vstl1_lane_f64(a, b, 0);
+}
+
+// CIR-LABEL: test_vstl1_lane_f64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!cir.double x 1>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.double>
+// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !cir.double, !cir.ptr<!cir.double>
+
+// LLVM: {{.*}}test_vstl1_lane_f64(ptr{{.*}}[[PTR:%.*]], <1 x double>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
+// LLVM: [[TMP2:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
+// LLVM: store atomic double [[TMP2]], ptr [[PTR]] release, align 8
+
+void test_vstl1_lane_p64(poly64_t  *a, poly64x1_t b) {
+  vstl1_lane_p64(a, b, 0);
+}
+
+// CIR-LABEL: test_vstl1_lane_p64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[VAL:%.*]] = cir.vec.extract {{%.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 1>
+// CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!s64i>
+// CIR: cir.store align(8) atomic(release) [[VAL]], [[PTR]] : !s64i, !cir.ptr<!s64i>
+
+// LLVM: {{.*}}test_vstl1_lane_p64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// LLVM: store atomic i64 [[TMP2]], ptr [[PTR]] release, align 8
+
+uint64x2_t test_vldap1q_lane_u64(uint64_t  *a, uint64x2_t b) {
+  return vldap1q_lane_u64(a, b, 1);
+}
+
+// CIR-LABEL: test_vldap1q_lane_u64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!u64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) syncscope(system) atomic(acquire) [[TMP0]] : !cir.ptr<!u64i>, !u64i
+// CIR: [[VEC:%.*]] = cir.cast bitcast {{.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u64i x 2>
+// CIR: [[TMP:%.*]]  = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!u64i x 2>
+
+// LLVM: {{.*}}test_vldap1q_lane_u64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[TMP2:%.*]] = load atomic i64, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
+
+int64x2_t test_vldap1q_lane_s64(int64_t  *a, int64x2_t b) {
+  return vldap1q_lane_s64(a, b, 1);
+}
+
+// CIR-LABEL:test_vldap1q_lane_s64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!s64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) syncscope(system) atomic(acquire) [[TMP0]] : !cir.ptr<!s64i>, !s64i
+// CIR: [[VEC:%.*]] = cir.cast bitcast {{.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s64i x 2>
+// CIR: [[TMP:%.*]]  = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 2>
+
+// LLVM: {{.*}}test_vldap1q_lane_s64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[TMP2:%.*]] = load atomic i64, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
+
+float64x2_t test_vldap1q_lane_f64(float64_t  *a, float64x2_t b) {
+  return vldap1q_lane_f64(a, b, 1);
+}
+
+// CIR-LABEL:test_vldap1q_lane_f64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.double>
+// CIR: [[VAL:%.*]] = cir.load align(8) syncscope(system) atomic(acquire) [[TMP0]] : !cir.ptr<!cir.double>, !cir.double
+// CIR: [[VEC:%.*]] = cir.cast bitcast {{.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!cir.double x 2>
+// CIR: [[TMP:%.*]]  = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!cir.double x 2>
+
+// LLVM: {{.*}}test_vldap1q_lane_f64(ptr{{.*}}[[PTR:%.*]], <2 x double>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[TMP2:%.*]] = load atomic double, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <2 x double> [[TMP1]], double [[TMP2]], i32 1
+
+poly64x2_t test_vldap1q_lane_p64(poly64_t  *a, poly64x2_t b) {
+  return vldap1q_lane_p64(a, b, 1);
+}
+
+// CIR-LABEL:test_vldap1q_lane_p64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!s64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) syncscope(system) atomic(acquire) [[TMP0]] : !cir.ptr<!s64i>, !s64i
+// CIR: [[VEC:%.*]] = cir.cast bitcast {{.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s64i x 2>
+// CIR: [[TMP:%.*]]  = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 2>
+
+// LLVM: {{.*}}test_vldap1q_lane_p64(ptr{{.*}}[[PTR:%.*]], <2 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = load <16 x i8>, ptr %{{.*}}
+// LLVM: [[TMP2:%.*]] = load atomic i64, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
+
+uint64x1_t test_vldap1_lane_u64(uint64_t  *a, uint64x1_t b) {
+  return vldap1_lane_u64(a, b, 0);
+}
+
+// CIR-LABEL:test_vldap1_lane_u64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!u64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) syncscope(system) atomic(acquire) [[TMP0]] : !cir.ptr<!u64i>, !u64i
+// CIR: [[VEC:%.*]] = cir.cast bitcast {{.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u64i x 1>
+// CIR: [[TMP:%.*]]  = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!u64i x 1>
+
+// LLVM: {{.*}}test_vldap1_lane_u64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[TMP2:%.*]] = load atomic i64, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
+
+int64x1_t test_vldap1_lane_s64(int64_t  *a, int64x1_t b) {
+  return vldap1_lane_s64(a, b, 0);
+}
+
+// CIR-LABEL:test_vldap1_lane_s64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!s64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) syncscope(system) atomic(acquire) [[TMP0]] : !cir.ptr<!s64i>, !s64i
+// CIR: [[VEC:%.*]] = cir.cast bitcast {{.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s64i x 1>
+// CIR: [[TMP:%.*]]  = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 1>
+
+// LLVM: {{.*}}test_vldap1_lane_s64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[TMP2:%.*]] = load atomic i64, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
+
+
+float64x1_t test_vldap1_lane_f64(float64_t  *a, float64x1_t b) {
+  return vldap1_lane_f64(a, b, 0);
+}
+
+// CIR-LABEL: test_vldap1_lane_f64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.double>
+// CIR: [[VAL:%.*]] = cir.load align(8) syncscope(system) atomic(acquire) [[TMP0]] : !cir.ptr<!cir.double>, !cir.double
+// CIR: [[VEC:%.*]] = cir.cast bitcast {{.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!cir.double x 1>
+// CIR: [[TMP:%.*]]  = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!cir.double x 1>
+
+// LLVM: {{.*}}test_vldap1_lane_f64(ptr{{.*}}[[PTR:%.*]], <1 x double>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[TMP2:%.*]] = load atomic double, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <1 x double> [[TMP1]], double [[TMP2]], i32 0
+
+poly64x1_t test_vldap1_lane_p64(poly64_t  *a, poly64x1_t b) {
+  return vldap1_lane_p64(a, b, 0);
+}
+
+// CIR-LABEL: test_vldap1_lane_p64
+// CIR: [[LANE:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[TMP0:%.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!s64i>
+// CIR: [[VAL:%.*]] = cir.load align(8) syncscope(system) atomic(acquire) [[TMP0]] : !cir.ptr<!s64i>, !s64i
+// CIR: [[VEC:%.*]] = cir.cast bitcast {{.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s64i x 1>
+// CIR: [[TMP:%.*]]  = cir.vec.insert [[VAL]], {{.*}}[[[LANE]] : !s32i] : !cir.vector<!s64i x 1>
+
+// LLVM: {{.*}}test_vldap1_lane_p64(ptr{{.*}}[[PTR:%.*]], <1 x i64>{{.*}}[[SRC:%.*]])
+// LLVM: [[TMP0:%.*]] = load <8 x i8>, ptr %{{.*}}
+// LLVM: [[TMP2:%.*]] = load atomic i64, ptr [[PTR]] acquire, align 8
+// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// LLVM: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
diff --git a/clang/test/CIR/Incubator/CodeGen/AArch64/neon-misc.c b/clang/test/CIR/Incubator/CodeGen/AArch64/neon-misc.c
new file mode 100644
index 0000000000000..6d379a58bf0f3
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/AArch64/neon-misc.c
@@ -0,0 +1,2816 @@
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -target-feature +neon \
+// RUN:    -target-feature +dotprod -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -target-feature +neon \
+// RUN:    -target-feature +dotprod -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -fno-clangir-call-conv-lowering -emit-llvm -o - %s \
+// RUN: | opt -S -passes=mem2reg,simplifycfg -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -target-feature +neon \
+// RUN:    -target-feature +dotprod -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -emit-llvm -o - %s \
+// RUN: | opt -S -passes=mem2reg,simplifycfg -o %t-og.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t-og.ll %s
+
+// REQUIRES: aarch64-registered-target || arm-registered-target
+
+// This test file contains test cases for the intrinsics that are not covered
+// by the other neon test files.
+
+#include <arm_neon.h>
+
+uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
+  return vset_lane_u8(a, b, 7);
+}
+
+// CIR-LABEL: test_vset_lane_u8
+// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i
+// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s8i x 8>
+
+// LLVM: {{.*}}test_vset_lane_u8(i8{{.*}}[[A:%.*]], <8 x i8>{{.*}}[[B:%.*]])
+// LLVM: [[INTRN_RES:%.*]] = insertelement <8 x i8> [[B]], i8 [[A]], i32 7
+// LLVM: ret <8 x i8> [[INTRN_RES]]
+
+uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
+  return vset_lane_u16(a, b, 3);
+}
+
+// CIR-LABEL: test_vset_lane_u16
+// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s16i x 4>
+
+// LLVM: {{.*}}test_vset_lane_u16(i16{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
+// LLVM: [[INTRN_RES:%.*]] = insertelement <4 x i16> [[B]], i16 [[A]], i32 3
+// LLVM: ret <4 x i16> [[INTRN_RES]]
+
+uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
+  return vset_lane_u32(a, b, 1);
+}
+
+// CIR-LABEL: test_vset_lane_u32
+// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s32i x 2>
+
+// LLVM: {{.*}}test_vset_lane_u32(i32{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[B:%.*]])
+// LLVM: [[INTRN_RES:%.*]] = insertelement <2 x i32> [[B]], i32 [[A]], i32 1
+// LLVM: ret <2 x i32> [[INTRN_RES]]
+
+uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
+  return vset_lane_u64(a, b, 0);
+}
+
+// CIR-LABEL: test_vset_lane_u64
+// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s64i x 1>
+
+// LLVM: {{.*}}test_vset_lane_u64(i64{{.*}}[[A:%.*]], <1 x i64>{{.*}}[[B:%.*]])
+// LLVM: [[INTRN_RES:%.*]] = insertelement <1 x i64> [[B]], i64 [[A]], i32 0
+// LLVM: ret <1 x i64> [[INTRN_RES]]
+
+float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
+  return vset_lane_f32(a, b, 1);
+}
+
+// CIR-LABEL: test_vset_lane_f32
+// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 2>
+
+// LLVM: {{.*}}test_vset_lane_f32(float{{.*}}[[A:%.*]], <2 x float>{{.*}}[[B:%.*]])
+// LLVM: [[INTRN_RES:%.*]] = insertelement <2 x float> [[B]], float [[A]], i32 1
+// LLVM: ret <2 x float> [[INTRN_RES]]
+
+uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
+  return vsetq_lane_u8(a, b, 15);
+}
+
+// CIR-LABEL: test_vsetq_lane_u8
+// CIR: [[IDX:%.*]] = cir.const #cir.int<15> : !s32i
+// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s8i x 16>
+
+// LLVM: {{.*}}test_vsetq_lane_u8(i8{{.*}}[[A:%.*]], <16 x i8>{{.*}}[[B:%.*]])
+// LLVM: [[INTRN_RES:%.*]] = insertelement <16 x i8> [[B]], i8 [[A]], i32 15
+// LLVM: ret <16 x i8> [[INTRN_RES]]
+
+uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
+  return vsetq_lane_u16(a, b, 7);
+}
+
+// CIR-LABEL: test_vsetq_lane_u16
+// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i
+// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s16i x 8>
+
+// LLVM: {{.*}}test_vsetq_lane_u16(i16{{.*}}[[A:%.*]], <8 x i16>{{.*}}[[B:%.*]])
+// LLVM: [[INTRN_RES:%.*]] = insertelement <8 x i16> [[B]], i16 [[A]], i32 7
+// LLVM: ret <8 x i16> [[INTRN_RES]]
+
+uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
+  return vsetq_lane_u32(a, b, 3);
+}
+
+// CIR-LABEL: test_vsetq_lane_u32
+// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s32i x 4>
+
+// LLVM: {{.*}}test_vsetq_lane_u32(i32{{.*}}[[A:%.*]], <4 x i32>{{.*}}[[B:%.*]])
+// LLVM: [[INTRN_RES:%.*]] = insertelement <4 x i32> [[B]], i32 [[A]], i32 3
+// LLVM: ret <4 x i32> [[INTRN_RES]]
+
+int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
+  return vsetq_lane_s64(a, b, 1);
+}
+
+// CIR-LABEL: test_vsetq_lane_s64
+// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!s64i x 2>
+
+// LLVM: {{.*}}test_vsetq_lane_s64(i64{{.*}}[[A:%.*]], <2 x i64>{{.*}}[[B:%.*]])
+// LLVM: [[INTRN_RES:%.*]] = insertelement <2 x i64> [[B]], i64 [[A]], i32 1
+// LLVM: ret <2 x i64> [[INTRN_RES]]
+
+float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
+  return vsetq_lane_f32(a, b, 3);
+}
+
+// CIR-LABEL: test_vsetq_lane_f32
+// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 4>
+
+// LLVM: {{.*}}test_vsetq_lane_f32(float{{.*}}[[A:%.*]], <4 x float>{{.*}}[[B:%.*]])
+// LLVM: [[INTRN_RES:%.*]] = insertelement <4 x float> [[B]], float [[A]], i32 3
+// LLVM: ret <4 x float> [[INTRN_RES]]
+
+float64x1_t test_vset_lane_f64(float64_t a, float64x1_t b) {
+  return vset_lane_f64(a, b, 0);
+}
+
+// CIR-LABEL: test_vset_lane_f64
+// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 1>
+
+// LLVM: {{.*}}test_vset_lane_f64(double{{.*}}[[A:%.*]], <1 x double>{{.*}}[[B:%.*]])
+// LLVM: [[INTRN_RES:%.*]] = insertelement <1 x double> [[B]], double [[A]], i32 0
+// LLVM: ret <1 x double> [[INTRN_RES]]
+
+float64x2_t test_vsetq_lane_f64(float64_t a, float64x2_t b) {
+  return vsetq_lane_f64(a, b, 0);
+}
+
+// CIR-LABEL: test_vsetq_lane_f64
+// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 2>
+
+// LLVM: {{.*}}test_vsetq_lane_f64(double{{.*}}[[A:%.*]], <2 x double>{{.*}}[[B:%.*]])
+// LLVM: [[INTRN_RES:%.*]] = insertelement <2 x double> [[B]], double [[A]], i32 0
+// LLVM: ret <2 x double> [[INTRN_RES]]
+
+uint8_t test_vget_lane_u8(uint8x8_t a) {
+  return vget_lane_u8(a, 7);
+}
+
+// CIR-LABEL: test_vget_lane_u8
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<7> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 8>
+
+// LLVM: {{.*}}test_vget_lane_u8(<8 x i8>{{.*}}[[ARG:%.*]])
+// LLVM: [[RES:%.*]] = extractelement <8 x i8> [[ARG]], i32 7
+// LLVM: ret i8 [[RES]]
+
+uint8_t test_vgetq_lane_u8(uint8x16_t a) {
+  return vgetq_lane_u8(a, 15);
+}
+
+// CIR-LABEL: test_vgetq_lane_u8
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<15> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 16>
+
+// LLVM: {{.*}}test_vgetq_lane_u8(<16 x i8>{{.*}}[[ARG:%.*]])
+// LLVM: [[RES:%.*]] = extractelement <16 x i8> [[ARG]], i32 15
+// LLVM: ret i8 [[RES]]
+
+uint16_t test_vget_lane_u16(uint16x4_t a) {
+  return vget_lane_u16(a, 3);
+}
+
+// CIR-LABEL: test_vget_lane_u16
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<3> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 4>
+
+// LLVM: {{.*}}test_vget_lane_u16(<4 x i16>{{.*}}[[ARG:%.*]])
+// LLVM: [[RES:%.*]] = extractelement <4 x i16> [[ARG]], i32 3
+// LLVM: ret i16 [[RES]]
+
+uint16_t test_vgetq_lane_u16(uint16x8_t a) {
+  return vgetq_lane_u16(a, 7);
+}
+
+// CIR-LABEL: test_vgetq_lane_u16
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<7> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 8>
+
+// LLVM: {{.*}}test_vgetq_lane_u16(<8 x i16>{{.*}}[[ARG:%.*]])
+// LLVM: [[RES:%.*]] = extractelement <8 x i16> [[ARG]], i32 7
+// LLVM: ret i16 [[RES]]
+
+uint32_t test_vget_lane_u32(uint32x2_t a) {
+  return vget_lane_u32(a, 1);
+}
+
+// CIR-LABEL: test_vget_lane_u32
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<1> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 2>
+
+// LLVM: {{.*}}test_vget_lane_u32(<2 x i32>{{.*}}[[ARG:%.*]])
+// LLVM: [[RES:%.*]] = extractelement <2 x i32> [[ARG]], i32 1
+// LLVM: ret i32 [[RES]]
+
+uint32_t test_vgetq_lane_u32(uint32x4_t a) {
+  return vgetq_lane_u32(a, 3);
+}
+
+// CIR-LABEL: test_vgetq_lane_u32
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<3> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 4>
+
+// LLVM: {{.*}}test_vgetq_lane_u32(<4 x i32>{{.*}}[[ARG:%.*]])
+// LLVM: [[RES:%.*]] = extractelement <4 x i32> [[ARG]], i32 3
+// LLVM: ret i32 [[RES]]
+
+uint64_t test_vget_lane_u64(uint64x1_t a) {
+  return vget_lane_u64(a, 0);
+}
+
+// CIR-LABEL: test_vget_lane_u64
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<0> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 1>
+
+// LLVM: {{.*}}test_vget_lane_u64(<1 x i64>{{.*}}[[ARG:%.*]])
+// LLVM: [[RES:%.*]] = extractelement <1 x i64> [[ARG]], i32 0
+// LLVM: ret i64 [[RES]]
+
+uint64_t test_vgetq_lane_u64(uint64x2_t a) {
+  return vgetq_lane_u64(a, 1);
+}
+
+// CIR-LABEL: test_vgetq_lane_u64
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<1> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 2>
+
+// LLVM: {{.*}}test_vgetq_lane_u64(<2 x i64>{{.*}}[[ARG:%.*]])
+// LLVM: [[RES:%.*]] = extractelement <2 x i64> [[ARG]], i32 1
+// LLVM: ret i64 [[RES]]
+
+float32_t test_vget_lane_f32(float32x2_t a) {
+  return vget_lane_f32(a, 1);
+}
+
+// CIR-LABEL: test_vget_lane_f32
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<1> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 2>
+
+// LLVM: {{.*}}test_vget_lane_f32(<2 x float>{{.*}}[[ARG:%.*]])
+// LLVM: [[RES:%.*]] = extractelement <2 x float> [[ARG]], i32 1
+// LLVM: ret float [[RES]]
+
+float64_t test_vget_lane_f64(float64x1_t a) {
+  return vget_lane_f64(a, 0);
+}
+
+// CIR-LABEL: test_vget_lane_f64
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<0> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 1>
+
+// LLVM: {{.*}}test_vget_lane_f64(<1 x double>{{.*}}[[ARG:%.*]])
+// LLVM: [[RES:%.*]] = extractelement <1 x double> [[ARG]], i32 0
+// LLVM: ret double [[RES]]
+
+float32_t test_vgetq_lane_f32(float32x4_t a) {
+  return vgetq_lane_f32(a, 3);
+}
+
+// CIR-LABEL: test_vgetq_lane_f32
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<3> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 4>
+
+// LLVM: {{.*}}test_vgetq_lane_f32(<4 x float>{{.*}}[[ARG:%.*]])
+// LLVM: [[RES:%.*]] = extractelement <4 x float> [[ARG]], i32 3
+// LLVM: ret float [[RES]]
+
+float64_t test_vgetq_lane_f64(float64x2_t a) {
+  return vgetq_lane_f64(a, 1);
+}
+
+// CIR-LABEL: test_vgetq_lane_f64
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<1> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 2>
+
+// LLVM: {{.*}}test_vgetq_lane_f64(<2 x double>{{.*}}[[ARG:%.*]])
+// LLVM: [[RES:%.*]] = extractelement <2 x double> [[ARG]], i32 1
+// LLVM: ret double [[RES]]
+
+uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
+  return vtrn_u8(a, b);
+
+  // CIR-LABEL: vtrn_u8
+  // CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u8i x 8>>
+  // CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[ADDR:%.*]] = cir.ptr_stride [[PTR]], [[ZERO]] : (!cir.ptr<!cir.vector<!u8i x 8>>, !s32i) -> !cir.ptr<!cir.vector<!u8i x 8>>
+  // CIR: [[RES:%.*]] = cir.vec.shuffle([[INP1:%.*]], [[INP2:%.*]] : !cir.vector<!u8i x 8>) 
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<2> : !s32i, #cir.int<10> : !s32i, 
+  // CIR-SAME: #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : !s32i,
+  // CIR-SAME: #cir.int<14> : !s32i] : !cir.vector<!u8i x 8>
+  // CIR:  cir.store [[RES]], [[ADDR]] : !cir.vector<!u8i x 8>, !cir.ptr<!cir.vector<!u8i x 8>>
+  // CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[ADDR1:%.*]] = cir.ptr_stride [[PTR]], [[ONE]] : (!cir.ptr<!cir.vector<!u8i x 8>>, !s32i) -> !cir.ptr<!cir.vector<!u8i x 8>>
+  // CIR: [[RES1:%.*]] = cir.vec.shuffle([[INP1]], [[INP2]] : !cir.vector<!u8i x 8>) 
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<9> : !s32i, #cir.int<3> : !s32i, #cir.int<11> : !s32i, 
+  // CIR-SAME: #cir.int<5> : !s32i, #cir.int<13> : !s32i, #cir.int<7> : !s32i, #cir.int<15> : !s32i] : 
+  // CIR-SAME: !cir.vector<!u8i x 8>
+  // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!u8i x 8>, !cir.ptr<!cir.vector<!u8i x 8>>
+
+  // LLVM: {{.*}}test_vtrn_u8(<8 x i8>{{.*}}[[A:%.*]], <8 x i8>{{.*}}[[B:%.*]]) 
+  // LLVM: [[VTRN:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], 
+  // LLVM-SAME: <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  // LLVM: store <8 x i8> [[VTRN]], ptr [[RES:%.*]], align 8
+  // LLVM: [[RES1:%.*]] = getelementptr {{.*}}<8 x i8>, ptr [[RES]], i64 1
+  // LLVM: [[VTRN1:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  // LLVM: store <8 x i8> [[VTRN1]], ptr [[RES1]], align 8
+  // LLVM: ret %struct.uint8x8x2_t {{.*}}
+}
+
+uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
+  return vtrn_u16(a, b);
+
+  // CIR-LABEL: vtrn_u16
+  // CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u16i x 4>>
+  // CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[ADDR:%.*]] = cir.ptr_stride [[PTR]], [[ZERO]] : (!cir.ptr<!cir.vector<!u16i x 4>>, !s32i) -> !cir.ptr<!cir.vector<!u16i x 4>>
+  // CIR: [[RES:%.*]] = cir.vec.shuffle([[INP1:%.*]], [[INP2:%.*]] : !cir.vector<!u16i x 4>) 
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, #cir.int<6> : !s32i] : !cir.vector<!u16i x 4>
+  // CIR:  cir.store [[RES]], [[ADDR]] : !cir.vector<!u16i x 4>, !cir.ptr<!cir.vector<!u16i x 4>>
+  // CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[ADDR1:%.*]] = cir.ptr_stride [[PTR]], [[ONE]] : (!cir.ptr<!cir.vector<!u16i x 4>>, !s32i) -> !cir.ptr<!cir.vector<!u16i x 4>>
+  // CIR: [[RES1:%.*]] = cir.vec.shuffle([[INP1]], [[INP2]] : !cir.vector<!u16i x 4>) 
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<5> : !s32i, #cir.int<3> : !s32i, #cir.int<7> : !s32i] :
+  // CIR-SAME: !cir.vector<!u16i x 4>
+  // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!u16i x 4>, !cir.ptr<!cir.vector<!u16i x 4>>
+
+  // LLVM: {{.*}}test_vtrn_u16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]]) 
+  // LLVM: [[VTRN:%.*]] = shufflevector <4 x i16> {{.*}}, <4 x i16> {{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  // LLVM: store <4 x i16> [[VTRN]], ptr [[RES:%.*]], align 8
+  // LLVM: [[RES1:%.*]] = getelementptr {{.*}}<4 x i16>, ptr [[RES]], i64 1
+  // LLVM: [[VTRN1:%.*]] = shufflevector <4 x i16> {{.*}}, <4 x i16> {{.*}}, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  // LLVM: store <4 x i16> [[VTRN1]], ptr [[RES1]], align 8
+  // LLVM: ret %struct.uint16x4x2_t {{.*}}
+}
+
+int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
+  return vtrn_s32(a, b);
+
+  // CIR-LABEL: vtrn_s32
+  // CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!s32i x 2>>
+  // CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[ADDR:%.*]] = cir.ptr_stride [[PTR]], [[ZERO]] : (!cir.ptr<!cir.vector<!s32i x 2>>, !s32i) -> !cir.ptr<!cir.vector<!s32i x 2>>
+  // CIR: [[RES:%.*]] = cir.vec.shuffle([[INP1:%.*]], [[INP2:%.*]] : !cir.vector<!s32i x 2>) 
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!s32i x 2>
+  // CIR:  cir.store [[RES]], [[ADDR]] : !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>
+  // CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[ADDR1:%.*]] = cir.ptr_stride [[PTR]], [[ONE]] : (!cir.ptr<!cir.vector<!s32i x 2>>, !s32i) -> !cir.ptr<!cir.vector<!s32i x 2>>
+  // CIR: [[RES1:%.*]] = cir.vec.shuffle([[INP1]], [[INP2]] : !cir.vector<!s32i x 2>) 
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<3> : !s32i] :
+  // CIR-SAME: !cir.vector<!s32i x 2>
+  // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>
+
+  // LLVM: {{.*}}test_vtrn_s32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[B:%.*]])   
+  // LLVM: [[VTRN:%.*]] = shufflevector <2 x i32> {{.*}}, <2 x i32> {{.*}}, <2 x i32> <i32 0, i32 2>
+  // LLVM: store <2 x i32> [[VTRN]], ptr [[RES:%.*]], align 8
+  // LLVM: [[RES1:%.*]] = getelementptr {{.*}}<2 x i32>, ptr [[RES]], i64 1
+  // LLVM: [[VTRN1:%.*]] = shufflevector <2 x i32> {{.*}}, <2 x i32> {{.*}}, <2 x i32> <i32 1, i32 3>
+  // LLVM: store <2 x i32> [[VTRN1]], ptr [[RES1]], align 8
+  // LLVM: ret %struct.int32x2x2_t {{.*}}
+}
+
+uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
+  return vtrnq_u8(a, b);
+
+  // CIR-LABEL: vtrnq_u8
+  // CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u8i x 16>>
+  // CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[ADDR:%.*]] = cir.ptr_stride [[PTR]], [[ZERO]] : (!cir.ptr<!cir.vector<!u8i x 16>>, !s32i) -> !cir.ptr<!cir.vector<!u8i x 16>>
+  // CIR: [[RES:%.*]] = cir.vec.shuffle([[INP1:%.*]], [[INP2:%.*]] : !cir.vector<!u8i x 16>) 
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<16> : !s32i, #cir.int<2> : !s32i, #cir.int<18> : !s32i, 
+  // CIR-SAME: #cir.int<4> : !s32i, #cir.int<20> : !s32i, #cir.int<6> : !s32i, #cir.int<22> : !s32i,
+  // CIR-SAME: #cir.int<8> : !s32i, #cir.int<24> : !s32i, #cir.int<10> : !s32i, #cir.int<26> : !s32i,
+  // CIR-SAME: #cir.int<12> : !s32i, #cir.int<28> : !s32i, #cir.int<14> : !s32i, #cir.int<30> : !s32i] : !cir.vector<!u8i x 16>
+  // CIR:  cir.store [[RES]], [[ADDR]] : !cir.vector<!u8i x 16>, !cir.ptr<!cir.vector<!u8i x 16>>
+  // CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[ADDR1:%.*]] = cir.ptr_stride [[PTR]], [[ONE]] : (!cir.ptr<!cir.vector<!u8i x 16>>, !s32i) -> !cir.ptr<!cir.vector<!u8i x 16>>
+  // CIR: [[RES1:%.*]] = cir.vec.shuffle([[INP1]], [[INP2]] : !cir.vector<!u8i x 16>) 
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<17> : !s32i, #cir.int<3> : !s32i, #cir.int<19> : !s32i, 
+  // CIR-SAME: #cir.int<5> : !s32i, #cir.int<21> : !s32i, #cir.int<7> : !s32i, #cir.int<23> : !s32i,
+  // CIR-SAME: #cir.int<9> : !s32i, #cir.int<25> : !s32i, #cir.int<11> : !s32i, #cir.int<27> : !s32i,
+  // CIR-SAME: #cir.int<13> : !s32i, #cir.int<29> : !s32i, #cir.int<15> : !s32i, #cir.int<31> : !s32i] : !cir.vector<!u8i x 16>
+  // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!u8i x 16>, !cir.ptr<!cir.vector<!u8i x 16>>
+
+  // LLVM: {{.*}}test_vtrnq_u8(<16 x i8>{{.*}}[[A:%.*]], <16 x i8>{{.*}}[[B:%.*]])
+  // LLVM: [[VTRN:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], 
+  // LLVM-SAME: <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, 
+  // LLVM-SAME: i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  // LLVM: store <16 x i8> [[VTRN]], ptr [[RES:%.*]], align 16
+  // LLVM: [[RES1:%.*]] = getelementptr {{.*}}<16 x i8>, ptr [[RES]], i64 1
+  // LLVM: [[VTRN1:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], 
+  // LLVM-SAME: <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23,
+  // LLVM-SAME: i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> 
+  // LLVM: store <16 x i8> [[VTRN1]], ptr [[RES1]], align 16
+  // LLVM: ret %struct.uint8x16x2_t {{.*}}
+}
+
+int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
+  return vtrnq_s16(a, b);
+
+  // CIR-LABEL: vtrnq_s16
+  // CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!s16i x 8>>
+  // CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[ADDR:%.*]] = cir.ptr_stride [[PTR]], [[ZERO]] : (!cir.ptr<!cir.vector<!s16i x 8>>, !s32i) -> !cir.ptr<!cir.vector<!s16i x 8>>
+  // CIR: [[RES:%.*]] = cir.vec.shuffle([[INP1:%.*]], [[INP2:%.*]] : !cir.vector<!s16i x 8>) 
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<2> : !s32i, #cir.int<10> : !s32i, 
+  // CIR-SAME: #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : !s32i,
+  // CIR-SAME: #cir.int<14> : !s32i] : !cir.vector<!s16i x 8>
+  // CIR:  cir.store [[RES]], [[ADDR]] : !cir.vector<!s16i x 8>, !cir.ptr<!cir.vector<!s16i x 8>>
+  // CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[ADDR1:%.*]] = cir.ptr_stride [[PTR]], [[ONE]] : (!cir.ptr<!cir.vector<!s16i x 8>>, !s32i) -> !cir.ptr<!cir.vector<!s16i x 8>>
+  // CIR: [[RES1:%.*]] = cir.vec.shuffle([[INP1]], [[INP2]] : !cir.vector<!s16i x 8>) 
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<9> : !s32i, #cir.int<3> : !s32i, #cir.int<11> : !s32i, 
+  // CIR-SAME: #cir.int<5> : !s32i, #cir.int<13> : !s32i, #cir.int<7> : !s32i, #cir.int<15> : !s32i] : 
+  // CIR-SAME: !cir.vector<!s16i x 8>
+  // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!s16i x 8>, !cir.ptr<!cir.vector<!s16i x 8>>
+
+  // LLVM: {{.*}}test_vtrnq_s16(<8 x i16>{{.*}}[[A:%.*]], <8 x i16>{{.*}}[[B:%.*]])
+  // LLVM: [[VTRN:%.*]] = shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  // LLVM: store <8 x i16> [[VTRN]], ptr [[RES:%.*]], align 16
+  // LLVM: [[RES1:%.*]] = getelementptr {{.*}}<8 x i16>, ptr [[RES]], i64 1
+  // LLVM: [[VTRN1:%.*]] = shufflevector <8 x i16> {{.*}}, <8 x i16> {{.*}}, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  // LLVM: store <8 x i16> [[VTRN1]], ptr [[RES1]], align 16
+  // LLVM: ret %struct.int16x8x2_t {{.*}}
+}
+
+uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
+  return vtrnq_u32(a, b);
+
+  // CIR-LABEL: vtrnq_u32
+  // CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u32i x 4>>
+  // CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[ADDR:%.*]] = cir.ptr_stride [[PTR]], [[ZERO]] : (!cir.ptr<!cir.vector<!u32i x 4>>, !s32i) -> !cir.ptr<!cir.vector<!u32i x 4>>
+  // CIR: [[RES:%.*]] = cir.vec.shuffle([[INP1:%.*]], [[INP2:%.*]] : !cir.vector<!u32i x 4>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, #cir.int<6> : !s32i] :
+  // CIR-SAME: !cir.vector<!u32i x 4>
+  // CIR:  cir.store [[RES]], [[ADDR]] : !cir.vector<!u32i x 4>, !cir.ptr<!cir.vector<!u32i x 4>>
+  // CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[ADDR1:%.*]] = cir.ptr_stride [[PTR]], [[ONE]] : (!cir.ptr<!cir.vector<!u32i x 4>>, !s32i) -> !cir.ptr<!cir.vector<!u32i x 4>>
+  // CIR: [[RES1:%.*]] = cir.vec.shuffle([[INP1]], [[INP2]] : !cir.vector<!u32i x 4>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<5> : !s32i, #cir.int<3> : !s32i, #cir.int<7> : !s32i] :
+  // CIR-SAME: !cir.vector<!u32i x 4>
+  // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!u32i x 4>, !cir.ptr<!cir.vector<!u32i x 4>>
+
+  // LLVM checks mirror the sibling vtrnq tests; without the label anchor
+  // below, the lone `ret` check could match an earlier function's output.
+  // LLVM: {{.*}}test_vtrnq_u32(<4 x i32>{{.*}}, <4 x i32>{{.*}})
+  // LLVM: [[VTRN:%.*]] = shufflevector <4 x i32> {{.*}}, <4 x i32> {{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  // LLVM: store <4 x i32> [[VTRN]], ptr [[RES:%.*]], align 16
+  // LLVM: [[RES1:%.*]] = getelementptr {{.*}}<4 x i32>, ptr [[RES]], i64 1
+  // LLVM: [[VTRN1:%.*]] = shufflevector <4 x i32> {{.*}}, <4 x i32> {{.*}}, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  // LLVM: store <4 x i32> [[VTRN1]], ptr [[RES1]], align 16
+  // LLVM: ret %struct.uint32x4x2_t {{.*}}
+}
+
+uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
+  return vuzp_u8(a, b);
+
+  // Unzip: even lanes {0,2,...,14} of the concatenated (a,b) are stored to
+  // result slot 0, odd lanes {1,3,...,15} to slot 1.
+  // CIR-LABEL: vuzp_u8
+  // CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u8i x 8>>
+  // CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[ADDR:%.*]] = cir.ptr_stride [[PTR]], [[ZERO]] : (!cir.ptr<!cir.vector<!u8i x 8>>, !s32i) -> !cir.ptr<!cir.vector<!u8i x 8>>
+  // CIR: [[RES:%.*]] = cir.vec.shuffle([[INP1:%.*]], [[INP2:%.*]] : !cir.vector<!u8i x 8>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<2> : !s32i, #cir.int<4> : !s32i, #cir.int<6> : !s32i,
+  // CIR-SAME: #cir.int<8> : !s32i, #cir.int<10> : !s32i, #cir.int<12> : !s32i,
+  // CIR-SAME: #cir.int<14> : !s32i] : !cir.vector<!u8i x 8>
+  // CIR:  cir.store [[RES]], [[ADDR]] : !cir.vector<!u8i x 8>, !cir.ptr<!cir.vector<!u8i x 8>>
+  // CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[ADDR1:%.*]] = cir.ptr_stride [[PTR]], [[ONE]] : (!cir.ptr<!cir.vector<!u8i x 8>>, !s32i) -> !cir.ptr<!cir.vector<!u8i x 8>>
+  // CIR: [[RES1:%.*]] = cir.vec.shuffle([[INP1]], [[INP2]] : !cir.vector<!u8i x 8>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME: #cir.int<9> : !s32i, #cir.int<11> : !s32i, #cir.int<13> : !s32i, #cir.int<15> : !s32i] :
+  // CIR-SAME: !cir.vector<!u8i x 8>
+  // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!u8i x 8>, !cir.ptr<!cir.vector<!u8i x 8>>
+
+  // LLVM: {{.*}}test_vuzp_u8(<8 x i8>{{.*}}[[A:%.*]], <8 x i8>{{.*}}[[B:%.*]])
+  // LLVM: [[VUZP:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]],
+  // LLVM-SAME: <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  // LLVM: store <8 x i8> [[VUZP]], ptr [[RES:%.*]], align 8
+  // LLVM: [[RES1:%.*]] = getelementptr {{.*}}<8 x i8>, ptr [[RES]], i64 1
+  // LLVM: [[VUZP1:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  // LLVM: store <8 x i8> [[VUZP1]], ptr [[RES1]], align 8
+  // LLVM-NEXT: [[RET:%.*]] = load %struct.uint8x8x2_t, ptr {{.*}}
+  // LLVM-NEXT: ret %struct.uint8x8x2_t [[RET]]
+}
+
+uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
+  return vuzp_u16(a, b);
+
+  // Unzip <4 x u16>: even lanes {0,2,4,6} stored to slot 0, odd lanes {1,3,5,7} to slot 1.
+  // CIR-LABEL: vuzp_u16
+  // CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u16i x 4>>
+  // CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[ADDR:%.*]] = cir.ptr_stride [[PTR]], [[ZERO]] : (!cir.ptr<!cir.vector<!u16i x 4>>, !s32i) -> !cir.ptr<!cir.vector<!u16i x 4>>
+  // CIR: [[RES:%.*]] = cir.vec.shuffle([[INP1:%.*]], [[INP2:%.*]] : !cir.vector<!u16i x 4>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<2> : !s32i, #cir.int<4> : !s32i, #cir.int<6> : !s32i] : !cir.vector<!u16i x 4>
+  // CIR:  cir.store [[RES]], [[ADDR]] : !cir.vector<!u16i x 4>, !cir.ptr<!cir.vector<!u16i x 4>>
+  // CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[ADDR1:%.*]] = cir.ptr_stride [[PTR]], [[ONE]] : (!cir.ptr<!cir.vector<!u16i x 4>>, !s32i) -> !cir.ptr<!cir.vector<!u16i x 4>>
+  // CIR: [[RES1:%.*]] = cir.vec.shuffle([[INP1]], [[INP2]] : !cir.vector<!u16i x 4>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<7> : !s32i] :
+  // CIR-SAME: !cir.vector<!u16i x 4>
+  // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!u16i x 4>, !cir.ptr<!cir.vector<!u16i x 4>>
+
+  // LLVM: {{.*}}test_vuzp_u16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
+  // LLVM: [[vuzp:%.*]] = shufflevector <4 x i16> {{.*}}, <4 x i16> {{.*}}, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  // LLVM: store <4 x i16> [[vuzp]], ptr [[RES:%.*]], align 8
+  // LLVM: [[RES1:%.*]] = getelementptr {{.*}}<4 x i16>, ptr [[RES]], i64 1
+  // LLVM: [[vuzp1:%.*]] = shufflevector <4 x i16> {{.*}}, <4 x i16> {{.*}}, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  // LLVM: store <4 x i16> [[vuzp1]], ptr [[RES1]], align 8
+  // LLVM-NEXT: [[RET:%.*]] = load %struct.uint16x4x2_t, ptr {{.*}}
+  // LLVM-NEXT: ret %struct.uint16x4x2_t [[RET]]
+}
+
+int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
+  return vuzp_s32(a, b);
+
+  // Unzip <2 x s32>: even lanes {0,2} stored to slot 0, odd lanes {1,3} to slot 1.
+  // CIR-LABEL: vuzp_s32
+  // CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!s32i x 2>>
+  // CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[ADDR:%.*]] = cir.ptr_stride [[PTR]], [[ZERO]] : (!cir.ptr<!cir.vector<!s32i x 2>>, !s32i) -> !cir.ptr<!cir.vector<!s32i x 2>>
+  // CIR: [[RES:%.*]] = cir.vec.shuffle([[INP1:%.*]], [[INP2:%.*]] : !cir.vector<!s32i x 2>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!s32i x 2>
+  // CIR:  cir.store [[RES]], [[ADDR]] : !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>
+  // CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[ADDR1:%.*]] = cir.ptr_stride [[PTR]], [[ONE]] : (!cir.ptr<!cir.vector<!s32i x 2>>, !s32i) -> !cir.ptr<!cir.vector<!s32i x 2>>
+  // CIR: [[RES1:%.*]] = cir.vec.shuffle([[INP1]], [[INP2]] : !cir.vector<!s32i x 2>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<3> : !s32i] :
+  // CIR-SAME: !cir.vector<!s32i x 2>
+  // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>
+
+  // LLVM: {{.*}}test_vuzp_s32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[B:%.*]])
+  // LLVM: [[vuzp:%.*]] = shufflevector <2 x i32> {{.*}}, <2 x i32> {{.*}}, <2 x i32> <i32 0, i32 2>
+  // LLVM: store <2 x i32> [[vuzp]], ptr [[RES:%.*]], align 8
+  // LLVM: [[RES1:%.*]] = getelementptr {{.*}}<2 x i32>, ptr [[RES]], i64 1
+  // LLVM: [[vuzp1:%.*]] = shufflevector <2 x i32> {{.*}}, <2 x i32> {{.*}}, <2 x i32> <i32 1, i32 3>
+  // LLVM: store <2 x i32> [[vuzp1]], ptr [[RES1]], align 8
+  // LLVM-NEXT: [[RET:%.*]] = load %struct.int32x2x2_t, ptr {{.*}}
+  // LLVM-NEXT: ret %struct.int32x2x2_t [[RET]]
+}
+
+float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
+  return vuzp_f32(a, b);
+
+  // Unzip <2 x float>: even lanes {0,2} stored to slot 0, odd lanes {1,3} to slot 1.
+  // CIR-LABEL: vuzp_f32
+  // CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!cir.float x 2>>
+  // CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[ADDR:%.*]] = cir.ptr_stride [[PTR]], [[ZERO]] : (!cir.ptr<!cir.vector<!cir.float x 2>>, !s32i) -> !cir.ptr<!cir.vector<!cir.float x 2>>
+  // CIR: [[RES:%.*]] = cir.vec.shuffle([[INP1:%.*]], [[INP2:%.*]] : !cir.vector<!cir.float x 2>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!cir.float x 2>
+  // CIR:  cir.store [[RES]], [[ADDR]] : !cir.vector<!cir.float x 2>, !cir.ptr<!cir.vector<!cir.float x 2>>
+  // CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[ADDR1:%.*]] = cir.ptr_stride [[PTR]], [[ONE]] : (!cir.ptr<!cir.vector<!cir.float x 2>>, !s32i) -> !cir.ptr<!cir.vector<!cir.float x 2>>
+  // CIR: [[RES1:%.*]] = cir.vec.shuffle([[INP1]], [[INP2]] : !cir.vector<!cir.float x 2>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<3> : !s32i] :
+  // CIR-SAME: !cir.vector<!cir.float x 2>
+  // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!cir.float x 2>, !cir.ptr<!cir.vector<!cir.float x 2>>
+
+  // LLVM: {{.*}}test_vuzp_f32(<2 x float>{{.*}}[[A:%.*]], <2 x float>{{.*}}[[B:%.*]])
+  // LLVM: [[vuzp:%.*]] = shufflevector <2 x float> {{.*}} <2 x i32> <i32 0, i32 2>
+  // LLVM: store <2 x float> [[vuzp]], ptr [[RES:%.*]], align 8
+  // LLVM: [[RES1:%.*]] = getelementptr {{.*}}<2 x float>, ptr [[RES]], i64 1
+  // LLVM: [[vuzp1:%.*]] = shufflevector <2 x float> {{.*}} <2 x i32> <i32 1, i32 3>
+  // LLVM: store <2 x float> [[vuzp1]], ptr [[RES1]], align 8
+  // LLVM-NEXT: [[RET:%.*]] = load %struct.float32x2x2_t, ptr {{.*}}
+  // LLVM-NEXT: ret %struct.float32x2x2_t [[RET]]
+}
+
+uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
+  return vuzpq_u8(a, b);
+
+  // Unzip <16 x u8>: even lanes {0,2,...,30} stored to slot 0, odd lanes {1,3,...,31} to slot 1.
+  // CIR-LABEL: vuzpq_u8
+  // CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u8i x 16>>
+  // CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[ADDR:%.*]] = cir.ptr_stride [[PTR]], [[ZERO]] : (!cir.ptr<!cir.vector<!u8i x 16>>, !s32i) -> !cir.ptr<!cir.vector<!u8i x 16>>
+  // CIR: [[RES:%.*]] = cir.vec.shuffle([[INP1:%.*]], [[INP2:%.*]] : !cir.vector<!u8i x 16>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<2> : !s32i, #cir.int<4> : !s32i, #cir.int<6> : !s32i,
+  // CIR-SAME: #cir.int<8> : !s32i, #cir.int<10> : !s32i, #cir.int<12> : !s32i, #cir.int<14> : !s32i,
+  // CIR-SAME: #cir.int<16> : !s32i, #cir.int<18> : !s32i, #cir.int<20> : !s32i, #cir.int<22> : !s32i,
+  // CIR-SAME: #cir.int<24> : !s32i, #cir.int<26> : !s32i, #cir.int<28> : !s32i, #cir.int<30> : !s32i] : !cir.vector<!u8i x 16>
+  // CIR:  cir.store [[RES]], [[ADDR]] : !cir.vector<!u8i x 16>, !cir.ptr<!cir.vector<!u8i x 16>>
+  // CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[ADDR1:%.*]] = cir.ptr_stride [[PTR]], [[ONE]] : (!cir.ptr<!cir.vector<!u8i x 16>>, !s32i) -> !cir.ptr<!cir.vector<!u8i x 16>>
+  // CIR: [[RES1:%.*]] = cir.vec.shuffle([[INP1]], [[INP2]] : !cir.vector<!u8i x 16>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME: #cir.int<9> : !s32i, #cir.int<11> : !s32i, #cir.int<13> : !s32i, #cir.int<15> : !s32i,
+  // CIR-SAME: #cir.int<17> : !s32i, #cir.int<19> : !s32i, #cir.int<21> : !s32i, #cir.int<23> : !s32i,
+  // CIR-SAME: #cir.int<25> : !s32i, #cir.int<27> : !s32i, #cir.int<29> : !s32i, #cir.int<31> : !s32i] : !cir.vector<!u8i x 16>
+  // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!u8i x 16>, !cir.ptr<!cir.vector<!u8i x 16>>
+
+  // LLVM: {{.*}}test_vuzpq_u8(<16 x i8>{{.*}}[[A:%.*]], <16 x i8>{{.*}}[[B:%.*]])
+  // LLVM: [[vuzp:%.*]] = shufflevector <16 x i8> {{.*}} <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14,
+  // LLVM-SAME: i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  // LLVM: store <16 x i8> [[vuzp]], ptr [[RES:%.*]], align 16
+  // LLVM: [[RES1:%.*]] = getelementptr {{.*}}<16 x i8>, ptr [[RES]], i64 1
+  // LLVM: [[vuzp1:%.*]] = shufflevector <16 x i8> {{.*}} <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15,
+  // LLVM-SAME: i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  // LLVM: store <16 x i8> [[vuzp1]], ptr [[RES1]], align 16
+  // LLVM-NEXT: [[RET:%.*]] = load %struct.uint8x16x2_t, ptr {{.*}}
+  // LLVM-NEXT: ret %struct.uint8x16x2_t [[RET]]
+}
+
+int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
+  return vuzpq_s16(a, b);
+
+  // Unzip <8 x s16>: even lanes {0,2,...,14} stored to slot 0, odd lanes {1,3,...,15} to slot 1.
+  // CIR-LABEL: vuzpq_s16
+  // CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!s16i x 8>>
+  // CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[ADDR:%.*]] = cir.ptr_stride [[PTR]], [[ZERO]] : (!cir.ptr<!cir.vector<!s16i x 8>>, !s32i) -> !cir.ptr<!cir.vector<!s16i x 8>>
+  // CIR: [[RES:%.*]] = cir.vec.shuffle([[INP1:%.*]], [[INP2:%.*]] : !cir.vector<!s16i x 8>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<2> : !s32i, #cir.int<4> : !s32i, #cir.int<6> : !s32i,
+  // CIR-SAME: #cir.int<8> : !s32i, #cir.int<10> : !s32i, #cir.int<12> : !s32i,
+  // CIR-SAME: #cir.int<14> : !s32i] : !cir.vector<!s16i x 8>
+  // CIR:  cir.store [[RES]], [[ADDR]] : !cir.vector<!s16i x 8>, !cir.ptr<!cir.vector<!s16i x 8>>
+  // CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[ADDR1:%.*]] = cir.ptr_stride [[PTR]], [[ONE]] : (!cir.ptr<!cir.vector<!s16i x 8>>, !s32i) -> !cir.ptr<!cir.vector<!s16i x 8>>
+  // CIR: [[RES1:%.*]] = cir.vec.shuffle([[INP1]], [[INP2]] : !cir.vector<!s16i x 8>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME: #cir.int<9> : !s32i, #cir.int<11> : !s32i, #cir.int<13> : !s32i,
+  // CIR-SAME: #cir.int<15> : !s32i] : !cir.vector<!s16i x 8>
+  // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!s16i x 8>, !cir.ptr<!cir.vector<!s16i x 8>>
+
+  // LLVM: {{.*}}test_vuzpq_s16(<8 x i16>{{.*}}[[A:%.*]], <8 x i16>{{.*}}[[B:%.*]])
+  // LLVM: [[vuzp:%.*]] = shufflevector <8 x i16> {{.*}} <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  // LLVM: store <8 x i16> [[vuzp]], ptr [[RES:%.*]], align 16
+  // LLVM: [[RES1:%.*]] = getelementptr {{.*}}<8 x i16>, ptr [[RES]], i64 1
+  // LLVM: [[vuzp1:%.*]] = shufflevector <8 x i16> {{.*}} <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  // LLVM: store <8 x i16> [[vuzp1]], ptr [[RES1]], align 16
+  // LLVM: [[RET:%.*]] = load %struct.int16x8x2_t, ptr {{.*}}
+  // LLVM-NEXT: ret %struct.int16x8x2_t [[RET]]
+}
+
+uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
+  return vuzpq_u32(a, b);
+
+  // Unzip <4 x u32>: even lanes of the concatenated inputs, then odd lanes.
+  // CIR-LABEL: vuzpq_u32
+  // CIR: [[BASE:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u32i x 4>>
+  // CIR: [[IDX0:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[FIELD0:%.*]] = cir.ptr_stride [[BASE]], [[IDX0]] : (!cir.ptr<!cir.vector<!u32i x 4>>, !s32i) -> !cir.ptr<!cir.vector<!u32i x 4>>
+  // CIR: [[EVENS:%.*]] = cir.vec.shuffle([[LHS:%.*]], [[RHS:%.*]] : !cir.vector<!u32i x 4>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<2> : !s32i, #cir.int<4> : !s32i, #cir.int<6> : !s32i] :
+  // CIR-SAME: !cir.vector<!u32i x 4>
+  // CIR:  cir.store [[EVENS]], [[FIELD0]] : !cir.vector<!u32i x 4>, !cir.ptr<!cir.vector<!u32i x 4>>
+  // CIR: [[IDX1:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[FIELD1:%.*]] = cir.ptr_stride [[BASE]], [[IDX1]] : (!cir.ptr<!cir.vector<!u32i x 4>>, !s32i) -> !cir.ptr<!cir.vector<!u32i x 4>>
+  // CIR: [[ODDS:%.*]] = cir.vec.shuffle([[LHS]], [[RHS]] : !cir.vector<!u32i x 4>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<7> : !s32i] :
+  // CIR-SAME: !cir.vector<!u32i x 4>
+  // CIR:  cir.store [[ODDS]], [[FIELD1]] : !cir.vector<!u32i x 4>, !cir.ptr<!cir.vector<!u32i x 4>>
+  // LLVM: [[RETVAL:%.*]] = load %struct.uint32x4x2_t, ptr {{.*}}
+  // LLVM-NEXT: ret %struct.uint32x4x2_t [[RETVAL]]
+}
+
+float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
+  return vuzpq_f32(a, b);
+
+  // Unzip <4 x float>: even lanes of the concatenated inputs, then odd lanes.
+  // CIR-LABEL: vuzpq_f32
+  // CIR: [[BASE:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!cir.float x 4>>
+  // CIR: [[IDX0:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[FIELD0:%.*]] = cir.ptr_stride [[BASE]], [[IDX0]] : (!cir.ptr<!cir.vector<!cir.float x 4>>, !s32i) -> !cir.ptr<!cir.vector<!cir.float x 4>>
+  // CIR: [[EVENS:%.*]] = cir.vec.shuffle([[LHS:%.*]], [[RHS:%.*]] : !cir.vector<!cir.float x 4>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<2> : !s32i, #cir.int<4> : !s32i, #cir.int<6> : !s32i] :
+  // CIR-SAME: !cir.vector<!cir.float x 4>
+  // CIR:  cir.store [[EVENS]], [[FIELD0]] : !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>
+  // CIR: [[IDX1:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[FIELD1:%.*]] = cir.ptr_stride [[BASE]], [[IDX1]] : (!cir.ptr<!cir.vector<!cir.float x 4>>, !s32i) -> !cir.ptr<!cir.vector<!cir.float x 4>>
+  // CIR: [[ODDS:%.*]] = cir.vec.shuffle([[LHS]], [[RHS]] : !cir.vector<!cir.float x 4>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<7> : !s32i] :
+  // CIR-SAME: !cir.vector<!cir.float x 4>
+  // CIR:  cir.store [[ODDS]], [[FIELD1]] : !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>
+  // LLVM: [[RETVAL:%.*]] = load %struct.float32x4x2_t, ptr {{.*}}
+  // LLVM-NEXT: ret %struct.float32x4x2_t [[RETVAL]]
+}
+
+uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
+  return vzip_u8(a, b);
+
+  // Zip: interleaved low halves {0,8,1,9,2,10,3,11} are stored to result
+  // slot 0, interleaved high halves {4,12,5,13,6,14,7,15} to slot 1.
+  // CIR-LABEL: vzip_u8
+  // CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u8i x 8>>
+  // CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[ADDR:%.*]] = cir.ptr_stride [[PTR]], [[ZERO]] : (!cir.ptr<!cir.vector<!u8i x 8>>, !s32i) -> !cir.ptr<!cir.vector<!u8i x 8>>
+  // CIR: [[RES:%.*]] = cir.vec.shuffle([[INP1:%.*]], [[INP2:%.*]] : !cir.vector<!u8i x 8>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<1> : !s32i, #cir.int<9> : !s32i,
+  // CIR-SAME: #cir.int<2> : !s32i, #cir.int<10> : !s32i, #cir.int<3> : !s32i,
+  // CIR-SAME: #cir.int<11> : !s32i] : !cir.vector<!u8i x 8>
+  // CIR:  cir.store [[RES]], [[ADDR]] : !cir.vector<!u8i x 8>, !cir.ptr<!cir.vector<!u8i x 8>>
+  // CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[ADDR1:%.*]] = cir.ptr_stride [[PTR]], [[ONE]] : (!cir.ptr<!cir.vector<!u8i x 8>>, !s32i) -> !cir.ptr<!cir.vector<!u8i x 8>>
+  // CIR: [[RES1:%.*]] = cir.vec.shuffle([[INP1]], [[INP2]] : !cir.vector<!u8i x 8>)
+  // CIR-SAME: [#cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<5> : !s32i, #cir.int<13> : !s32i,
+  // CIR-SAME: #cir.int<6> : !s32i, #cir.int<14> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME: #cir.int<15> : !s32i] : !cir.vector<!u8i x 8>
+  // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!u8i x 8>, !cir.ptr<!cir.vector<!u8i x 8>>
+
+  // LLVM: {{.*}}test_vzip_u8(<8 x i8>{{.*}}[[A:%.*]], <8 x i8>{{.*}}[[B:%.*]])
+  // LLVM: [[VZIP:%.*]] = shufflevector <8 x i8> {{.*}} <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  // LLVM: store <8 x i8> [[VZIP]], ptr [[RES:%.*]], align 8
+  // LLVM: [[RES1:%.*]] = getelementptr {{.*}}<8 x i8>, ptr [[RES]], i64 1
+  // LLVM: [[VZIP1:%.*]] = shufflevector <8 x i8> {{.*}} <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  // LLVM: store <8 x i8> [[VZIP1]], ptr [[RES1]], align 8
+  // LLVM-NEXT: [[RET:%.*]] = load %struct.uint8x8x2_t, ptr {{.*}}
+  // LLVM-NEXT: ret %struct.uint8x8x2_t [[RET]]
+}
+
+uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
+  return vzip_u16(a, b);
+
+  // Zip <4 x u16>: low-half interleave {0,4,1,5} stored to slot 0, high-half {2,6,3,7} to slot 1.
+  // CIR-LABEL: vzip_u16
+  // CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u16i x 4>>
+  // CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[ADDR:%.*]] = cir.ptr_stride [[PTR]], [[ZERO]] : (!cir.ptr<!cir.vector<!u16i x 4>>, !s32i) -> !cir.ptr<!cir.vector<!u16i x 4>>
+  // CIR: [[RES:%.*]] = cir.vec.shuffle([[INP1:%.*]], [[INP2:%.*]] : !cir.vector<!u16i x 4>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<1> : !s32i, #cir.int<5> : !s32i] : !cir.vector<!u16i x 4>
+  // CIR:  cir.store [[RES]], [[ADDR]] : !cir.vector<!u16i x 4>, !cir.ptr<!cir.vector<!u16i x 4>>
+  // CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[ADDR1:%.*]] = cir.ptr_stride [[PTR]], [[ONE]] : (!cir.ptr<!cir.vector<!u16i x 4>>, !s32i) -> !cir.ptr<!cir.vector<!u16i x 4>>
+  // CIR: [[RES1:%.*]] = cir.vec.shuffle([[INP1]], [[INP2]] : !cir.vector<!u16i x 4>)
+  // CIR-SAME: [#cir.int<2> : !s32i, #cir.int<6> : !s32i, #cir.int<3> : !s32i, #cir.int<7> : !s32i] :
+  // CIR-SAME: !cir.vector<!u16i x 4>
+  // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!u16i x 4>, !cir.ptr<!cir.vector<!u16i x 4>>
+
+  // LLVM: {{.*}}test_vzip_u16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
+  // LLVM: [[vzip:%.*]] = shufflevector {{.*}} <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  // LLVM: store <4 x i16> [[vzip]], ptr [[RES:%.*]], align 8
+  // LLVM: [[RES1:%.*]] = getelementptr {{.*}}<4 x i16>, ptr [[RES]], i64 1
+  // LLVM: [[vzip1:%.*]] = shufflevector <4 x i16> {{.*}} <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+  // LLVM: store <4 x i16> [[vzip1]], ptr [[RES1]], align 8
+  // LLVM-NEXT: [[RET:%.*]] = load %struct.uint16x4x2_t, ptr {{.*}}
+  // LLVM-NEXT: ret %struct.uint16x4x2_t [[RET]]
+}
+
+int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
+  return vzip_s32(a, b);
+
+  // Zip <2 x s32>: with 2 lanes the zip degenerates to {0,2} in slot 0 and {1,3} in slot 1.
+  // CIR-LABEL: vzip_s32
+  // CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!s32i x 2>>
+  // CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[ADDR:%.*]] = cir.ptr_stride [[PTR]], [[ZERO]] : (!cir.ptr<!cir.vector<!s32i x 2>>, !s32i) -> !cir.ptr<!cir.vector<!s32i x 2>>
+  // CIR: [[RES:%.*]] = cir.vec.shuffle([[INP1:%.*]], [[INP2:%.*]] : !cir.vector<!s32i x 2>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!s32i x 2>
+  // CIR:  cir.store [[RES]], [[ADDR]] : !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>
+  // CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[ADDR1:%.*]] = cir.ptr_stride [[PTR]], [[ONE]] : (!cir.ptr<!cir.vector<!s32i x 2>>, !s32i) -> !cir.ptr<!cir.vector<!s32i x 2>>
+  // CIR: [[RES1:%.*]] = cir.vec.shuffle([[INP1]], [[INP2]] : !cir.vector<!s32i x 2>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<3> : !s32i] :
+  // CIR-SAME: !cir.vector<!s32i x 2>
+  // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>
+
+  // LLVM: {{.*}}test_vzip_s32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[B:%.*]])
+  // LLVM: [[vzip:%.*]] = shufflevector <2 x i32> {{.*}} <2 x i32> <i32 0, i32 2>
+  // LLVM: store <2 x i32> [[vzip]], ptr [[RES:%.*]], align 8
+  // LLVM: [[RES1:%.*]] = getelementptr {{.*}}<2 x i32>, ptr [[RES]], i64 1
+  // LLVM: [[vzip1:%.*]] = shufflevector <2 x i32> {{.*}} <2 x i32> <i32 1, i32 3>
+  // LLVM: store <2 x i32> [[vzip1]], ptr [[RES1]], align 8
+  // LLVM-NEXT: [[RET:%.*]] = load %struct.int32x2x2_t, ptr {{.*}}
+  // LLVM-NEXT: ret %struct.int32x2x2_t [[RET]]
+}
+
+float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
+  return vzip_f32(a, b);
+
+  // Zip <2 x float>: with 2 lanes the zip degenerates to {0,2} in slot 0 and {1,3} in slot 1.
+  // CIR-LABEL: vzip_f32
+  // CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!cir.float x 2>>
+  // CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[ADDR:%.*]] = cir.ptr_stride [[PTR]], [[ZERO]] : (!cir.ptr<!cir.vector<!cir.float x 2>>, !s32i) -> !cir.ptr<!cir.vector<!cir.float x 2>>
+  // CIR: [[RES:%.*]] = cir.vec.shuffle([[INP1:%.*]], [[INP2:%.*]] : !cir.vector<!cir.float x 2>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!cir.float x 2>
+  // CIR:  cir.store [[RES]], [[ADDR]] : !cir.vector<!cir.float x 2>, !cir.ptr<!cir.vector<!cir.float x 2>>
+  // CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[ADDR1:%.*]] = cir.ptr_stride [[PTR]], [[ONE]] : (!cir.ptr<!cir.vector<!cir.float x 2>>, !s32i) -> !cir.ptr<!cir.vector<!cir.float x 2>>
+  // CIR: [[RES1:%.*]] = cir.vec.shuffle([[INP1]], [[INP2]] : !cir.vector<!cir.float x 2>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<3> : !s32i] :
+  // CIR-SAME: !cir.vector<!cir.float x 2>
+  // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!cir.float x 2>, !cir.ptr<!cir.vector<!cir.float x 2>>
+
+  // LLVM: {{.*}}test_vzip_f32(<2 x float>{{.*}}[[A:%.*]], <2 x float>{{.*}}[[B:%.*]])
+  // LLVM: [[vzip:%.*]] = shufflevector <2 x float> {{.*}} <2 x i32> <i32 0, i32 2>
+  // LLVM: store <2 x float> [[vzip]], ptr [[RES:%.*]], align 8
+  // LLVM: [[RES1:%.*]] = getelementptr {{.*}}<2 x float>, ptr [[RES]], i64 1
+  // LLVM: [[vzip1:%.*]] = shufflevector <2 x float> {{.*}} <2 x i32> <i32 1, i32 3>
+  // LLVM: store <2 x float> [[vzip1]], ptr [[RES1]], align 8
+  // LLVM-NEXT: [[RET:%.*]] = load %struct.float32x2x2_t, ptr {{.*}}
+  // LLVM-NEXT: ret %struct.float32x2x2_t [[RET]]
+}
+
+uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
+  return vzipq_u8(a, b);
+
+  // Zip <16 x u8>: low-half interleave {0,16,1,17,...,7,23} stored to slot 0,
+  // high-half interleave {8,24,...,15,31} to slot 1.
+  // CIR-LABEL: vzipq_u8
+  // CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u8i x 16>>
+  // CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[ADDR:%.*]] = cir.ptr_stride [[PTR]], [[ZERO]] : (!cir.ptr<!cir.vector<!u8i x 16>>, !s32i) -> !cir.ptr<!cir.vector<!u8i x 16>>
+  // CIR: [[RES:%.*]] = cir.vec.shuffle([[INP1:%.*]], [[INP2:%.*]] : !cir.vector<!u8i x 16>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<16> : !s32i, #cir.int<1> : !s32i, #cir.int<17> : !s32i,
+  // CIR-SAME: #cir.int<2> : !s32i, #cir.int<18> : !s32i, #cir.int<3> : !s32i, #cir.int<19> : !s32i,
+  // CIR-SAME: #cir.int<4> : !s32i, #cir.int<20> : !s32i, #cir.int<5> : !s32i, #cir.int<21> : !s32i,
+  // CIR-SAME: #cir.int<6> : !s32i, #cir.int<22> : !s32i, #cir.int<7> : !s32i, #cir.int<23> : !s32i] : !cir.vector<!u8i x 16>
+  // CIR:  cir.store [[RES]], [[ADDR]] : !cir.vector<!u8i x 16>, !cir.ptr<!cir.vector<!u8i x 16>>
+  // CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[ADDR1:%.*]] = cir.ptr_stride [[PTR]], [[ONE]] : (!cir.ptr<!cir.vector<!u8i x 16>>, !s32i) -> !cir.ptr<!cir.vector<!u8i x 16>>
+  // CIR: [[RES1:%.*]] = cir.vec.shuffle([[INP1]], [[INP2]] : !cir.vector<!u8i x 16>)
+  // CIR-SAME: [#cir.int<8> : !s32i, #cir.int<24> : !s32i, #cir.int<9> : !s32i, #cir.int<25> : !s32i,
+  // CIR-SAME: #cir.int<10> : !s32i, #cir.int<26> : !s32i, #cir.int<11> : !s32i, #cir.int<27> : !s32i,
+  // CIR-SAME: #cir.int<12> : !s32i, #cir.int<28> : !s32i, #cir.int<13> : !s32i, #cir.int<29> : !s32i,
+  // CIR-SAME: #cir.int<14> : !s32i, #cir.int<30> : !s32i, #cir.int<15> : !s32i, #cir.int<31> : !s32i] : !cir.vector<!u8i x 16>
+  // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!u8i x 16>, !cir.ptr<!cir.vector<!u8i x 16>>
+
+  // LLVM: {{.*}}test_vzipq_u8(<16 x i8>{{.*}}[[A:%.*]], <16 x i8>{{.*}}[[B:%.*]])
+  // LLVM: [[vzip:%.*]] = shufflevector <16 x i8> {{.*}} <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19,
+  // LLVM-SAME: i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  // LLVM: store <16 x i8> [[vzip]], ptr [[RES:%.*]], align 16
+  // LLVM: [[RES1:%.*]] = getelementptr {{.*}}<16 x i8>, ptr [[RES]], i64 1
+  // LLVM: [[vzip1:%.*]] = shufflevector <16 x i8> {{.*}} <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27,
+  // LLVM-SAME: i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  // LLVM: store <16 x i8> [[vzip1]], ptr [[RES1]], align 16
+  // LLVM-NEXT: [[RET:%.*]] = load %struct.uint8x16x2_t, ptr {{.*}}
+  // LLVM-NEXT: ret %struct.uint8x16x2_t [[RET]]
+}
+
+int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
+  return vzipq_s16(a, b);
+
+  // Zip <8 x s16>: low-half interleave {0,8,1,9,2,10,3,11} stored to slot 0,
+  // high-half interleave {4,12,5,13,6,14,7,15} to slot 1.
+  // CIR-LABEL: vzipq_s16
+  // CIR: [[PTR:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!s16i x 8>>
+  // CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[ADDR:%.*]] = cir.ptr_stride [[PTR]], [[ZERO]] : (!cir.ptr<!cir.vector<!s16i x 8>>, !s32i) -> !cir.ptr<!cir.vector<!s16i x 8>>
+  // CIR: [[RES:%.*]] = cir.vec.shuffle([[INP1:%.*]], [[INP2:%.*]] : !cir.vector<!s16i x 8>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<1> : !s32i, #cir.int<9> : !s32i,
+  // CIR-SAME: #cir.int<2> : !s32i, #cir.int<10> : !s32i, #cir.int<3> : !s32i,
+  // CIR-SAME: #cir.int<11> : !s32i] : !cir.vector<!s16i x 8>
+  // CIR:  cir.store [[RES]], [[ADDR]] : !cir.vector<!s16i x 8>, !cir.ptr<!cir.vector<!s16i x 8>>
+  // CIR: [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[ADDR1:%.*]] = cir.ptr_stride [[PTR]], [[ONE]] : (!cir.ptr<!cir.vector<!s16i x 8>>, !s32i) -> !cir.ptr<!cir.vector<!s16i x 8>>
+  // CIR: [[RES1:%.*]] = cir.vec.shuffle([[INP1]], [[INP2]] : !cir.vector<!s16i x 8>)
+  // CIR-SAME: [#cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<5> : !s32i, #cir.int<13> : !s32i,
+  // CIR-SAME: #cir.int<6> : !s32i, #cir.int<14> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME: #cir.int<15> : !s32i] : !cir.vector<!s16i x 8>
+  // CIR:  cir.store [[RES1]], [[ADDR1]] : !cir.vector<!s16i x 8>, !cir.ptr<!cir.vector<!s16i x 8>>
+
+  // LLVM: {{.*}}test_vzipq_s16(<8 x i16>{{.*}}[[A:%.*]], <8 x i16>{{.*}}[[B:%.*]])
+  // LLVM: [[vzip:%.*]] = shufflevector <8 x i16> {{.*}} <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  // LLVM: store <8 x i16> [[vzip]], ptr [[RES:%.*]], align 16
+  // LLVM: [[RES1:%.*]] = getelementptr {{.*}}<8 x i16>, ptr [[RES]], i64 1
+  // LLVM: [[vzip1:%.*]] = shufflevector <8 x i16> {{.*}} <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  // LLVM: store <8 x i16> [[vzip1]], ptr [[RES1]], align 16
+  // LLVM-NEXT: [[RET:%.*]] = load %struct.int16x8x2_t, ptr {{.*}}
+  // LLVM-NEXT: ret %struct.int16x8x2_t [[RET]]
+}
+
+uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
+  return vzipq_u32(a, b);
+
+  // Zip <4 x u32>: interleave low halves of (a,b), then high halves.
+  // CIR-LABEL: vzipq_u32
+  // CIR: [[BASE:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u32i x 4>>
+  // CIR: [[IDX0:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[FIELD0:%.*]] = cir.ptr_stride [[BASE]], [[IDX0]] : (!cir.ptr<!cir.vector<!u32i x 4>>, !s32i) -> !cir.ptr<!cir.vector<!u32i x 4>>
+  // CIR: [[LOZIP:%.*]] = cir.vec.shuffle([[LHS:%.*]], [[RHS:%.*]] : !cir.vector<!u32i x 4>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<1> : !s32i, #cir.int<5> : !s32i] :
+  // CIR-SAME: !cir.vector<!u32i x 4>
+  // CIR:  cir.store [[LOZIP]], [[FIELD0]] : !cir.vector<!u32i x 4>, !cir.ptr<!cir.vector<!u32i x 4>>
+  // CIR: [[IDX1:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[FIELD1:%.*]] = cir.ptr_stride [[BASE]], [[IDX1]] : (!cir.ptr<!cir.vector<!u32i x 4>>, !s32i) -> !cir.ptr<!cir.vector<!u32i x 4>>
+  // CIR: [[HIZIP:%.*]] = cir.vec.shuffle([[LHS]], [[RHS]] : !cir.vector<!u32i x 4>)
+  // CIR-SAME: [#cir.int<2> : !s32i, #cir.int<6> : !s32i, #cir.int<3> : !s32i, #cir.int<7> : !s32i] :
+  // CIR-SAME: !cir.vector<!u32i x 4>
+  // CIR:  cir.store [[HIZIP]], [[FIELD1]] : !cir.vector<!u32i x 4>, !cir.ptr<!cir.vector<!u32i x 4>>
+  // LLVM: [[RETVAL:%.*]] = load %struct.uint32x4x2_t, ptr {{.*}}
+  // LLVM: ret %struct.uint32x4x2_t [[RETVAL]]
+}
+
+float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
+  return vzipq_f32(a, b);
+
+  // Zip <4 x float>: interleave low halves of (a,b), then high halves.
+  // CIR-LABEL: vzipq_f32
+  // CIR: [[BASE:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!cir.float x 4>>
+  // CIR: [[IDX0:%.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: [[FIELD0:%.*]] = cir.ptr_stride [[BASE]], [[IDX0]] : (!cir.ptr<!cir.vector<!cir.float x 4>>, !s32i) -> !cir.ptr<!cir.vector<!cir.float x 4>>
+  // CIR: [[LOZIP:%.*]] = cir.vec.shuffle([[LHS:%.*]], [[RHS:%.*]] : !cir.vector<!cir.float x 4>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<1> : !s32i, #cir.int<5> : !s32i] :
+  // CIR-SAME: !cir.vector<!cir.float x 4>
+  // CIR:  cir.store [[LOZIP]], [[FIELD0]] : !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>
+  // CIR: [[IDX1:%.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: [[FIELD1:%.*]] = cir.ptr_stride [[BASE]], [[IDX1]] : (!cir.ptr<!cir.vector<!cir.float x 4>>, !s32i) -> !cir.ptr<!cir.vector<!cir.float x 4>>
+  // CIR: [[HIZIP:%.*]] = cir.vec.shuffle([[LHS]], [[RHS]] : !cir.vector<!cir.float x 4>)
+  // CIR-SAME: [#cir.int<2> : !s32i, #cir.int<6> : !s32i, #cir.int<3> : !s32i, #cir.int<7> : !s32i] :
+  // CIR-SAME: !cir.vector<!cir.float x 4>
+  // CIR:  cir.store [[HIZIP]], [[FIELD1]] : !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>
+  // LLVM: [[RETVAL:%.*]] = load %struct.float32x4x2_t, ptr {{.*}}
+  // LLVM: ret %struct.float32x4x2_t [[RETVAL]]
+}
+
+uint8x8_t test_vqmovun_s16(int16x8_t a) {
+  return vqmovun_s16(a);
+
+  // Saturating extract-narrow, signed i16 -> unsigned i8 (sqxtun).
+  // CIR-LABEL: vqmovun_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqxtun" {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 8>) -> !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}test_vqmovun_s16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8
+  // LLVM: ret <8 x i8> [[VQMOVUN_V1_I]]
+}
+
+uint16x4_t test_vqmovun_s32(int32x4_t a) {
+  return vqmovun_s32(a);
+
+  // Saturating extract-narrow, signed i32 -> unsigned i16 (sqxtun).
+  // CIR-LABEL: vqmovun_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqxtun" {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 4>) -> !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}test_vqmovun_s32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16
+  // LLVM: [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8>
+  // LLVM: ret <4 x i16> {{.*}}
+}
+
+uint32x2_t test_vqmovun_s64(int64x2_t a) {
+  return vqmovun_s64(a);
+
+  // Saturating extract-narrow, signed i64 -> unsigned i32 (sqxtun).
+  // CIR-LABEL: vqmovun_s64
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqxtun" {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s64i x 2>) -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}test_vqmovun_s64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtun.v2i32
+  // LLVM: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8>
+  // LLVM: ret <2 x i32> {{.*}}
+}
+
+uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) {
+  return vtst_s8(v1, v2);
+
+  // vtst lowers to (v1 & v2) != 0 per lane; the i1 compare result is
+  // sign-extended to an all-ones/all-zeros byte mask.
+  // CIR-LABEL: vtst_s8
+  // CIR: [[V1:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u8i x 8>
+  // CIR: [[V2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u8i x 8>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]]) : !cir.vector<!u8i x 8>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u8i x 8>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}test_vtst_s8(<8 x i8>{{.*}}[[V1:%.*]], <8 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[TMP0:%.*]] = and <8 x i8> [[V1]], [[V2]]
+  // LLVM: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
+  // LLVM: ret <8 x i8> [[VTST_I]]
+}
+
+uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) {
+  return vtst_u8(v1, v2);
+
+  // Same lowering as the signed variant; (v1 & v2) != 0 per lane.
+  // NOTE(review): the cast source type is spelled !cir.vector<!s8i x 8> even
+  // though the arguments are uint8x8_t - confirm against the builtin signature.
+  // CIR-LABEL: vtst_u8
+  // CIR: [[V1:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u8i x 8>
+  // CIR: [[V2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u8i x 8>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]]) : !cir.vector<!u8i x 8>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u8i x 8>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}test_vtst_u8(<8 x i8>{{.*}}[[V1:%.*]], <8 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[TMP0:%.*]] = and <8 x i8> [[V1]], [[V2]]
+  // LLVM: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
+  // LLVM: ret <8 x i8> [[VTST_I]]
+}
+
+uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) {
+  return vtst_s16(v1, v2);
+
+  // CIR-LABEL: vtst_s16
+  // CIR: [[V1:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u16i x 4> 
+  // CIR: [[V2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u16i x 4>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]]) : !cir.vector<!u16i x 4>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u16i x 4>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}test_vtst_s16(<4 x i16>{{.*}}[[V1:%.*]], <4 x i16>{{.*}}[[V2:%.*]])
+  // LLVM:   [[TMP2:%.*]] = and <4 x i16> {{.*}}, {{.*}}
+  // LLVM:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
+  // LLVM:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
+}
+
+uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) {
+  return vtst_u16(v1, v2);
+
+  // CIR-LABEL: vtst_u16
+  // CIR: [[V1:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u16i x 4> 
+  // CIR: [[V2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u16i x 4>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]]) : !cir.vector<!u16i x 4>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u16i x 4>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}test_vtst_u16(<4 x i16>{{.*}}[[V1:%.*]], <4 x i16>{{.*}}[[V2:%.*]])
+  // LLVM:   [[TMP2:%.*]] = and <4 x i16> {{.*}}, {{.*}}
+  // LLVM:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
+  // LLVM:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
+}
+
+uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) {
+  return vtst_s32(v1, v2);
+
+  // CIR-LABEL: vtst_s32
+  // CIR: [[V1:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u32i x 2> 
+  // CIR: [[V2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u32i x 2>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]]) : !cir.vector<!u32i x 2>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u32i x 2>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u32i x 2>, !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}test_vtst_s32(<2 x i32>{{.*}}[[V1:%.*]], <2 x i32>{{.*}}[[V2:%.*]])
+  // LLVM:   [[TMP2:%.*]] = and <2 x i32> {{.*}}, {{.*}}
+  // LLVM:   [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+  // LLVM:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
+}
+
+uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) {
+  return vtst_u32(v1, v2);
+
+  // CIR-LABEL: vtst_u32
+  // CIR: [[V1:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u32i x 2> 
+  // CIR: [[V2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u32i x 2>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]]) : !cir.vector<!u32i x 2>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u32i x 2>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u32i x 2>, !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}test_vtst_u32(<2 x i32>{{.*}}[[V1:%.*]], <2 x i32>{{.*}}[[V2:%.*]])
+  // LLVM:   [[TMP2:%.*]] = and <2 x i32> {{.*}}, {{.*}}
+  // LLVM:   [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+  // LLVM:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
+}
+
+uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) {
+  return vtst_s64(a, b);
+
+  // CIR-LABEL: vtst_s64
+  // CIR: [[A:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u64i x 1> 
+  // CIR: [[B:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u64i x 1>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[A]], [[B]]) : !cir.vector<!u64i x 1>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u64i x 1>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u64i x 1>, !cir.vector<!u64i x 1>
+
+  // LLVM: {{.*}}test_vtst_s64(<1 x i64>{{.*}}[[A:%.*]], <1 x i64>{{.*}}[[B:%.*]])
+  // LLVM:   [[TMP2:%.*]] = and <1 x i64> {{.*}}, {{.*}}
+  // LLVM:   [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
+  // LLVM:   [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
+}
+
+uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) {
+  return vtst_u64(a, b);
+
+  // CIR-LABEL: vtst_u64
+  // CIR: [[A:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u64i x 1> 
+  // CIR: [[B:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u64i x 1>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[A]], [[B]]) : !cir.vector<!u64i x 1>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u64i x 1>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u64i x 1>, !cir.vector<!u64i x 1>
+
+  // LLVM: {{.*}}test_vtst_u64(<1 x i64>{{.*}}[[A:%.*]], <1 x i64>{{.*}}[[B:%.*]])
+  // LLVM:   [[TMP2:%.*]] = and <1 x i64> {{.*}}, {{.*}}
+  // LLVM:   [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
+  // LLVM:   [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
+}
+
+// vtstq family, 128-bit Q-register variants: same lowering as the D-register
+// vtst tests above — (v1 & v2) != 0 via and + compare-ne + sext, with the
+// builtin arguments bitcast from <16 x i8> to the element type under test.
+uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) {
+  return vtstq_s8(v1, v2);
+
+  // CIR-LABEL: vtstq_s8
+  // CIR: [[V1:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u8i x 16> 
+  // CIR: [[V2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u8i x 16>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]]) : !cir.vector<!u8i x 16>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u8i x 16>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}test_vtstq_s8(<16 x i8>{{.*}}[[V1:%.*]], <16 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[TMP0:%.*]] = and <16 x i8> {{.*}}, {{.*}}
+  // LLVM: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
+}
+
+uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) {
+  return vtstq_u8(v1, v2);
+
+  // CIR-LABEL: vtstq_u8
+  // CIR: [[V1:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u8i x 16> 
+  // CIR: [[V2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u8i x 16>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]]) : !cir.vector<!u8i x 16>
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u8i x 16>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}test_vtstq_u8(<16 x i8>{{.*}}[[V1:%.*]], <16 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[TMP0:%.*]] = and <16 x i8> {{.*}}, {{.*}}
+  // LLVM: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
+  // LLVM: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
+}
+
+uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) {
+  return vtstq_s16(v1, v2);
+
+  // CIR-LABEL: vtstq_s16
+  // CIR: [[V1:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u16i x 8>
+  // CIR: [[V2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u16i x 8>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]])
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u16i x 8>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}test_vtstq_s16(<8 x i16>{{.*}}[[V1:%.*]], <8 x i16>{{.*}}[[V2:%.*]])
+  // LLVM:   [[TMP2:%.*]] = and <8 x i16> {{.*}}, {{.*}}
+  // LLVM:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
+  // LLVM:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
+}
+
+uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) {
+  return vtstq_u16(v1, v2);
+
+  // CIR-LABEL: vtstq_u16
+  // CIR: [[V1:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u16i x 8>
+  // CIR: [[V2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u16i x 8>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]])
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u16i x 8>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}test_vtstq_u16(<8 x i16>{{.*}}[[V1:%.*]], <8 x i16>{{.*}}[[V2:%.*]])
+  // LLVM:   [[TMP2:%.*]] = and <8 x i16> {{.*}}, {{.*}}
+  // LLVM:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
+  // LLVM:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
+}
+
+uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) {
+  return vtstq_s32(v1, v2);
+
+  // CIR-LABEL: vtstq_s32
+  // CIR: [[V1:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u32i x 4>
+  // CIR: [[V2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u32i x 4>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]])
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u32i x 4>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u32i x 4>, !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}test_vtstq_s32(<4 x i32>{{.*}}[[V1:%.*]], <4 x i32>{{.*}}[[V2:%.*]])
+  // LLVM:   [[TMP2:%.*]] = and <4 x i32> {{.*}}, {{.*}}
+  // LLVM:   [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
+  // LLVM:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
+}
+
+uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) {
+  return vtstq_u32(v1, v2);
+
+  // CIR-LABEL: vtstq_u32
+  // CIR: [[V1:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u32i x 4>
+  // CIR: [[V2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u32i x 4>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]])
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u32i x 4>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u32i x 4>, !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}test_vtstq_u32(<4 x i32>{{.*}}[[V1:%.*]], <4 x i32>{{.*}}[[V2:%.*]])
+  // LLVM:   [[TMP2:%.*]] = and <4 x i32> {{.*}}, {{.*}}
+  // LLVM:   [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
+  // LLVM:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
+}
+
+uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) {
+  return vtstq_s64(v1, v2);
+
+  // CIR-LABEL: vtstq_s64
+  // CIR: [[V1:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u64i x 2>
+  // CIR: [[V2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u64i x 2>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]])
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u64i x 2>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u64i x 2>, !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}test_vtstq_s64(<2 x i64>{{.*}}[[V1:%.*]], <2 x i64>{{.*}}[[V2:%.*]])
+  // LLVM:   [[TMP2:%.*]] = and <2 x i64> {{.*}}, {{.*}}
+  // LLVM:   [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
+  // LLVM:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+}
+
+uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) {
+  return vtstq_u64(v1, v2);
+
+  // CIR-LABEL: vtstq_u64
+  // CIR: [[V1:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u64i x 2>
+  // CIR: [[V2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u64i x 2>
+  // CIR: [[AND:%.*]] = cir.binop(and, [[V1]], [[V2]])
+  // CIR: [[ZERO_VEC:%.*]] = cir.const #cir.zero : !cir.vector<!u64i x 2>
+  // CIR: {{%.*}} = cir.vec.cmp(ne, [[AND]], [[ZERO_VEC]]) : !cir.vector<!u64i x 2>, !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}test_vtstq_u64(<2 x i64>{{.*}}[[V1:%.*]], <2 x i64>{{.*}}[[V2:%.*]])
+  // LLVM:   [[TMP2:%.*]] = and {{.*}}, {{.*}}
+  // LLVM:   [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
+  // LLVM:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+}
+
+// vqmovn family: saturating narrowing move, lowered to the
+// aarch64.neon.sqxtn (signed) / uqxtn (unsigned) LLVM intrinsics.
+// vcvt*_f32_*: integer-to-float conversion, lowered directly to a
+// cir.cast int_to_float (LLVM sitofp/uitofp) — no intrinsic call.
+// NOTE: the CIR check lines below previously lacked the "CIR:" prefix,
+// so FileCheck silently skipped them; the prefixes are now present.
+int8x8_t test_vqmovn_s16(int16x8_t a) {
+  return vqmovn_s16(a);
+
+  // CIR-LABEL: vqmovn_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqxtn" {{%.*}} : (!cir.vector<!s16i x 8>) -> !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}@test_vqmovn_s16(<8 x i16>{{.*}}[[A:%[a-z0-9]+]])
+  // LLVM:   [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16>
+}
+
+int16x4_t test_vqmovn_s32(int32x4_t a) {
+  return vqmovn_s32(a);
+
+  // CIR-LABEL: vqmovn_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqxtn" {{%.*}} : (!cir.vector<!s32i x 4>) -> !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}@test_vqmovn_s32(<4 x i32>{{.*}}[[A:%[a-z0-9]+]])
+  // LLVM:   [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32>
+}
+
+int32x2_t test_vqmovn_s64(int64x2_t a) {
+  return vqmovn_s64(a);
+
+  // CIR-LABEL: vqmovn_s64
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqxtn" {{%.*}} : (!cir.vector<!s64i x 2>) -> !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}@test_vqmovn_s64(<2 x i64>{{.*}}[[A:%[a-z0-9]+]])
+  // LLVM:   [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqxtn.v2i32(<2 x i64>
+}
+
+uint8x8_t test_vqmovn_u16(uint16x8_t a) {
+  return vqmovn_u16(a);
+
+  // CIR-LABEL: vqmovn_u16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uqxtn" {{%.*}} : (!cir.vector<!u16i x 8>) -> !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}@test_vqmovn_u16(<8 x i16>{{.*}}[[A:%[a-z0-9]+]])
+  // LLVM:   [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16>
+}
+
+uint16x4_t test_vqmovn_u32(uint32x4_t a) {
+  return vqmovn_u32(a);
+
+  // CIR-LABEL: vqmovn_u32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uqxtn" {{%.*}} : (!cir.vector<!u32i x 4>) -> !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}@test_vqmovn_u32(<4 x i32>{{.*}}[[A:%[a-z0-9]+]])
+  // LLVM:   [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32>
+}
+
+uint32x2_t test_vqmovn_u64(uint64x2_t a) {
+  return vqmovn_u64(a);
+
+  // CIR-LABEL: vqmovn_u64
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uqxtn" {{%.*}} : (!cir.vector<!u64i x 2>) -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}@test_vqmovn_u64(<2 x i64>{{.*}}[[A:%[a-z0-9]+]])
+  // LLVM:   [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqxtn.v2i32(<2 x i64>
+}
+
+float32x2_t test_vcvt_f32_s32(int32x2_t a) {
+  return vcvt_f32_s32(a);
+
+  // CIR-LABEL: vcvt_f32_s32
+  // CIR: {{%.*}} = cir.cast int_to_float {{%.*}} : !cir.vector<!s32i x 2> -> !cir.vector<!cir.float x 2>
+
+  // LLVM: {{.*}}test_vcvt_f32_s32(<2 x i32>{{.*}}[[a:%.*]])
+  // LLVM:  [[VCVT_I:%.*]] = sitofp <2 x i32> {{.*}} to <2 x float>
+}
+
+float32x2_t test_vcvt_f32_u32(uint32x2_t a) {
+  return vcvt_f32_u32(a);
+
+  // CIR-LABEL: vcvt_f32_u32
+  // CIR: {{%.*}} = cir.cast int_to_float {{%.*}} : !cir.vector<!u32i x 2> -> !cir.vector<!cir.float x 2>
+
+  // LLVM: {{.*}}test_vcvt_f32_u32(<2 x i32>{{.*}}[[a:%.*]])
+  // LLVM:  [[VCVT_I:%.*]] = uitofp <2 x i32> {{.*}} to <2 x float>
+}
+
+float32x4_t test_vcvtq_f32_s32(int32x4_t a) {
+  return vcvtq_f32_s32(a);
+
+  // CIR-LABEL: vcvtq_f32_s32
+  // CIR: {{%.*}} = cir.cast int_to_float {{%.*}} : !cir.vector<!s32i x 4> -> !cir.vector<!cir.float x 4>
+
+  // LLVM: {{.*}}test_vcvtq_f32_s32(<4 x i32>{{.*}}[[a:%.*]])
+  // LLVM:  [[VCVT_I:%.*]] = sitofp <4 x i32> {{.*}} to <4 x float>
+}
+
+float32x4_t test_vcvtq_f32_u32(uint32x4_t a) {
+  return vcvtq_f32_u32(a);
+
+  // CIR-LABEL: vcvtq_f32_u32
+  // CIR: {{%.*}} = cir.cast int_to_float {{%.*}} : !cir.vector<!u32i x 4> -> !cir.vector<!cir.float x 4>
+
+  // LLVM: {{.*}}test_vcvtq_f32_u32(<4 x i32>{{.*}}[[a:%.*]])
+  // LLVM:  [[VCVT_I:%.*]] = uitofp <4 x i32> {{.*}} to <4 x float>
+}
+
+// splat_lane: __builtin_neon_splat_lane_v duplicates one lane of a D-register
+// vector across all result lanes, lowered to cir.vec.shuffle / LLVM
+// shufflevector with a constant splat mask and a poison second operand.
+// The trailing integer argument selects the NEON element type (presumably a
+// NeonTypeFlags encoding — the value varies with the target type of the
+// bitcast that precedes the shuffle). Single-lane (x1) splats use a
+// zeroinitializer mask in LLVM IR.
+int8x8_t test_splat_lane_s8(int8x8_t v) {
+  return (int8x8_t) __builtin_neon_splat_lane_v((int8x8_t)v, 7, 0);
+
+  // CIR-LABEL: test_splat_lane_s8
+  // CIR: [[VEC:%.*]] = cir.load {{.*}} {{%.*}} : !cir.ptr<!cir.vector<!s8i x 8>>, !cir.vector<!s8i x 8>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!s8i x 8>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!s8i x 8>)
+  // CIR-SAME: [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, 
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}@test_splat_lane_s8(<8 x i8>{{.*}}[[V:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  // LLVM: ret <8 x i8> [[RES]]
+}
+
+int16x4_t test_splat_lane_s16(int16x4_t v) {
+  return (int16x4_t) __builtin_neon_splat_lane_v((int8x8_t)v, 3, 1);
+
+  // CIR-LABEL: test_splat_lane_s16
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s16i x 4>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!s16i x 4>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!s16i x 4>)
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}@test_splat_lane_s16(<4 x i16>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  // LLVM: ret <4 x i16> [[RES]]
+}
+
+int32x2_t test_splat_lane_s32(int32x2_t v) {
+  return (int32x2_t) __builtin_neon_splat_lane_v((int8x8_t)v, 1, 2);
+
+  // CIR-LABEL: test_splat_lane_s32
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s32i x 2>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!s32i x 2>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!s32i x 2>) 
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}@test_splat_lane_s32(<2 x i32>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+  // LLVM: ret <2 x i32> [[RES]]
+}  
+
+int64x1_t test_splat_lane_s64(int64x1_t v) {
+  return (int64x1_t) __builtin_neon_splat_lane_v((int8x8_t)v, 0, 3);
+
+  // CIR-LABEL: test_splat_lane_s64
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s64i x 1>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!s64i x 1>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!s64i x 1>) [#cir.int<0> : !s32i] : !cir.vector<!s64i x 1>
+
+  // LLVM: {{.*}}@test_splat_lane_s64(<1 x i64>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+  // LLVM: [[RES:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> poison, <1 x i32> zeroinitializer
+  // LLVM: ret <1 x i64> [[RES]]
+}
+
+uint8x8_t test_splat_lane_u8(uint8x8_t v) {
+  return (uint8x8_t) __builtin_neon_splat_lane_v((int8x8_t)v, 7, 16);
+
+  // CIR-LABEL: test_splat_lane_u8
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u8i x 8>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!u8i x 8>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!u8i x 8>)
+  // CIR-SAME: [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}@test_splat_lane_u8(<8 x i8>{{.*}}[[V:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  // LLVM: ret <8 x i8> [[RES]]
+}
+uint16x4_t test_splat_lane_u16(uint16x4_t v) {
+  return (uint16x4_t) __builtin_neon_splat_lane_v((int8x8_t)v, 3, 17);
+
+  // CIR-LABEL: test_splat_lane_u16
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u16i x 4>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!u16i x 4>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!u16i x 4>)
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}@test_splat_lane_u16(<4 x i16>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  // LLVM: ret <4 x i16> [[RES]]
+}
+
+uint32x2_t test_splat_lane_u32(uint32x2_t v) {
+  return (uint32x2_t) __builtin_neon_splat_lane_v((int8x8_t)v, 1, 18);
+
+  // CIR-LABEL: test_splat_lane_u32
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u32i x 2>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!u32i x 2>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!u32i x 2>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}@test_splat_lane_u32(<2 x i32>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+  // LLVM: ret <2 x i32> [[RES]]
+}
+
+uint64x1_t test_splat_lane_u64(uint64x1_t v) {
+  return (uint64x1_t) __builtin_neon_splat_lane_v((int8x8_t)v, 0, 19);
+
+  // CIR-LABEL: test_splat_lane_u64
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u64i x 1>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!u64i x 1>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!u64i x 1>) [#cir.int<0> : !s32i] : !cir.vector<!u64i x 1>
+
+  // LLVM: {{.*}}@test_splat_lane_u64(<1 x i64>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+  // LLVM: [[RES:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> poison, <1 x i32> zeroinitializer
+  // LLVM: ret <1 x i64> [[RES]]
+}
+
+float32x2_t test_splat_lane_f32(float32x2_t v) {
+  return (float32x2_t) __builtin_neon_splat_lane_v((int8x8_t)v, 1, 9);
+
+  // CIR-LABEL: test_splat_lane_f32
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!cir.float x 2>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!cir.float x 2>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!cir.float x 2>) 
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!cir.float x 2>
+
+  // LLVM: {{.*}}@test_splat_lane_f32(<2 x float>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
+  // LLVM: ret <2 x float> [[RES]]
+}
+
+float64x1_t test_splat_lane_f64(float64x1_t v) {
+  return (float64x1_t) __builtin_neon_splat_lane_v((int8x8_t)v, 0, 10);
+
+  // CIR-LABEL: test_splat_lane_f64
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!cir.double x 1>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!cir.double x 1>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!cir.double x 1>) [#cir.int<0> : !s32i] : !cir.vector<!cir.double x 1>
+
+  // LLVM: {{.*}}@test_splat_lane_f64(<1 x double>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
+  // LLVM: [[RES:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> poison, <1 x i32> zeroinitializer
+  // LLVM: ret <1 x double> [[RES]]
+}
+
+// splatq_lane: same lowering as splat_lane above, but widening a D-register
+// (64-bit) input to a Q-register (128-bit) result — the shuffle mask has
+// twice as many entries as the input has lanes, all selecting the same lane.
+int8x16_t test_splatq_lane_s8(int8x8_t v) {
+  return (int8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 7, 0);
+
+  // CIR-LABEL: test_splatq_lane_s8
+  // CIR: [[VEC:%.*]] = cir.load {{.*}} {{%.*}} : !cir.ptr<!cir.vector<!s8i x 8>>, !cir.vector<!s8i x 8>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!s8i x 8>
+  // CIR: [[TMP0:%.*]] = cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!s8i x 8>)
+  // CIR-SAME: [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, 
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s8i x 16>
+
+  // LLVM: {{.*}}@test_splatq_lane_s8(<8 x i8>{{.*}}[[V:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, 
+  // LLVM-SAME: <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  // LLVM: ret <16 x i8> [[RES]]
+} 
+
+int16x8_t test_splatq_lane_s16(int16x4_t v) {
+  return (int16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 3, 1);
+
+  // CIR-LABEL: test_splatq_lane_s16
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s16i x 4>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!s16i x 4>
+  // CIR: [[TMP0:%.*]] = cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!s16i x 4>)
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i,
+  // CIR-SAME:  #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}@test_splatq_lane_s16(<4 x i16>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  // LLVM: ret <8 x i16> [[RES]]
+}
+
+int32x4_t test_splatq_lane_s32(int32x2_t v) {
+  return (int32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 1, 2);
+
+  // CIR-LABEL: test_splatq_lane_s32
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s32i x 2>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!s32i x 2>
+  // CIR: [[TMP0:%.*]] = cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!s32i x 2>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}@test_splatq_lane_s32(<2 x i32>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  // LLVM: ret <4 x i32> [[RES]]
+}  
+
+int64x2_t test_splatq_lane_s64(int64x1_t v) {
+  return (int64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 0, 3);
+
+  // CIR-LABEL: test_splatq_lane_s64
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s64i x 1>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!s64i x 1>
+  // CIR: [[TMP0:%.*]] = cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!s64i x 1>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!s64i x 2>
+
+  // LLVM: {{.*}}@test_splatq_lane_s64(<1 x i64>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+  // LLVM: [[RES:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> poison, <2 x i32> zeroinitializer
+  // LLVM: ret <2 x i64> [[RES]]
+}
+
+uint8x16_t test_splatq_lane_u8(uint8x8_t v) {
+  return (uint8x16_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 7, 16);
+
+  // CIR-LABEL: test_splatq_lane_u8
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u8i x 8>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!u8i x 8>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!u8i x 8>)
+  // CIR-SAME: [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}@test_splatq_lane_u8(<8 x i8>{{.*}}[[V:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, 
+  // LLVM-SAME: <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  // LLVM: ret <16 x i8> [[RES]]
+}
+
+uint16x8_t test_splatq_lane_u16(uint16x4_t v) {
+  return (uint16x8_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 3, 17);
+
+  // CIR-LABEL: test_splatq_lane_u16
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u16i x 4>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!u16i x 4>
+  // CIR: [[TMP0:%.*]] = cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!u16i x 4>)
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i,
+  // CIR-SAME:  #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}@test_splatq_lane_u16(<4 x i16>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  // LLVM: ret <8 x i16> [[RES]]
+}
+
+uint32x4_t test_splatq_lane_u32(uint32x2_t v) {
+  return (uint32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 1, 18);
+
+  // CIR-LABEL: test_splatq_lane_u32
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u32i x 2>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!u32i x 2>
+  // CIR: [[TMP0:%.*]] = cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!u32i x 2>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}@test_splatq_lane_u32(<2 x i32>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  // LLVM: ret <4 x i32> [[RES]]
+}
+
+uint64x2_t test_splatq_lane_u64(uint64x1_t v) {
+  return (uint64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 0, 19);
+
+  // CIR-LABEL: test_splatq_lane_u64
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u64i x 1>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!u64i x 1>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!u64i x 1>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}@test_splatq_lane_u64(<1 x i64>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <1 x i64> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+  // LLVM: [[RES:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> poison, <2 x i32> zeroinitializer
+  // LLVM: ret <2 x i64> [[RES]]
+}
+
+float32x4_t test_splatq_lane_f32(float32x2_t v) {
+  return (float32x4_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 1, 9);
+
+  // CIR-LABEL: test_splatq_lane_f32
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!cir.float x 2>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!cir.float x 2>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!cir.float x 2>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!cir.float x 4>
+
+  // LLVM: {{.*}}@test_splatq_lane_f32(<2 x float>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x float> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  // LLVM: ret <4 x float> [[RES]]
+}
+
+float64x2_t test_splatq_lane_f64(float64x1_t v) {
+  return (float64x2_t) __builtin_neon_splatq_lane_v((int8x8_t)v, 0, 10);
+
+  // CIR-LABEL: test_splatq_lane_f64
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!cir.double x 1>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!cir.double x 1>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!cir.double x 1>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!cir.double x 2>
+
+  // LLVM: {{.*}}@test_splatq_lane_f64(<1 x double>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <1 x double> [[V]] to <8 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
+  // LLVM: [[RES:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> poison, <2 x i32> zeroinitializer
+  // LLVM: ret <2 x double> [[RES]]
+}
+
+int8x8_t test_splat_laneq_s8(int8x16_t v) {
+  return (int8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 15, 32);
+
+  // CIR-LABEL: test_splat_laneq_s8
+  // CIR: [[VEC:%.*]] = cir.load {{.*}} {{.*}} : !cir.ptr<!cir.vector<!s8i x 16>>, !cir.vector<!s8i x 16>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!s8i x 16>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!s8i x 16>)
+  // CIR-SAME: [#cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i,
+  // CIR-SAME:  #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i] : !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}@test_splat_laneq_s8(<16 x i8>{{.*}}[[V:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison,
+  // LLVM-SAME: <8 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+  // LLVM: ret <8 x i8> [[RES]]
+}
+
+int16x4_t test_splat_laneq_s16(int16x8_t v) {
+  return (int16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 7, 33);
+
+  // CIR-LABEL: test_splat_laneq_s16
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s16i x 8>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!s16i x 8>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!s16i x 8>)
+  // CIR-SAME: [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}@test_splat_laneq_s16(<8 x i16>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  // LLVM: ret <4 x i16> [[RES]]
+}
+
+int32x2_t test_splat_laneq_s32(int32x4_t v) {
+  return (int32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 3, 34);
+
+  // CIR-LABEL: test_splat_laneq_s32
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s32i x 4>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!s32i x 4>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!s32i x 4>)
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}@test_splat_laneq_s32(<4 x i32>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
+  // LLVM: ret <2 x i32> [[RES]]
+}
+
+int64x1_t test_splat_laneq_s64(int64x2_t v) {
+  return (int64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 0, 35);
+
+  // CIR-LABEL: test_splat_laneq_s64
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s64i x 2>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!s64i x 2>
+  // CIR: [[TMP0:%.*]] = cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!s64i x 2>)
+  // CIR-SAME: [#cir.int<0> : !s32i] : !cir.vector<!s64i x 1>
+
+  // LLVM: {{.*}}@test_splat_laneq_s64(<2 x i64>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <1 x i32> zeroinitializer
+  // LLVM: ret <1 x i64> [[RES]]
+}
+
+float32x2_t test_splat_laneq_f32(float32x4_t v) {
+  return (float32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 1, 41);
+
+  // CIR-LABEL: test_splat_laneq_f32
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!cir.float x 4>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!cir.float x 4>
+  // CIR: [[TMP0:%.*]] = cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!cir.float x 4>)
+  // CIR-SAME: [#cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!cir.float x 2>
+
+  // LLVM: {{.*}}@test_splat_laneq_f32(<4 x float>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 1, i32 1>
+  // LLVM: ret <2 x float> [[RES]]
+}
+
+float64x1_t test_splat_laneq_f64(float64x2_t v) {
+  return (float64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 0, 42);
+
+  // CIR-LABEL: test_splat_laneq_f64
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!cir.double x 2>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!cir.double x 2>
+  // CIR: [[TMP0:%.*]] = cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!cir.double x 2>)
+  // CIR-SAME: [#cir.int<0> : !s32i] : !cir.vector<!cir.double x 1>
+
+  // LLVM: {{.*}}@test_splat_laneq_f64(<2 x double>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <1 x i32> zeroinitializer
+  // LLVM: ret <1 x double> [[RES]]
+}
+
+uint8x8_t test_splat_laneq_u8(uint8x16_t v) {
+  return (uint8x8_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 15, 48);
+
+  // CIR-LABEL: test_splat_laneq_u8
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u8i x 16>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!u8i x 16>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!u8i x 16>)
+  // CIR-SAME: [#cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i,
+  // CIR-SAME:  #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i] : !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}@test_splat_laneq_u8(<16 x i8>{{.*}}[[V:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison,
+  // LLVM-SAME: <8 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+  // LLVM: ret <8 x i8> [[RES]]
+}
+
+uint16x4_t test_splat_laneq_u16(uint16x8_t v) {
+  return (uint16x4_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 7, 49);
+
+  // CIR-LABEL: test_splat_laneq_u16
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u16i x 8>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!u16i x 8>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!u16i x 8>)
+  // CIR-SAME: [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}@test_splat_laneq_u16(<8 x i16>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
+  // LLVM: ret <4 x i16> [[RES]]
+}
+
+uint32x2_t test_splat_laneq_u32(uint32x4_t v) {
+  return (uint32x2_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 3, 50);
+
+  // CIR-LABEL: test_splat_laneq_u32
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u32i x 4>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!u32i x 4>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!u32i x 4>)
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}@test_splat_laneq_u32(<4 x i32>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 3, i32 3>
+  // LLVM: ret <2 x i32> [[RES]]
+}
+
+uint64x1_t test_splat_laneq_u64(uint64x2_t v) {
+  return (uint64x1_t) __builtin_neon_splat_laneq_v((int8x16_t)v, 0, 51);
+
+  // CIR-LABEL: test_splat_laneq_u64
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u64i x 2>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!u64i x 2>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!u64i x 2>)
+  // CIR-SAME: [#cir.int<0> : !s32i] : !cir.vector<!u64i x 1>
+
+  // LLVM: {{.*}}@test_splat_laneq_u64(<2 x i64>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <1 x i32> zeroinitializer
+  // LLVM: ret <1 x i64> [[RES]]
+}
+
+int8x16_t test_splatq_laneq_s8(int8x16_t v) {
+  return (int8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 15, 32);
+
+  // CIR-LABEL: test_splatq_laneq_s8
+  // CIR: [[VEC:%.*]] = cir.load {{.*}} {{.*}} : !cir.ptr<!cir.vector<!s8i x 16>>, !cir.vector<!s8i x 16>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!s8i x 16>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!s8i x 16>)
+  // CIR-SAME: [#cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i,
+  // CIR-SAME:  #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i,
+  // CIR-SAME:  #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i,
+  // CIR-SAME:  #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i] : !cir.vector<!s8i x 16>
+
+  // LLVM: {{.*}}@test_splatq_laneq_s8(<16 x i8>{{.*}}[[V:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison,
+  // LLVM-SAME: <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15,
+  // LLVM-SAME:  i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+  // LLVM: ret <16 x i8> [[RES]]
+}
+
+int16x8_t test_splatq_laneq_s16(int16x8_t v) {
+  return (int16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 7, 33);
+
+  // CIR-LABEL: test_splatq_laneq_s16
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s16i x 8>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!s16i x 8>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!s16i x 8>)
+  // CIR-SAME: [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}@test_splatq_laneq_s16(<8 x i16>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  // LLVM: ret <8 x i16> [[RES]]
+}
+
+int32x4_t test_splatq_laneq_s32(int32x4_t v) {
+  return (int32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 3, 34);
+
+  // CIR-LABEL: test_splatq_laneq_s32
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s32i x 4>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!s32i x 4>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!s32i x 4>)
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}@test_splatq_laneq_s32(<4 x i32>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  // LLVM: ret <4 x i32> [[RES]]
+}
+
+int64x2_t test_splatq_laneq_s64(int64x2_t v) {
+  return (int64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 0, 35);
+
+  // CIR-LABEL: test_splatq_laneq_s64
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s64i x 2>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!s64i x 2>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!s64i x 2>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!s64i x 2>
+
+  // LLVM: {{.*}}@test_splatq_laneq_s64(<2 x i64>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer
+  // LLVM: ret <2 x i64> [[RES]]
+}
+
+float32x4_t test_splatq_laneq_f32(float32x4_t v) {
+  return (float32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 3, 41);
+
+  // CIR-LABEL: test_splatq_laneq_f32
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!cir.float x 4>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!cir.float x 4>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!cir.float x 4>)
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.float x 4>
+
+  // LLVM: {{.*}}@test_splatq_laneq_f32(<4 x float>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x float> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  // LLVM: ret <4 x float> [[RES]]
+}
+
+float64x2_t test_splatq_laneq_f64(float64x2_t v) {
+  return (float64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 0, 42);
+
+  // CIR-LABEL: test_splatq_laneq_f64
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!cir.double x 2>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!cir.double x 2>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!cir.double x 2>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!cir.double x 2>
+
+  // LLVM: {{.*}}@test_splatq_laneq_f64(<2 x double>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x double> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer
+  // LLVM: ret <2 x double> [[RES]]
+}
+
+uint8x16_t test_splatq_laneq_u8(uint8x16_t v) {
+  return (uint8x16_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 15, 48);
+
+  // CIR-LABEL: test_splatq_laneq_u8
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u8i x 16>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!u8i x 16>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!u8i x 16>)
+  // CIR-SAME: [#cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i,
+  // CIR-SAME:  #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i,
+  // CIR-SAME:  #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i,
+  // CIR-SAME:  #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i, #cir.int<15> : !s32i] : !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}@test_splatq_laneq_u8(<16 x i8>{{.*}}[[V:%.*]])
+  // LLVM: [[RES:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison,
+  // LLVM-SAME: <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15,
+  // LLVM-SAME:  i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+  // LLVM: ret <16 x i8> [[RES]]
+}
+
+uint16x8_t test_splatq_laneq_u16(uint16x8_t v) {
+  return (uint16x8_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 7, 49);
+
+  // CIR-LABEL: test_splatq_laneq_u16
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u16i x 8>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!u16i x 8>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!u16i x 8>)
+  // CIR-SAME: [#cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i,
+  // CIR-SAME:  #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}@test_splatq_laneq_u16(<8 x i16>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <8 x i16> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+  // LLVM: [[RES:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  // LLVM: ret <8 x i16> [[RES]]
+}
+
+uint32x4_t test_splatq_laneq_u32(uint32x4_t v) {
+  return (uint32x4_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 3, 50);
+
+  // CIR-LABEL: test_splatq_laneq_u32
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u32i x 4>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!u32i x 4>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!u32i x 4>)
+  // CIR-SAME: [#cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}@test_splatq_laneq_u32(<4 x i32>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+  // LLVM: [[RES:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  // LLVM: ret <4 x i32> [[RES]]
+}
+
+uint64x2_t test_splatq_laneq_u64(uint64x2_t v) {
+  return (uint64x2_t) __builtin_neon_splatq_laneq_v((int8x16_t)v, 0, 51);
+
+  // CIR-LABEL: test_splatq_laneq_u64
+  // CIR: [[VEC:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u64i x 2>
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!u64i x 2>
+  // CIR: cir.vec.shuffle([[VEC]], [[POISON]] : !cir.vector<!u64i x 2>)
+  // CIR-SAME: [#cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}@test_splatq_laneq_u64(<2 x i64>{{.*}}[[V:%.*]])
+  // LLVM: [[TMP0:%.*]] = bitcast <2 x i64> [[V]] to <16 x i8>
+  // LLVM: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+  // LLVM: [[RES:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer
+  // LLVM: ret <2 x i64> [[RES]]
+}
+
+int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) {
+  return vpadal_s8(a, b);
+
+  // CIR-LABEL: vpadal_s8
+  // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.saddlp" {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s8i x 8>) -> !cir.vector<!s16i x 4>
+  // CIR: [[a:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s16i x 4>
+  // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}test_vpadal_s8(<4 x i16>{{.*}}[[a:%.*]], <8 x i8>{{.*}}[[b:%.*]])
+  // LLVM:   [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> {{.*}})
+  // LLVM:   [[TMP1:%.*]] = add <4 x i16> [[VPADAL_I]],
+}
+
+int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) {
+  return vpadal_s16(a, b);
+
+  // CIR-LABEL: vpadal_s16
+  // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.saddlp" {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 4>) -> !cir.vector<!s32i x 2>
+  // CIR: [[a:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s32i x 2>
+  // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}test_vpadal_s16(<2 x i32>{{.*}}[[a:%.*]], <4 x i16>{{.*}}[[b:%.*]])
+  // LLVM:   [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> {{.*}})
+  // LLVM:   [[TMP2:%.*]] = add <2 x i32> [[VPADAL1_I]], {{.*}}
+}
+
+int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) {
+  return vpadal_s32(a, b);
+
+  // CIR-LABEL: vpadal_s32
+  // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.saddlp" {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 2>) -> !cir.vector<!s64i x 1>
+  // CIR: [[a:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s64i x 1>
+  // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!s64i x 1>
+
+  // LLVM: {{.*}}test_vpadal_s32(<1 x i64>{{.*}}[[a:%.*]], <2 x i32>{{.*}}[[b:%.*]])
+  // LLVM:   [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32>
+  // LLVM:   [[TMP2:%.*]] = add <1 x i64> [[VPADAL1_I]], {{.*}}
+}
+
+uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) {
+  return vpadal_u8(a, b);
+
+  // CIR-LABEL: vpadal_u8
+  // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.uaddlp" {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u8i x 8>) -> !cir.vector<!u16i x 4>
+  // CIR: [[a:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u16i x 4>
+  // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}test_vpadal_u8(<4 x i16>{{.*}}[[a:%.*]], <8 x i8>{{.*}}[[b:%.*]])
+  // LLVM:   [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8>
+  // LLVM:   [[TMP1:%.*]] = add <4 x i16> [[VPADAL_I]],
+}
+
+uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) {
+  return vpadal_u16(a, b);
+
+  // CIR-LABEL: vpadal_u16
+  // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.uaddlp" {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u16i x 4>) -> !cir.vector<!u32i x 2>
+  // CIR: [[a:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u32i x 2>
+  // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}test_vpadal_u16(<2 x i32>{{.*}}[[a:%.*]], <4 x i16>{{.*}}[[b:%.*]])
+  // LLVM:   [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16>
+  // LLVM:   [[TMP2:%.*]] = add <2 x i32> [[VPADAL1_I]],
+}
+
+uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) {
+  return vpadal_u32(a, b);
+
+  // CIR-LABEL: vpadal_u32
+  // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.uaddlp" {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u32i x 2>) -> !cir.vector<!u64i x 1>
+  // CIR: [[a:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u64i x 1>
+  // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!u64i x 1>
+
+  // LLVM: {{.*}}test_vpadal_u32(<1 x i64>{{.*}}[[a:%.*]], <2 x i32>{{.*}}[[b:%.*]])
+  // LLVM:   [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32>
+  // LLVM:   [[TMP2:%.*]] = add <1 x i64> [[VPADAL1_I]],
+}
+
+int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) {
+  return vpadalq_s8(a, b);
+
+  // CIR-LABEL: vpadalq_s8
+  // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.saddlp" {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s8i x 16>) -> !cir.vector<!s16i x 8>
+  // CIR: [[a:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s16i x 8>
+  // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}test_vpadalq_s8(<8 x i16>{{.*}}[[a:%.*]], <16 x i8>{{.*}}[[b:%.*]])
+  // LLVM:   [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8>
+  // LLVM:   [[TMP1:%.*]] = add <8 x i16> [[VPADAL_I]],
+}
+
+int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) {
+  return vpadalq_s16(a, b);
+
+  // CIR-LABEL: vpadalq_s16
+  // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.saddlp" {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 8>) -> !cir.vector<!s32i x 4>
+  // CIR: [[a:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s32i x 4>
+  // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}test_vpadalq_s16(<4 x i32>{{.*}}[[a:%.*]], <8 x i16>{{.*}}[[b:%.*]])
+  // LLVM:   [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16>
+  // LLVM:   [[TMP2:%.*]] = add <4 x i32> [[VPADAL1_I]],
+}
+
+int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) {
+  return vpadalq_s32(a, b);
+
+  // CIR-LABEL: vpadalq_s32
+  // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.saddlp" {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 4>) -> !cir.vector<!s64i x 2>
+  // CIR: [[a:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s64i x 2>
+  // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!s64i x 2>
+
+  // LLVM: {{.*}}test_vpadalq_s32(<2 x i64>{{.*}}[[a:%.*]], <4 x i32>{{.*}}[[b:%.*]])
+  // LLVM:   [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32>
+  // LLVM:   [[TMP2:%.*]] = add <2 x i64> [[VPADAL1_I]],
+}
+
+uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) {
+  return vpadalq_u8(a, b);
+
+  // CIR-LABEL: vpadalq_u8
+  // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.uaddlp" {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u8i x 16>) -> !cir.vector<!u16i x 8>
+  // CIR: [[a:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u16i x 8>
+  // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}test_vpadalq_u8(<8 x i16>{{.*}}[[a:%.*]], <16 x i8>{{.*}}[[b:%.*]])
+  // LLVM:   [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8>
+  // LLVM:   [[TMP1:%.*]] = add <8 x i16> [[VPADAL_I]],
+}
+
+uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) {
+  return vpadalq_u16(a, b);
+
+  // CIR-LABEL: vpadalq_u16
+  // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.uaddlp" {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u16i x 8>) -> !cir.vector<!u32i x 4>
+  // CIR: [[a:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u32i x 4>
+  // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}test_vpadalq_u16(<4 x i32>{{.*}}[[a:%.*]], <8 x i16>{{.*}}[[b:%.*]])
+  // LLVM:   [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16>
+  // LLVM:   [[TMP2:%.*]] = add <4 x i32> [[VPADAL1_I]],
+}
+
+uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) {
+  return vpadalq_u32(a, b);
+
+  // CIR-LABEL: vpadalq_u32
+  // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.uaddlp" {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u32i x 4>) -> !cir.vector<!u64i x 2>
+  // CIR: [[a:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u64i x 2>
+  // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}test_vpadalq_u32(<2 x i64>{{.*}}[[a:%.*]], <4 x i32>{{.*}}[[b:%.*]])
+  // LLVM:   [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32>
+  // LLVM:   [[TMP2:%.*]] = add <2 x i64> [[VPADAL1_I]],
+}
+
+
+int64_t test_vaddlvq_s32(int32x4_t a) {
+  return vaddlvq_s32(a);
+
+  // CIR-LABEL: vaddlvq_s32
+  // CIR: = cir.llvm.intrinsic "aarch64.neon.saddlv" {{%.*}} : (!cir.vector<!s32i x 4>) -> !s64i
+
+  // LLVM: {{.*}}@test_vaddlvq_s32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM-NEXT:    [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> [[A]])
+  // LLVM-NEXT:    ret i64 [[VADDLVQ_S32_I]]
+}
+
+uint64_t test_vaddlvq_u32(uint32x4_t a) {
+  return vaddlvq_u32(a);
+
+  // CIR-LABEL: vaddlvq_u32
+  // CIR: = cir.llvm.intrinsic "aarch64.neon.uaddlv" {{%.*}} : (!cir.vector<!u32i x 4>) -> !u64i
+
+  // LLVM: {{.*}}@test_vaddlvq_u32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM-NEXT:    [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> [[A]])
+  // LLVM-NEXT:    ret i64 [[VADDLVQ_U32_I]]
+}
+
+int8_t test_vmaxv_s8(int8x8_t a) {
+  return vmaxv_s8(a);
+
+  // CIR-LABEL: vmaxv_s8
+  // CIR: cir.llvm.intrinsic "aarch64.neon.smaxv" {{%.*}} : (!cir.vector<!s8i x 8>) -> !s8i
+
+  // LLVM-LABEL: @test_vmaxv_s8
+  // LLVM-SAME: (<8 x i8> [[a:%.*]])
+  // LLVM: [[res:%.*]] = call i8 @llvm.aarch64.neon.smaxv.i8.v8i8(<8 x i8> [[a]])
+  // LLVM: ret i8 [[res]]
+}
+
+int8_t test_vmaxv_u8(uint8x8_t a) {
+  return vmaxv_u8(a);
+
+  // CIR-LABEL: vmaxv_u8
+  // CIR: cir.llvm.intrinsic "aarch64.neon.umaxv" {{%.*}} : (!cir.vector<!u8i x 8>) -> !u8i
+
+  // LLVM-LABEL: @test_vmaxv_u8
+  // LLVM-SAME: (<8 x i8> [[a:%.*]])
+  // LLVM: [[res:%.*]] = call i8 @llvm.aarch64.neon.umaxv.i8.v8i8(<8 x i8> [[a]])
+  // LLVM: ret i8 [[res]]
+}
+
+int8_t test_vmaxvq_s8(int8x16_t a) {
+  return vmaxvq_s8(a);
+
+  // CIR-LABEL: vmaxvq_s8
+  // CIR: cir.llvm.intrinsic "aarch64.neon.smaxv" {{%.*}} : (!cir.vector<!s8i x 16>) -> !s8i
+
+  // LLVM-LABEL: @test_vmaxvq_s8
+  // LLVM-SAME: (<16 x i8> [[a:%.*]])
+  // LLVM: [[res:%.*]] = call i8 @llvm.aarch64.neon.smaxv.i8.v16i8(<16 x i8> [[a]])
+  // LLVM: ret i8 [[res]]
+}
+
+int8_t test_vmaxvq_u8(uint8x16_t a) {
+  return vmaxvq_u8(a);
+
+  // CIR-LABEL: vmaxvq_u8
+  // CIR: cir.llvm.intrinsic "aarch64.neon.umaxv" {{%.*}} : (!cir.vector<!u8i x 16>) -> !u8i
+
+  // LLVM-LABEL: @test_vmaxvq_u8
+  // LLVM-SAME: (<16 x i8> [[a:%.*]])
+  // LLVM: [[res:%.*]] = call i8 @llvm.aarch64.neon.umaxv.i8.v16i8(<16 x i8> [[a]])
+  // LLVM: ret i8 [[res]]
+}
+
+uint32_t test_vmaxvq_u32(uint32x4_t a) {
+  return vmaxvq_u32(a);
+
+  // CIR-LABEL: vmaxvq_u32
+  // CIR: cir.llvm.intrinsic "vector.reduce.umax" {{%.*}} : (!cir.vector<!u32i x 4>) -> !u32i
+
+  // LLVM-LABEL: @test_vmaxvq_u32
+  // LLVM: call i32 @llvm.vector.reduce.umax.v4i32
+
+  // OGCG-LABEL: @test_vmaxvq_u32
+  // OGCG: call i32 @llvm.vector.reduce.umax.v4i32
+}
+
+int32_t test_vmaxvq_s32(int32x4_t a) {
+  return vmaxvq_s32(a);
+
+  // CIR-LABEL: vmaxvq_s32
+  // CIR: cir.llvm.intrinsic "vector.reduce.smax" {{%.*}} : (!cir.vector<!s32i x 4>) -> !s32i
+
+  // LLVM-LABEL: @test_vmaxvq_s32
+  // LLVM: call i32 @llvm.vector.reduce.smax.v4i32
+
+  // OGCG-LABEL: @test_vmaxvq_s32
+  // OGCG: call i32 @llvm.vector.reduce.smax.v4i32
+}
+
+float32_t test_vmaxvq_f32(float32x4_t a) {
+  return vmaxvq_f32(a);
+
+  // CIR-LABEL: vmaxvq_f32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.fmaxv" {{%.*}} : (!cir.vector<!cir.float x 4>) -> !cir.float
+
+  // LLVM-LABEL: @test_vmaxvq_f32
+  // LLVM: call float @llvm.aarch64.neon.fmaxv.f32.v4f32
+
+  // OGCG-LABEL: @test_vmaxvq_f32
+  // OGCG: call float @llvm.aarch64.neon.fmaxv.f32.v4f32
+}
+
+uint32_t test_vminvq_u32(uint32x4_t a) {
+  return vminvq_u32(a);
+
+  // CIR-LABEL: vminvq_u32
+  // CIR: cir.llvm.intrinsic "vector.reduce.umin" {{%.*}} : (!cir.vector<!u32i x 4>) -> !u32i
+
+  // LLVM-LABEL: @test_vminvq_u32
+  // LLVM: call i32 @llvm.vector.reduce.umin.v4i32
+
+  // OGCG-LABEL: @test_vminvq_u32
+  // OGCG: call i32 @llvm.vector.reduce.umin.v4i32
+}
+
+int64x2_t test_vcvtnq_s64_f64(float64x2_t a) {
+  return vcvtnq_s64_f64(a);
+
+  // CIR-LABEL: vcvtnq_s64_f64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.fcvtns" {{%.*}} : (!cir.vector<!cir.double x 2>) -> !cir.vector<!s64i x 2>
+
+  // LLVM-LABEL: @test_vcvtnq_s64_f64
+  // LLVM: call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64
+
+  // OGCG-LABEL: @test_vcvtnq_s64_f64
+  // OGCG: call <2 x i64> @llvm.aarch64.neon.fcvtns.v2i64.v2f64
+}
+
+uint64x2_t test_vcvtnq_u64_f64(float64x2_t a) {
+  return vcvtnq_u64_f64(a);
+
+  // CIR-LABEL: vcvtnq_u64_f64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.fcvtnu" {{%.*}} : (!cir.vector<!cir.double x 2>) -> !cir.vector<!u64i x 2>
+
+  // LLVM-LABEL: @test_vcvtnq_u64_f64
+  // LLVM: call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64
+
+  // OGCG-LABEL: @test_vcvtnq_u64_f64
+  // OGCG: call <2 x i64> @llvm.aarch64.neon.fcvtnu.v2i64.v2f64
+}
+
+float32x2_t test_vsqrt_f32(float32x2_t a) {
+  return vsqrt_f32(a);
+
+  // CIR-LABEL: vsqrt_f32
+  // CIR: cir.sqrt {{%.*}} : !cir.vector<!cir.float x 2>
+
+  // LLVM-LABEL: @test_vsqrt_f32
+  // LLVM: call <2 x float> @llvm.sqrt.v2f32
+
+  // OGCG-LABEL: @test_vsqrt_f32
+  // OGCG: call <2 x float> @llvm.sqrt.v2f32
+}
+
+float32x4_t test_vsqrtq_f32(float32x4_t a) {
+  return vsqrtq_f32(a);
+
+  // CIR-LABEL: vsqrtq_f32
+  // CIR: cir.sqrt {{%.*}} : !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: @test_vsqrtq_f32
+  // LLVM: call <4 x float> @llvm.sqrt.v4f32
+
+  // OGCG-LABEL: @test_vsqrtq_f32
+  // OGCG: call <4 x float> @llvm.sqrt.v4f32
+}
+
+float32x2_t test_vrndn_f32(float32x2_t a) {
+  return vrndn_f32(a);
+
+  // CIR-LABEL: vrndn_f32
+  // CIR: cir.roundeven {{%.*}} : !cir.vector<!cir.float x 2>
+
+  // LLVM-LABEL: @test_vrndn_f32
+  // LLVM: call <2 x float> @llvm.roundeven.v2f32
+
+  // OGCG-LABEL: @test_vrndn_f32
+  // OGCG: call <2 x float> @llvm.roundeven.v2f32
+}
+
+float32x4_t test_vrndnq_f32(float32x4_t a) {
+  return vrndnq_f32(a);
+
+  // CIR-LABEL: vrndnq_f32
+  // CIR: cir.roundeven {{%.*}} : !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: @test_vrndnq_f32
+  // LLVM: call <4 x float> @llvm.roundeven.v4f32
+
+  // OGCG-LABEL: @test_vrndnq_f32
+  // OGCG: call <4 x float> @llvm.roundeven.v4f32
+}
+
+float32x2_t test_vrnd_f32(float32x2_t a) {
+  return vrnd_f32(a);
+
+  // CIR-LABEL: vrnd_f32
+  // CIR: cir.trunc {{%.*}} : !cir.vector<!cir.float x 2>
+
+  // LLVM-LABEL: @test_vrnd_f32
+  // LLVM: call <2 x float> @llvm.trunc.v2f32
+
+  // OGCG-LABEL: @test_vrnd_f32
+  // OGCG: call <2 x float> @llvm.trunc.v2f32
+}
+
+float32x4_t test_vrndq_f32(float32x4_t a) {
+  return vrndq_f32(a);
+
+  // CIR-LABEL: vrndq_f32
+  // CIR: cir.trunc {{%.*}} : !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: @test_vrndq_f32
+  // LLVM: call <4 x float> @llvm.trunc.v4f32
+
+  // OGCG-LABEL: @test_vrndq_f32
+  // OGCG: call <4 x float> @llvm.trunc.v4f32
+}
+
+float32x2_t test_vrecpe_f32(float32x2_t a) {
+  return vrecpe_f32(a);
+
+  // CIR-LABEL: vrecpe_f32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.frecpe" {{%.*}} : (!cir.vector<!cir.float x 2>) -> !cir.vector<!cir.float x 2>
+
+  // LLVM-LABEL: @test_vrecpe_f32
+  // LLVM: call <2 x float> @llvm.aarch64.neon.frecpe.v2f32
+
+  // OGCG-LABEL: @test_vrecpe_f32
+  // OGCG: call <2 x float> @llvm.aarch64.neon.frecpe.v2f32
+}
+
+float32x4_t test_vrecpeq_f32(float32x4_t a) {
+  return vrecpeq_f32(a);
+
+  // CIR-LABEL: vrecpeq_f32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.frecpe" {{%.*}} : (!cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: @test_vrecpeq_f32
+  // LLVM: call <4 x float> @llvm.aarch64.neon.frecpe.v4f32
+
+  // OGCG-LABEL: @test_vrecpeq_f32
+  // OGCG: call <4 x float> @llvm.aarch64.neon.frecpe.v4f32
+}
+
+uint32x2_t test_vrecpe_u32(uint32x2_t a) {
+  return vrecpe_u32(a);
+
+  // CIR-LABEL: vrecpe_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.urecpe" {{%.*}} : (!cir.vector<!u32i x 2>) -> !cir.vector<!u32i x 2>
+
+  // LLVM-LABEL: @test_vrecpe_u32
+  // LLVM: call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32
+
+  // OGCG-LABEL: @test_vrecpe_u32
+  // OGCG: call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32
+}
+
+uint32x4_t test_vrecpeq_u32(uint32x4_t a) {
+  return vrecpeq_u32(a);
+
+  // CIR-LABEL: vrecpeq_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.urecpe" {{%.*}} : (!cir.vector<!u32i x 4>) -> !cir.vector<!u32i x 4>
+
+  // LLVM-LABEL: @test_vrecpeq_u32
+  // LLVM: call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32
+
+  // OGCG-LABEL: @test_vrecpeq_u32
+  // OGCG: call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32
+}
+
+void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) {
+  vst3q_u32(a, b);
+
+  // CIR-LABEL: vst3q_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.st3"
+
+  // LLVM-LABEL: @test_vst3q_u32
+  // LLVM: call void @llvm.aarch64.neon.st3.v4i32.p0
+
+  // OGCG-LABEL: @test_vst3q_u32
+  // OGCG: call void @llvm.aarch64.neon.st3.v4i32.p0
+}
+
+void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) {
+  vst4q_u32(a, b);
+
+  // CIR-LABEL: vst4q_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.st4"
+
+  // LLVM-LABEL: @test_vst4q_u32
+  // LLVM: call void @llvm.aarch64.neon.st4.v4i32.p0
+
+  // OGCG-LABEL: @test_vst4q_u32
+  // OGCG: call void @llvm.aarch64.neon.st4.v4i32.p0
+}
+
+void test_vst2q_lane_u32(uint32_t *a, uint32x4x2_t b) {
+  vst2q_lane_u32(a, b, 3);
+
+  // CIR-LABEL: vst2q_lane_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.st2lane"
+
+  // LLVM-LABEL: @test_vst2q_lane_u32
+  // LLVM: call void @llvm.aarch64.neon.st2lane.v4i32.p0
+
+  // OGCG-LABEL: @test_vst2q_lane_u32
+  // OGCG: call void @llvm.aarch64.neon.st2lane.v4i32.p0
+}
+
+void test_vst3q_lane_u32(uint32_t *a, uint32x4x3_t b) {
+  vst3q_lane_u32(a, b, 3);
+
+  // CIR-LABEL: vst3q_lane_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.st3lane"
+
+  // LLVM-LABEL: @test_vst3q_lane_u32
+  // LLVM: call void @llvm.aarch64.neon.st3lane.v4i32.p0
+
+  // OGCG-LABEL: @test_vst3q_lane_u32
+  // OGCG: call void @llvm.aarch64.neon.st3lane.v4i32.p0
+}
+
+uint32x4x4_t test_vld1q_u32_x4(uint32_t const *ptr) {
+  return vld1q_u32_x4(ptr);
+
+  // CIR-LABEL: vld1q_u32_x4
+  // CIR: cir.llvm.intrinsic "aarch64.neon.ld1x4"
+
+  // LLVM-LABEL: @test_vld1q_u32_x4
+  // LLVM: call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0
+
+  // OGCG-LABEL: @test_vld1q_u32_x4
+  // OGCG: call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0
+}
+
+uint32x2x4_t test_vld1_u32_x4(uint32_t const *ptr) {
+  return vld1_u32_x4(ptr);
+
+  // CIR-LABEL: vld1_u32_x4
+  // CIR: cir.llvm.intrinsic "aarch64.neon.ld1x4"
+
+  // LLVM-LABEL: @test_vld1_u32_x4
+  // LLVM: call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0
+
+  // OGCG-LABEL: @test_vld1_u32_x4
+  // OGCG: call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0
+}
+
+uint32x4x3_t test_vld1q_u32_x3(uint32_t const *ptr) {
+  return vld1q_u32_x3(ptr);
+
+  // CIR-LABEL: vld1q_u32_x3
+  // CIR: cir.llvm.intrinsic "aarch64.neon.ld1x3"
+
+  // LLVM-LABEL: @test_vld1q_u32_x3
+  // LLVM: call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0
+
+  // OGCG-LABEL: @test_vld1q_u32_x3
+  // OGCG: call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0
+}
+
+uint32x2x3_t test_vld1_u32_x3(uint32_t const *ptr) {
+  return vld1_u32_x3(ptr);
+
+  // CIR-LABEL: vld1_u32_x3
+  // CIR: cir.llvm.intrinsic "aarch64.neon.ld1x3"
+
+  // LLVM-LABEL: @test_vld1_u32_x3
+  // LLVM: call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0
+
+  // OGCG-LABEL: @test_vld1_u32_x3
+  // OGCG: call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0
+}
+
+uint32x4x2_t test_vld1q_u32_x2(uint32_t const *ptr) {
+  return vld1q_u32_x2(ptr);
+
+  // CIR-LABEL: vld1q_u32_x2
+  // CIR: cir.llvm.intrinsic "aarch64.neon.ld1x2"
+
+  // LLVM-LABEL: @test_vld1q_u32_x2
+  // LLVM: call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0
+
+  // OGCG-LABEL: @test_vld1q_u32_x2
+  // OGCG: call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0
+}
+
+uint32x2x2_t test_vld1_u32_x2(uint32_t const *ptr) {
+  return vld1_u32_x2(ptr);
+
+  // CIR-LABEL: vld1_u32_x2
+  // CIR: cir.llvm.intrinsic "aarch64.neon.ld1x2"
+
+  // LLVM-LABEL: @test_vld1_u32_x2
+  // LLVM: call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0
+
+  // OGCG-LABEL: @test_vld1_u32_x2
+  // OGCG: call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0
+}
+
+uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
+  return vsliq_n_u32(a, b, 1);
+
+  // CIR-LABEL: vsliq_n_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.vsli"
+
+  // LLVM-LABEL: @test_vsliq_n_u32
+  // LLVM: call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}, i32 1)
+
+  // OGCG-LABEL: @test_vsliq_n_u32
+  // OGCG: call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}, i32 1)
+}
+
+uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
+  return vsli_n_u8(a, b, 1);
+
+  // CIR-LABEL: vsli_n_u8
+  // CIR: cir.llvm.intrinsic "aarch64.neon.vsli"
+
+  // LLVM-LABEL: @test_vsli_n_u8
+  // LLVM: call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> {{%.*}}, <8 x i8> {{%.*}}, i32 1)
+
+  // OGCG-LABEL: @test_vsli_n_u8
+  // OGCG: call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> {{%.*}}, <8 x i8> {{%.*}}, i32 1)
+}
+
+uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
+  return vsriq_n_u32(a, b, 1);
+
+  // CIR-LABEL: vsriq_n_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.vsri"
+
+  // LLVM-LABEL: @test_vsriq_n_u32
+  // LLVM: call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}, i32 1)
+
+  // OGCG-LABEL: @test_vsriq_n_u32
+  // OGCG: call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}, i32 1)
+}
+
+uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
+  return vsri_n_u8(a, b, 1);
+
+  // CIR-LABEL: vsri_n_u8
+  // CIR: cir.llvm.intrinsic "aarch64.neon.vsri"
+
+  // LLVM-LABEL: @test_vsri_n_u8
+  // LLVM: call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> {{%.*}}, <8 x i8> {{%.*}}, i32 1)
+
+  // OGCG-LABEL: @test_vsri_n_u8
+  // OGCG: call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> {{%.*}}, <8 x i8> {{%.*}}, i32 1)
+}
+
+float32x4_t test_vfmaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
+  return vfmaq_laneq_f32(a, b, v, 1);
+
+  // CIR-LABEL: vfmaq_laneq_f32
+  // CIR: %[[SPLAT:.*]] = cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!cir.float x 4>) [#cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i]
+  // CIR: cir.llvm.intrinsic "fma" %[[SPLAT]], {{%.*}}, {{%.*}} :
+
+  // LLVM-LABEL: @test_vfmaq_laneq_f32
+  // LLVM: %[[SPLAT:.*]] = shufflevector <4 x float> {{%.*}}, <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  // LLVM: call <4 x float> @llvm.fma.v4f32(<4 x float> %[[SPLAT]], <4 x float> {{%.*}}, <4 x float> {{%.*}})
+
+  // OGCG-LABEL: @test_vfmaq_laneq_f32
+  // OGCG: %[[SPLAT:.*]] = shufflevector <4 x float> {{%.*}}, <4 x float> {{%.*}}, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  // OGCG: call <4 x float> @llvm.fma.v4f32(<4 x float> %[[SPLAT]], <4 x float> {{%.*}}, <4 x float> {{%.*}})
+}
+
+int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
+  return vtbl4_s8(a, b);
+
+  // CIR-LABEL: vtbl4_s8
+  // CIR: %[[MERGE1:.*]] = cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s8i x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i]
+  // CIR: %[[MERGE2:.*]] = cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s8i x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i]
+  // CIR: cir.llvm.intrinsic "aarch64.neon.tbl2" %[[MERGE1]], %[[MERGE2]], {{%.*}} :
+
+  // LLVM-LABEL: @test_vtbl4_s8
+  // LLVM: %[[MERGE1:.*]] = shufflevector <8 x i8> {{%.*}}, <8 x i8> {{%.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // LLVM: %[[MERGE2:.*]] = shufflevector <8 x i8> {{%.*}}, <8 x i8> {{%.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // LLVM: call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %[[MERGE1]], <16 x i8> %[[MERGE2]], <8 x i8> {{%.*}})
+
+  // OGCG-LABEL: @test_vtbl4_s8
+  // OGCG: %[[MERGE1:.*]] = shufflevector <8 x i8> {{%.*}}, <8 x i8> {{%.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // OGCG: %[[MERGE2:.*]] = shufflevector <8 x i8> {{%.*}}, <8 x i8> {{%.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // OGCG: call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %[[MERGE1]], <16 x i8> %[[MERGE2]], <8 x i8> {{%.*}})
+}
+
+void test_vst1q_s32_x4(int32_t *a, int32x4x4_t b) {
+  vst1q_s32_x4(a, b);
+
+  // CIR-LABEL: vst1q_s32_x4
+  // CIR: cir.llvm.intrinsic "aarch64.neon.st1x4" {{%.*}}, {{%.*}}, {{%.*}}, {{%.*}}, {{%.*}} :
+
+  // LLVM-LABEL: @test_vst1q_s32_x4
+  // LLVM: call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, ptr {{%.*}})
+
+  // OGCG-LABEL: @test_vst1q_s32_x4
+  // OGCG: call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, ptr {{%.*}})
+}
+
+void test_vst1_s32_x2(int32_t *a, int32x2x2_t b) {
+  vst1_s32_x2(a, b);
+
+  // CIR-LABEL: vst1_s32_x2
+  // CIR: cir.llvm.intrinsic "aarch64.neon.st1x2" {{%.*}}, {{%.*}}, {{%.*}} :
+
+  // LLVM-LABEL: @test_vst1_s32_x2
+  // LLVM: call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}, ptr {{%.*}})
+
+  // OGCG-LABEL: @test_vst1_s32_x2
+  // OGCG: call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}, ptr {{%.*}})
+}
+
+void test_vst1q_s32_x2(int32_t *a, int32x4x2_t b) {
+  vst1q_s32_x2(a, b);
+
+  // CIR-LABEL: vst1q_s32_x2
+  // CIR: cir.llvm.intrinsic "aarch64.neon.st1x2" {{%.*}}, {{%.*}}, {{%.*}} :
+
+  // LLVM-LABEL: @test_vst1q_s32_x2
+  // LLVM: call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}, ptr {{%.*}})
+
+  // OGCG-LABEL: @test_vst1q_s32_x2
+  // OGCG: call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}, ptr {{%.*}})
+}
+
+void test_vst1_s32_x3(int32_t *a, int32x2x3_t b) {
+  vst1_s32_x3(a, b);
+
+  // CIR-LABEL: vst1_s32_x3
+  // CIR: cir.llvm.intrinsic "aarch64.neon.st1x3" {{%.*}}, {{%.*}}, {{%.*}}, {{%.*}} :
+
+  // LLVM-LABEL: @test_vst1_s32_x3
+  // LLVM: call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, ptr {{%.*}})
+
+  // OGCG-LABEL: @test_vst1_s32_x3
+  // OGCG: call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, ptr {{%.*}})
+}
+
+void test_vst1q_s32_x3(int32_t *a, int32x4x3_t b) {
+  vst1q_s32_x3(a, b);
+
+  // CIR-LABEL: vst1q_s32_x3
+  // CIR: cir.llvm.intrinsic "aarch64.neon.st1x3" {{%.*}}, {{%.*}}, {{%.*}}, {{%.*}} :
+
+  // LLVM-LABEL: @test_vst1q_s32_x3
+  // LLVM: call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, ptr {{%.*}})
+
+  // OGCG-LABEL: @test_vst1q_s32_x3
+  // OGCG: call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, ptr {{%.*}})
+}
+
+void test_vst1_s32_x4(int32_t *a, int32x2x4_t b) {
+  vst1_s32_x4(a, b);
+
+  // CIR-LABEL: vst1_s32_x4
+  // CIR: cir.llvm.intrinsic "aarch64.neon.st1x4" {{%.*}}, {{%.*}}, {{%.*}}, {{%.*}}, {{%.*}} :
+
+  // LLVM-LABEL: @test_vst1_s32_x4
+  // LLVM: call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, ptr {{%.*}})
+
+  // OGCG-LABEL: @test_vst1_s32_x4
+  // OGCG: call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, ptr {{%.*}})
+}
+
+int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
+  return vpadd_s32(a, b);
+
+  // CIR-LABEL: vpadd_s32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.addp" {{%.*}}, {{%.*}} :
+
+  // LLVM-LABEL: @test_vpadd_s32
+  // LLVM: call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
+
+  // OGCG-LABEL: @test_vpadd_s32
+  // OGCG: call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> {{%.*}}, <2 x i32> {{%.*}})
+}
+
+float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
+  return vpadd_f32(a, b);
+
+  // CIR-LABEL: vpadd_f32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.faddp" {{%.*}}, {{%.*}} :
+
+  // LLVM-LABEL: @test_vpadd_f32
+  // LLVM: call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> {{%.*}}, <2 x float> {{%.*}})
+
+  // OGCG-LABEL: @test_vpadd_f32
+  // OGCG: call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> {{%.*}}, <2 x float> {{%.*}})
+}
+
+uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
+  return vtbl1_u8(a, b);
+
+  // CIR-LABEL: vtbl1_u8
+  // CIR: %[[TBL:.*]] = cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s8i x 8>)
+  // CIR: cir.llvm.intrinsic "aarch64.neon.tbl1" %[[TBL]], {{%.*}} :
+
+  // LLVM-LABEL: @test_vtbl1_u8
+  // LLVM: %[[SHF:.*]] = shufflevector <8 x i8> {{%.*}}, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // LLVM: call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %[[SHF]], <8 x i8> {{%.*}})
+
+  // OGCG-LABEL: @test_vtbl1_u8
+  // OGCG: shufflevector <8 x i8> {{%.*}}, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // OGCG: call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> {{%.*}}, <8 x i8> {{%.*}})
+}
+
+int32x2_t test_vdot_s32(int32x2_t a, int8x8_t b, int8x8_t c) {
+  return vdot_s32(a, b, c);
+
+  // CIR-LABEL: vdot_s32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.sdot" {{%.*}}, {{%.*}}, {{%.*}} :
+
+  // LLVM-LABEL: @test_vdot_s32
+  // LLVM: call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> {{%.*}}, <8 x i8> {{%.*}}, <8 x i8> {{%.*}})
+
+  // OGCG-LABEL: @test_vdot_s32
+  // OGCG: call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> {{%.*}}, <8 x i8> {{%.*}}, <8 x i8> {{%.*}})
+}
+
+int32x4_t test_vdotq_s32(int32x4_t a, int8x16_t b, int8x16_t c) {
+  return vdotq_s32(a, b, c);
+
+  // CIR-LABEL: vdotq_s32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.sdot" {{%.*}}, {{%.*}}, {{%.*}} :
+
+  // LLVM-LABEL: @test_vdotq_s32
+  // LLVM: call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> {{%.*}}, <16 x i8> {{%.*}}, <16 x i8> {{%.*}})
+
+  // OGCG-LABEL: @test_vdotq_s32
+  // OGCG: call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> {{%.*}}, <16 x i8> {{%.*}}, <16 x i8> {{%.*}})
+}
+
+uint32x2_t test_vdot_u32(uint32x2_t a, uint8x8_t b, uint8x8_t c) {
+  return vdot_u32(a, b, c);
+
+  // CIR-LABEL: vdot_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.udot" {{%.*}}, {{%.*}}, {{%.*}} :
+
+  // LLVM-LABEL: @test_vdot_u32
+  // LLVM: call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> {{%.*}}, <8 x i8> {{%.*}}, <8 x i8> {{%.*}})
+
+  // OGCG-LABEL: @test_vdot_u32
+  // OGCG: call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> {{%.*}}, <8 x i8> {{%.*}}, <8 x i8> {{%.*}})
+}
+
+uint32x4_t test_vdotq_u32(uint32x4_t a, uint8x16_t b, uint8x16_t c) {
+  return vdotq_u32(a, b, c);
+
+  // CIR-LABEL: vdotq_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.udot" {{%.*}}, {{%.*}}, {{%.*}} :
+
+  // LLVM-LABEL: @test_vdotq_u32
+  // LLVM: call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> {{%.*}}, <16 x i8> {{%.*}}, <16 x i8> {{%.*}})
+
+  // OGCG-LABEL: @test_vdotq_u32
+  // OGCG: call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> {{%.*}}, <16 x i8> {{%.*}}, <16 x i8> {{%.*}})
+}
+
+float32_t test_vminv_f32(float32x2_t a) {
+  return vminv_f32(a);
+
+  // CIR-LABEL: vminv_f32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.fminv" {{%.*}} : (!cir.vector<!cir.float x 2>) -> !cir.float
+
+  // LLVM-LABEL: @test_vminv_f32
+  // LLVM: call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> {{%.*}})
+
+  // OGCG-LABEL: @test_vminv_f32
+  // OGCG: call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> {{%.*}})
+}
+
+int32_t test_vminvq_s32(int32x4_t a) {
+  return vminvq_s32(a);
+
+  // CIR-LABEL: vminvq_s32
+  // CIR: cir.llvm.intrinsic "vector.reduce.smin" {{%.*}} : (!cir.vector<!s32i x 4>) -> !s32i
+
+  // LLVM-LABEL: @test_vminvq_s32
+  // LLVM: call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> {{%.*}})
+
+  // OGCG-LABEL: @test_vminvq_s32
+  // OGCG: call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> {{%.*}})
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/AArch64/neon.c b/clang/test/CIR/Incubator/CodeGen/AArch64/neon.c
new file mode 100644
index 0000000000000..6ec6e925aa6e7
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/AArch64/neon.c
@@ -0,0 +1,19515 @@
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
+// RUN:    -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
+// RUN:    -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -emit-llvm -fno-clangir-call-conv-lowering -o - %s \
+// RUN: | opt -S -passes=mem2reg,simplifycfg -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
+// RUN:  -flax-vector-conversions=none -emit-llvm -o - %s \
+// RUN: | opt -S -passes=instcombine,mem2reg,simplifycfg -o %t-og.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t-og.ll %s
+
+// REQUIRES: aarch64-registered-target || arm-registered-target
+
+// This test mimics clang/test/CodeGen/AArch64/neon-intrinsics.c, which eventually
+// CIR shall be able to support fully. Since this is going to take some time to converge,
+// the unsupported/NYI code is commented out, so that we can incrementally improve this.
+// The NYI filecheck used contains the LLVM output from OG codegen that should guide the
+// correct result when implementing this into the CIR pipeline.
+
+#include <arm_neon.h>
+
+// NYI-LABEL: @test_vadd_s8(
+// NYI:   [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
+// NYI:   ret <8 x i8> [[ADD_I]]
+// int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) {
+//   return vadd_s8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vadd_s16(
+// NYI:   [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
+// NYI:   ret <4 x i16> [[ADD_I]]
+// int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) {
+//   return vadd_s16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vadd_s32(
+// NYI:   [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
+// NYI:   ret <2 x i32> [[ADD_I]]
+// int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) {
+//   return vadd_s32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vadd_s64(
+// NYI:   [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
+// NYI:   ret <1 x i64> [[ADD_I]]
+// int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) {
+//   return vadd_s64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vadd_f32(
+// NYI:   [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2
+// NYI:   ret <2 x float> [[ADD_I]]
+// float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) {
+//   return vadd_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vadd_u8(
+// NYI:   [[ADD_I:%.*]] = add <8 x i8> %v1, %v2
+// NYI:   ret <8 x i8> [[ADD_I]]
+// uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) {
+//   return vadd_u8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vadd_u16(
+// NYI:   [[ADD_I:%.*]] = add <4 x i16> %v1, %v2
+// NYI:   ret <4 x i16> [[ADD_I]]
+// uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) {
+//   return vadd_u16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vadd_u32(
+// NYI:   [[ADD_I:%.*]] = add <2 x i32> %v1, %v2
+// NYI:   ret <2 x i32> [[ADD_I]]
+// uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) {
+//   return vadd_u32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vadd_u64(
+// NYI:   [[ADD_I:%.*]] = add <1 x i64> %v1, %v2
+// NYI:   ret <1 x i64> [[ADD_I]]
+// uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) {
+//   return vadd_u64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vaddq_s8(
+// NYI:   [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
+// NYI:   ret <16 x i8> [[ADD_I]]
+// int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) {
+//   return vaddq_s8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vaddq_s16(
+// NYI:   [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
+// NYI:   ret <8 x i16> [[ADD_I]]
+// int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) {
+//   return vaddq_s16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vaddq_s32(
+// NYI:   [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
+// NYI:   ret <4 x i32> [[ADD_I]]
+// int32x4_t test_vaddq_s32(int32x4_t v1, int32x4_t v2) {
+//   return vaddq_s32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vaddq_s64(
+// NYI:   [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
+// NYI:   ret <2 x i64> [[ADD_I]]
+// int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) {
+//   return vaddq_s64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vaddq_f32(
+// NYI:   [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2
+// NYI:   ret <4 x float> [[ADD_I]]
+// float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) {
+//   return vaddq_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vaddq_f64(
+// NYI:   [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2
+// NYI:   ret <2 x double> [[ADD_I]]
+// float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) {
+//   return vaddq_f64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vaddq_u8(
+// NYI:   [[ADD_I:%.*]] = add <16 x i8> %v1, %v2
+// NYI:   ret <16 x i8> [[ADD_I]]
+// uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) {
+//   return vaddq_u8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vaddq_u16(
+// NYI:   [[ADD_I:%.*]] = add <8 x i16> %v1, %v2
+// NYI:   ret <8 x i16> [[ADD_I]]
+// uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) {
+//   return vaddq_u16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vaddq_u32(
+// NYI:   [[ADD_I:%.*]] = add <4 x i32> %v1, %v2
+// NYI:   ret <4 x i32> [[ADD_I]]
+// uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) {
+//   return vaddq_u32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vaddq_u64(
+// NYI:   [[ADD_I:%.*]] = add <2 x i64> %v1, %v2
+// NYI:   ret <2 x i64> [[ADD_I]]
+// uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) {
+//   return vaddq_u64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsub_s8(
+// NYI:   [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
+// NYI:   ret <8 x i8> [[SUB_I]]
+// int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) {
+//   return vsub_s8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsub_s16(
+// NYI:   [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
+// NYI:   ret <4 x i16> [[SUB_I]]
+// int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) {
+//   return vsub_s16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsub_s32(
+// NYI:   [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
+// NYI:   ret <2 x i32> [[SUB_I]]
+// int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) {
+//   return vsub_s32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsub_s64(
+// NYI:   [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
+// NYI:   ret <1 x i64> [[SUB_I]]
+// int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) {
+//   return vsub_s64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsub_f32(
+// NYI:   [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2
+// NYI:   ret <2 x float> [[SUB_I]]
+// float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) {
+//   return vsub_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsub_u8(
+// NYI:   [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2
+// NYI:   ret <8 x i8> [[SUB_I]]
+// uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) {
+//   return vsub_u8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsub_u16(
+// NYI:   [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2
+// NYI:   ret <4 x i16> [[SUB_I]]
+// uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) {
+//   return vsub_u16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsub_u32(
+// NYI:   [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2
+// NYI:   ret <2 x i32> [[SUB_I]]
+// uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) {
+//   return vsub_u32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsub_u64(
+// NYI:   [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2
+// NYI:   ret <1 x i64> [[SUB_I]]
+// uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) {
+//   return vsub_u64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsubq_s8(
+// NYI:   [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
+// NYI:   ret <16 x i8> [[SUB_I]]
+// int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) {
+//   return vsubq_s8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsubq_s16(
+// NYI:   [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
+// NYI:   ret <8 x i16> [[SUB_I]]
+// int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) {
+//   return vsubq_s16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsubq_s32(
+// NYI:   [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
+// NYI:   ret <4 x i32> [[SUB_I]]
+// int32x4_t test_vsubq_s32(int32x4_t v1, int32x4_t v2) {
+//   return vsubq_s32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsubq_s64(
+// NYI:   [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
+// NYI:   ret <2 x i64> [[SUB_I]]
+// int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) {
+//   return vsubq_s64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsubq_f32(
+// NYI:   [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2
+// NYI:   ret <4 x float> [[SUB_I]]
+// float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) {
+//   return vsubq_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsubq_f64(
+// NYI:   [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2
+// NYI:   ret <2 x double> [[SUB_I]]
+// float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) {
+//   return vsubq_f64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsubq_u8(
+// NYI:   [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2
+// NYI:   ret <16 x i8> [[SUB_I]]
+// uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) {
+//   return vsubq_u8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsubq_u16(
+// NYI:   [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2
+// NYI:   ret <8 x i16> [[SUB_I]]
+// uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) {
+//   return vsubq_u16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsubq_u32(
+// NYI:   [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2
+// NYI:   ret <4 x i32> [[SUB_I]]
+// uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) {
+//   return vsubq_u32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vsubq_u64(
+// NYI:   [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2
+// NYI:   ret <2 x i64> [[SUB_I]]
+// uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) {
+//   return vsubq_u64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vmul_s8(
+// NYI:   [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
+// NYI:   ret <8 x i8> [[MUL_I]]
+// int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) {
+//   return vmul_s8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vmul_s16(
+// NYI:   [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
+// NYI:   ret <4 x i16> [[MUL_I]]
+// int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) {
+//   return vmul_s16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vmul_s32(
+// NYI:   [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
+// NYI:   ret <2 x i32> [[MUL_I]]
+// int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) {
+//   return vmul_s32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vmul_f32(
+// NYI:   [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2
+// NYI:   ret <2 x float> [[MUL_I]]
+// float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) {
+//   return vmul_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vmul_u8(
+// NYI:   [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2
+// NYI:   ret <8 x i8> [[MUL_I]]
+// uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) {
+//   return vmul_u8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vmul_u16(
+// NYI:   [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2
+// NYI:   ret <4 x i16> [[MUL_I]]
+// uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) {
+//   return vmul_u16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vmul_u32(
+// NYI:   [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2
+// NYI:   ret <2 x i32> [[MUL_I]]
+// uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) {
+//   return vmul_u32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vmulq_s8(
+// NYI:   [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
+// NYI:   ret <16 x i8> [[MUL_I]]
+// int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) {
+//   return vmulq_s8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vmulq_s16(
+// NYI:   [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
+// NYI:   ret <8 x i16> [[MUL_I]]
+// int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) {
+//   return vmulq_s16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vmulq_s32(
+// NYI:   [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
+// NYI:   ret <4 x i32> [[MUL_I]]
+// int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) {
+//   return vmulq_s32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vmulq_u8(
+// NYI:   [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2
+// NYI:   ret <16 x i8> [[MUL_I]]
+// uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) {
+//   return vmulq_u8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vmulq_u16(
+// NYI:   [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2
+// NYI:   ret <8 x i16> [[MUL_I]]
+// uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) {
+//   return vmulq_u16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vmulq_u32(
+// NYI:   [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2
+// NYI:   ret <4 x i32> [[MUL_I]]
+// uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) {
+//   return vmulq_u32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vmulq_f32(
+// NYI:   [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2
+// NYI:   ret <4 x float> [[MUL_I]]
+// float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) {
+//   return vmulq_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vmulq_f64(
+// NYI:   [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2
+// NYI:   ret <2 x double> [[MUL_I]]
+// float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) {
+//   return vmulq_f64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vmul_p8(
+// NYI:   [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2)
+// NYI:   ret <8 x i8> [[VMUL_V_I]]
+// poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) {
+//   return vmul_p8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vmulq_p8(
+// NYI:   [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2)
+// NYI:   ret <16 x i8> [[VMULQ_V_I]]
+// poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) {
+//   return vmulq_p8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vmla_s8(
+// NYI:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
+// NYI:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
+// NYI:   ret <8 x i8> [[ADD_I]]
+// int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
+//   return vmla_s8(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmla_s16(
+// NYI:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
+// NYI:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
+//   return (int8x8_t)vmla_s16(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmla_s32(
+// NYI:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
+// NYI:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
+// NYI:   ret <2 x i32> [[ADD_I]]
+// int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
+//   return vmla_s32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmla_f32(
+// NYI:   [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
+// NYI:   [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]]
+// NYI:   ret <2 x float> [[ADD_I]]
+// float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
+//   return vmla_f32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmla_u8(
+// NYI:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
+// NYI:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]]
+// NYI:   ret <8 x i8> [[ADD_I]]
+// uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
+//   return vmla_u8(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmla_u16(
+// NYI:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
+// NYI:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]]
+// NYI:   ret <4 x i16> [[ADD_I]]
+// uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
+//   return vmla_u16(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmla_u32(
+// NYI:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
+// NYI:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]]
+// NYI:   ret <2 x i32> [[ADD_I]]
+// uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
+//   return vmla_u32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmlaq_s8(
+// NYI:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
+// NYI:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
+// NYI:   ret <16 x i8> [[ADD_I]]
+// int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
+//   return vmlaq_s8(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmlaq_s16(
+// NYI:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
+// NYI:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
+// NYI:   ret <8 x i16> [[ADD_I]]
+// int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
+//   return vmlaq_s16(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmlaq_s32(
+// NYI:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
+// NYI:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
+// NYI:   ret <4 x i32> [[ADD_I]]
+// int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
+//   return vmlaq_s32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmlaq_f32(
+// NYI:   [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
+// NYI:   [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]]
+// NYI:   ret <4 x float> [[ADD_I]]
+// float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
+//   return vmlaq_f32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmlaq_u8(
+// NYI:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
+// NYI:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]]
+// NYI:   ret <16 x i8> [[ADD_I]]
+// uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
+//   return vmlaq_u8(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmlaq_u16(
+// NYI:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
+// NYI:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]]
+// NYI:   ret <8 x i16> [[ADD_I]]
+// uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
+//   return vmlaq_u16(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmlaq_u32(
+// NYI:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
+// NYI:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]]
+// NYI:   ret <4 x i32> [[ADD_I]]
+// uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
+//   return vmlaq_u32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmlaq_f64(
+// NYI:   [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
+// NYI:   [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]]
+// NYI:   ret <2 x double> [[ADD_I]]
+// float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
+//   return vmlaq_f64(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmls_s8(
+// NYI:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
+// NYI:   [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
+// NYI:   ret <8 x i8> [[SUB_I]]
+// int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
+//   return vmls_s8(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmls_s16(
+// NYI:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
+// NYI:   [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
+//   return (int8x8_t)vmls_s16(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmls_s32(
+// NYI:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
+// NYI:   [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
+// NYI:   ret <2 x i32> [[SUB_I]]
+// int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
+//   return vmls_s32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmls_f32(
+// NYI:   [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3
+// NYI:   [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]]
+// NYI:   ret <2 x float> [[SUB_I]]
+// float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
+//   return vmls_f32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmls_u8(
+// NYI:   [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3
+// NYI:   [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]]
+// NYI:   ret <8 x i8> [[SUB_I]]
+// uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
+//   return vmls_u8(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmls_u16(
+// NYI:   [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3
+// NYI:   [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]]
+// NYI:   ret <4 x i16> [[SUB_I]]
+// uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
+//   return vmls_u16(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmls_u32(
+// NYI:   [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3
+// NYI:   [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]]
+// NYI:   ret <2 x i32> [[SUB_I]]
+// uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
+//   return vmls_u32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmlsq_s8(
+// NYI:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
+// NYI:   [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
+// NYI:   ret <16 x i8> [[SUB_I]]
+// int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
+//   return vmlsq_s8(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmlsq_s16(
+// NYI:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
+// NYI:   [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
+// NYI:   ret <8 x i16> [[SUB_I]]
+// int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
+//   return vmlsq_s16(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmlsq_s32(
+// NYI:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
+// NYI:   [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
+// NYI:   ret <4 x i32> [[SUB_I]]
+// int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
+//   return vmlsq_s32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmlsq_f32(
+// NYI:   [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3
+// NYI:   [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]]
+// NYI:   ret <4 x float> [[SUB_I]]
+// float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
+//   return vmlsq_f32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmlsq_u8(
+// NYI:   [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3
+// NYI:   [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]]
+// NYI:   ret <16 x i8> [[SUB_I]]
+// uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
+//   return vmlsq_u8(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmlsq_u16(
+// NYI:   [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3
+// NYI:   [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]]
+// NYI:   ret <8 x i16> [[SUB_I]]
+// uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
+//   return vmlsq_u16(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmlsq_u32(
+// NYI:   [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3
+// NYI:   [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]]
+// NYI:   ret <4 x i32> [[SUB_I]]
+// uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
+//   return vmlsq_u32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vmlsq_f64(
+// NYI:   [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3
+// NYI:   [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]]
+// NYI:   ret <2 x double> [[SUB_I]]
+// float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
+//   return vmlsq_f64(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vfma_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
+// NYI:   [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
+// NYI:   [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %v3, <2 x float> %v1)
+// NYI:   ret <2 x float> [[TMP3]]
+// float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
+//   return vfma_f32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vfmaq_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
+// NYI:   [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
+// NYI:   [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %v2, <4 x float> %v3, <4 x float> %v1)
+// NYI:   ret <4 x float> [[TMP3]]
+// float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
+//   return vfmaq_f32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vfmaq_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
+// NYI:   [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
+// NYI:   [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %v3, <2 x double> %v1)
+// NYI:   ret <2 x double> [[TMP3]]
+// float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
+//   return vfmaq_f64(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vfms_f32(
+// NYI:   [[SUB_I:%.*]] = fneg <2 x float> %v2
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
+// NYI:   [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8>
+// NYI:   [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %v3, <2 x float> %v1)
+// NYI:   ret <2 x float> [[TMP3]]
+// float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) {
+//   return vfms_f32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vfmsq_f32(
+// NYI:   [[SUB_I:%.*]] = fneg <4 x float> %v2
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
+// NYI:   [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8>
+// NYI:   [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %v3, <4 x float> %v1)
+// NYI:   ret <4 x float> [[TMP3]]
+// float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
+//   return vfmsq_f32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vfmsq_f64(
+// NYI:   [[SUB_I:%.*]] = fneg <2 x double> %v2
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8>
+// NYI:   [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8>
+// NYI:   [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> %v3, <2 x double> %v1)
+// NYI:   ret <2 x double> [[TMP3]]
+// float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) {
+//   return vfmsq_f64(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vdivq_f64(
+// NYI:   [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2
+// NYI:   ret <2 x double> [[DIV_I]]
+// float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) {
+//   return vdivq_f64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vdivq_f32(
+// NYI:   [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2
+// NYI:   ret <4 x float> [[DIV_I]]
+// float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) {
+//   return vdivq_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vdiv_f32(
+// NYI:   [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2
+// NYI:   ret <2 x float> [[DIV_I]]
+// float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) {
+//   return vdiv_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vaba_s8(
+// NYI:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3)
+// NYI:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
+// NYI:   ret <8 x i8> [[ADD_I]]
+// int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) {
+//   return vaba_s8(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vaba_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
+// NYI:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v2, <4 x i16> %v3)
+// NYI:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
+// NYI:   ret <4 x i16> [[ADD_I]]
+// int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) {
+//   return vaba_s16(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vaba_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
+// NYI:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v2, <2 x i32> %v3)
+// NYI:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
+// NYI:   ret <2 x i32> [[ADD_I]]
+// int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) {
+//   return vaba_s32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vaba_u8(
+// NYI:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3)
+// NYI:   [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]]
+// NYI:   ret <8 x i8> [[ADD_I]]
+// uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
+//   return vaba_u8(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vaba_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
+// NYI:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v2, <4 x i16> %v3)
+// NYI:   [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]]
+// NYI:   ret <4 x i16> [[ADD_I]]
+// uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
+//   return vaba_u16(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vaba_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8>
+// NYI:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v2, <2 x i32> %v3)
+// NYI:   [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]]
+// NYI:   ret <2 x i32> [[ADD_I]]
+// uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
+//   return vaba_u32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vabaq_s8(
+// NYI:   [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3)
+// NYI:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
+// NYI:   ret <16 x i8> [[ADD_I]]
+// int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) {
+//   return vabaq_s8(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vabaq_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
+// NYI:   [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v2, <8 x i16> %v3)
+// NYI:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
+// NYI:   ret <8 x i16> [[ADD_I]]
+// int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) {
+//   return vabaq_s16(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vabaq_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
+// NYI:   [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v2, <4 x i32> %v3)
+// NYI:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
+// NYI:   ret <4 x i32> [[ADD_I]]
+// int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) {
+//   return vabaq_s32(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vabaq_u8(
+// NYI:   [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3)
+// NYI:   [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]]
+// NYI:   ret <16 x i8> [[ADD_I]]
+// uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
+//   return vabaq_u8(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vabaq_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
+// NYI:   [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v2, <8 x i16> %v3)
+// NYI:   [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]]
+// NYI:   ret <8 x i16> [[ADD_I]]
+// uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
+//   return vabaq_u16(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vabaq_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8>
+// NYI:   [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v2, <4 x i32> %v3)
+// NYI:   [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]]
+// NYI:   ret <4 x i32> [[ADD_I]]
+// uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
+//   return vabaq_u32(v1, v2, v3);
+// }
+
+// Signed 8x8 absolute difference: expects the "aarch64.neon.sabd" ClangIR
+// intrinsic and, after lowering, a call to the matching v8i8 intrinsic
+// fed directly by the two arguments, whose result is returned.
+int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) {
+  return vabd_s8(v1, v2);
+
+  // CIR-LABEL: vabd_s8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sabd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>)
+
+  // LLVM: {{.*}}test_vabd_s8(<8 x i8>{{.*}}[[V1:%.*]], <8 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
+  // LLVM: ret <8 x i8> [[VABD_I]]
+}
+
+// Signed 16x4 absolute difference: same "aarch64.neon.sabd" pattern as the
+// s8 case, at the v4i16 width.
+int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) {
+  return vabd_s16(v1, v2);
+
+  // CIR-LABEL: vabd_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sabd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>)
+
+  // LLVM: {{.*}}test_vabd_s16(<4 x i16>{{.*}}[[V1:%.*]], <4 x i16>{{.*}}[[V2:%.*]])
+  // LLVM: [[VABD_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> {{.*}}, <4 x i16> {{.*}})
+}
+
+// Signed 32x2 absolute difference via "aarch64.neon.sabd" at the v2i32 width.
+int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) {
+  return vabd_s32(v1, v2);
+
+  // CIR-LABEL: vabd_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sabd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>)
+
+  // LLVM: {{.*}}test_vabd_s32(<2 x i32>{{.*}}[[V1:%.*]], <2 x i32>{{.*}}[[V2:%.*]])
+  // LLVM: [[VABD_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> {{.*}}, <2 x i32> {{.*}})
+}
+
+// Unsigned 8x8 absolute difference: the unsigned variant selects the
+// "aarch64.neon.uabd" intrinsic instead of the signed one.
+uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) {
+  return vabd_u8(v1, v2);
+
+  // CIR-LABEL: vabd_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uabd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>)
+
+  // LLVM: {{.*}}test_vabd_u8(<8 x i8>{{.*}}[[V1:%.*]], <8 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> {{.*}}, <8 x i8> {{.*}})
+}
+
+// Unsigned 16x4 absolute difference via "aarch64.neon.uabd" at v4i16.
+uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) {
+  return vabd_u16(v1, v2);
+
+  // CIR-LABEL: vabd_u16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uabd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>)
+
+  // LLVM: {{.*}}test_vabd_u16(<4 x i16>{{.*}}[[V1:%.*]], <4 x i16>{{.*}}[[V2:%.*]])
+  // LLVM: [[VABD_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> {{.*}}, <4 x i16> {{.*}})
+}
+
+// Unsigned 32x2 absolute difference via "aarch64.neon.uabd" at v2i32.
+uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) {
+  return vabd_u32(v1, v2);
+
+  // CIR-LABEL: vabd_u32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uabd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u32i x 2>, !cir.vector<!u32i x 2>)
+
+  // LLVM: {{.*}}test_vabd_u32(<2 x i32>{{.*}}[[V1:%.*]], <2 x i32>{{.*}}[[V2:%.*]])
+  // LLVM: [[VABD_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> {{.*}}, <2 x i32> {{.*}})
+}
+
+// Floating 32x2 absolute difference: float inputs select the
+// "aarch64.neon.fabd" intrinsic.
+float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) {
+  return vabd_f32(v1, v2);
+
+  // CIR-LABEL: vabd_f32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.fabd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!cir.float x 2>, !cir.vector<!cir.float x 2>)
+
+  // LLVM: {{.*}}test_vabd_f32(<2 x float>{{.*}}[[V1:%.*]], <2 x float>{{.*}}[[V2:%.*]])
+  // LLVM: [[VABD_F:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> {{.*}}, <2 x float> {{.*}})
+}
+
+// Quad-register (128-bit) signed absolute difference at the v16i8 width.
+int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) {
+  return vabdq_s8(v1, v2);
+
+  // CIR-LABEL: vabdq_s8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sabd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>)
+
+  // LLVM: {{.*}}test_vabdq_s8(<16 x i8>{{.*}}[[V1:%.*]], <16 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> {{.*}}, <16 x i8> {{.*}})
+}
+
+// Quad-register signed absolute difference at the v8i16 width.
+int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) {
+  return vabdq_s16(v1, v2);
+
+  // CIR-LABEL: vabdq_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sabd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>)
+
+  // LLVM: {{.*}}test_vabdq_s16(<8 x i16>{{.*}}[[V1:%.*]], <8 x i16>{{.*}}[[V2:%.*]])
+  // LLVM: [[VABD_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> {{.*}}, <8 x i16> {{.*}})
+}
+
+// Quad-register signed absolute difference at the v4i32 width.
+int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) {
+  return vabdq_s32(v1, v2);
+
+  // CIR-LABEL: vabdq_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sabd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>)
+
+  // LLVM: {{.*}}test_vabdq_s32(<4 x i32>{{.*}}[[V1:%.*]], <4 x i32>{{.*}}[[V2:%.*]])
+  // LLVM: [[VABD_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> {{.*}}, <4 x i32> {{.*}})
+}
+
+// Quad-register unsigned absolute difference at the v16i8 width.
+uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) {
+  return vabdq_u8(v1, v2);
+
+  // CIR-LABEL: vabdq_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uabd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>)
+
+  // LLVM: {{.*}}test_vabdq_u8(<16 x i8>{{.*}}[[V1:%.*]], <16 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> {{.*}}, <16 x i8> {{.*}})
+}
+
+// Quad-register unsigned absolute difference at the v8i16 width.
+uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) {
+  return vabdq_u16(v1, v2);
+
+  // CIR-LABEL: vabdq_u16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uabd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>)
+
+  // LLVM: {{.*}}test_vabdq_u16(<8 x i16>{{.*}}[[V1:%.*]], <8 x i16>{{.*}}[[V2:%.*]])
+  // LLVM: [[VABD_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> {{.*}}, <8 x i16> {{.*}})
+}
+
+// Quad-register unsigned absolute difference at the v4i32 width.
+uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) {
+  return vabdq_u32(v1, v2);
+
+  // CIR-LABEL: vabdq_u32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uabd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u32i x 4>, !cir.vector<!u32i x 4>)
+
+  // LLVM: {{.*}}test_vabdq_u32(<4 x i32>{{.*}}[[V1:%.*]], <4 x i32>{{.*}}[[V2:%.*]])
+  // LLVM: [[VABD_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> {{.*}}, <4 x i32> {{.*}})
+}
+
+// Quad-register floating absolute difference at the v4f32 width.
+float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) {
+  return vabdq_f32(v1, v2);
+
+  // CIR-LABEL: vabdq_f32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.fabd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!cir.float x 4>, !cir.vector<!cir.float x 4>)
+
+  // LLVM: {{.*}}test_vabdq_f32(<4 x float>{{.*}}[[V1:%.*]], <4 x float>{{.*}}[[V2:%.*]])
+  // LLVM: [[VABD_F:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> {{.*}}, <4 x float> {{.*}})
+}
+
+// Quad-register double-precision absolute difference at the v2f64 width.
+float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) {
+  return vabdq_f64(v1, v2);
+
+  // CIR-LABEL: vabdq_f64
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.fabd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!cir.double x 2>, !cir.vector<!cir.double x 2>)
+
+  // LLVM: {{.*}}test_vabdq_f64(<2 x double>{{.*}}[[V1:%.*]], <2 x double>{{.*}}[[V2:%.*]])
+  // LLVM: [[VABD_F:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> {{.*}}, <2 x double> {{.*}})
+}
+
+// Bitwise select, vbsl(v1, v2, v3) == (v1 & v2) | (~v1 & v3): checked as an
+// and/not/and/or sequence at the ClangIR level and after lowering.
+int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) {
+  return vbsl_s8(v1, v2, v3);
+
+  // CIR-LABEL: vbsl_s8
+  // CIR: [[VBSL_I:%.*]] = cir.binop(and, [[v1:%.*]], [[v2:%.*]]) : !cir.vector<!s8i x 8>
+  // CIR: [[TMP0:%.*]] = cir.unary(not, [[v1]]) : !cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>
+  // CIR: [[VBSL1_I:%.*]] = cir.binop(and, [[TMP0]], [[v3:%.*]]) : !cir.vector<!s8i x 8>
+  // CIR: [[VBSL2_I:%.*]] = cir.binop(or, [[VBSL_I]], [[VBSL1_I]]) : !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}test_vbsl_s8(<8 x i8>{{.*}}[[v1:%.*]], <8 x i8>{{.*}}[[v2:%.*]], <8 x i8>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL_I:%.*]] = and <8 x i8> [[v1]], [[v2]]
+  // LLVM:   [[TMP0:%.*]] = xor <8 x i8> [[v1]], splat (i8 -1)
+  // LLVM:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], [[v3]]
+  // LLVM:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
+  // LLVM:   ret <8 x i8> [[VBSL2_I]]
+}
+
+// Bitwise select at v4i16, with the result cast to int8x8_t so the lowered
+// output also covers the trailing 4 x i16 -> 8 x i8 bitcast.
+int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) {
+  return (int8x8_t)vbsl_s16(v1, v2, v3);
+
+  // CIR-LABEL: vbsl_s16
+  // CIR: [[VBSL_I:%.*]] = cir.binop(and, [[v1:%.*]], [[v2:%.*]]) : !cir.vector<!s16i x 4>
+  // CIR: [[TMP0:%.*]] = cir.unary(not, [[v1]]) : !cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>
+  // CIR: [[VBSL1_I:%.*]] = cir.binop(and, [[TMP0]], [[v3:%.*]]) : !cir.vector<!s16i x 4>
+  // CIR: [[VBSL2_I:%.*]] = cir.binop(or, [[VBSL_I]], [[VBSL1_I]]) : !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}test_vbsl_s16(<4 x i16>{{.*}}[[v1:%.*]], <4 x i16>{{.*}}[[v2:%.*]], <4 x i16>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL3_I:%.*]] = and <4 x i16>
+  // LLVM:   [[TMP3:%.*]] = xor <4 x i16> {{.*}}, splat (i16 -1)
+  // LLVM:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]],
+  // LLVM:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
+  // LLVM:   [[TMP4:%.*]] = bitcast <4 x i16> {{.*}} to <8 x i8>
+  // LLVM:   ret <8 x i8> [[TMP4]]
+}
+
+// Bitwise select at the v2i32 width: and/not/and/or sequence.
+int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) {
+  return vbsl_s32(v1, v2, v3);
+
+  // CIR-LABEL: vbsl_s32
+  // CIR: [[VBSL_I:%.*]] = cir.binop(and, [[v1:%.*]], [[v2:%.*]]) : !cir.vector<!s32i x 2>
+  // CIR: [[TMP0:%.*]] = cir.unary(not, [[v1]]) : !cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>
+  // CIR: [[VBSL1_I:%.*]] = cir.binop(and, [[TMP0]], [[v3:%.*]]) : !cir.vector<!s32i x 2>
+  // CIR: [[VBSL2_I:%.*]] = cir.binop(or, [[VBSL_I]], [[VBSL1_I]]) : !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}test_vbsl_s32(<2 x i32>{{.*}}[[v1:%.*]], <2 x i32>{{.*}}[[v2:%.*]], <2 x i32>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL3_I:%.*]] = and <2 x i32>
+  // LLVM:   [[TMP3:%.*]] = xor <2 x i32> {{.*}}, splat (i32 -1)
+  // LLVM:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]],
+  // LLVM:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
+}
+
+// Bitwise select at the single-element v1i64 width.
+int64x1_t test_vbsl_s64(uint64x1_t v1, int64x1_t v2, int64x1_t v3) {
+  return vbsl_s64(v1, v2, v3);
+
+  // CIR-LABEL: vbsl_s64
+  // CIR: [[VBSL_I:%.*]] = cir.binop(and, [[v1:%.*]], [[v2:%.*]]) : !cir.vector<!s64i x 1>
+  // CIR: [[TMP0:%.*]] = cir.unary(not, [[v1]]) : !cir.vector<!s64i x 1>, !cir.vector<!s64i x 1>
+  // CIR: [[VBSL1_I:%.*]] = cir.binop(and, [[TMP0]], [[v3:%.*]]) : !cir.vector<!s64i x 1>
+  // CIR: [[VBSL2_I:%.*]] = cir.binop(or, [[VBSL_I]], [[VBSL1_I]]) : !cir.vector<!s64i x 1>
+
+  // LLVM: {{.*}}test_vbsl_s64(<1 x i64>{{.*}}[[v1:%.*]], <1 x i64>{{.*}}[[v2:%.*]], <1 x i64>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL3_I:%.*]] = and <1 x i64>
+  // LLVM:   [[TMP3:%.*]] = xor <1 x i64> {{.*}}, splat (i64 -1)
+  // LLVM:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]],
+  // LLVM:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
+}
+
+uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) {
+  return vbsl_u8(v1, v2, v3);
+
+  // Bitwise select per element, (v1 & v2) | (~v1 & v3).
+  // CIR-LABEL: vbsl_u8
+  // CIR: [[VBSL_I:%.*]] = cir.binop(and, [[v1:%.*]], [[v2:%.*]]) : !cir.vector<!u8i x 8>
+  // CIR: [[TMP0:%.*]] = cir.unary(not, [[v1]]) : !cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>
+  // CIR: [[VBSL1_I:%.*]] = cir.binop(and, [[TMP0]], [[v3:%.*]]) : !cir.vector<!u8i x 8>
+  // CIR: [[VBSL2_I:%.*]] = cir.binop(or, [[VBSL_I]], [[VBSL1_I]]) : !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}test_vbsl_u8(<8 x i8>{{.*}}[[v1:%.*]], <8 x i8>{{.*}}[[v2:%.*]], <8 x i8>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL3_I:%.*]] = and <8 x i8>
+  // LLVM:   [[TMP3:%.*]] = xor <8 x i8> {{.*}}, splat (i8 -1)
+  // LLVM:   [[VBSL4_I:%.*]] = and <8 x i8> [[TMP3]],
+  // LLVM:   [[VBSL5_I:%.*]] = or <8 x i8> [[VBSL3_I]], [[VBSL4_I]]
+}
+
+uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) {
+  return vbsl_u16(v1, v2, v3);
+
+  // Bitwise select per element, (v1 & v2) | (~v1 & v3).
+  // CIR-LABEL: vbsl_u16
+  // CIR: [[VBSL_I:%.*]] = cir.binop(and, [[v1:%.*]], [[v2:%.*]]) : !cir.vector<!u16i x 4>
+  // CIR: [[TMP0:%.*]] = cir.unary(not, [[v1]]) : !cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>
+  // CIR: [[VBSL1_I:%.*]] = cir.binop(and, [[TMP0]], [[v3:%.*]]) : !cir.vector<!u16i x 4>
+  // CIR: [[VBSL2_I:%.*]] = cir.binop(or, [[VBSL_I]], [[VBSL1_I]]) : !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}test_vbsl_u16(<4 x i16>{{.*}}[[v1:%.*]], <4 x i16>{{.*}}[[v2:%.*]], <4 x i16>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL3_I:%.*]] = and <4 x i16>
+  // LLVM:   [[TMP3:%.*]] = xor <4 x i16> {{.*}}, splat (i16 -1)
+  // LLVM:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]],
+  // LLVM:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
+}
+
+
+uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) {
+  return vbsl_u32(v1, v2, v3);
+
+  // Bitwise select per element, (v1 & v2) | (~v1 & v3).
+  // CIR-LABEL: vbsl_u32
+  // CIR: [[VBSL_I:%.*]] = cir.binop(and, [[v1:%.*]], [[v2:%.*]]) : !cir.vector<!u32i x 2>
+  // CIR: [[TMP0:%.*]] = cir.unary(not, [[v1]]) : !cir.vector<!u32i x 2>, !cir.vector<!u32i x 2>
+  // CIR: [[VBSL1_I:%.*]] = cir.binop(and, [[TMP0]], [[v3:%.*]]) : !cir.vector<!u32i x 2>
+  // CIR: [[VBSL2_I:%.*]] = cir.binop(or, [[VBSL_I]], [[VBSL1_I]]) : !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}test_vbsl_u32(<2 x i32>{{.*}}[[v1:%.*]], <2 x i32>{{.*}}[[v2:%.*]], <2 x i32>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL3_I:%.*]] = and <2 x i32>
+  // LLVM:   [[TMP3:%.*]] = xor <2 x i32> {{.*}}, splat (i32 -1)
+  // LLVM:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]],
+  // LLVM:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
+}
+
+uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) {
+  return vbsl_u64(v1, v2, v3);
+
+  // Bitwise select per element, (v1 & v2) | (~v1 & v3).
+  // CIR-LABEL: vbsl_u64
+  // CIR: [[VBSL_I:%.*]] = cir.binop(and, [[v1:%.*]], [[v2:%.*]]) : !cir.vector<!u64i x 1>
+  // CIR: [[TMP0:%.*]] = cir.unary(not, [[v1]]) : !cir.vector<!u64i x 1>, !cir.vector<!u64i x 1>
+  // CIR: [[VBSL1_I:%.*]] = cir.binop(and, [[TMP0]], [[v3:%.*]]) : !cir.vector<!u64i x 1>
+  // CIR: [[VBSL2_I:%.*]] = cir.binop(or, [[VBSL_I]], [[VBSL1_I]]) : !cir.vector<!u64i x 1>
+
+  // LLVM: {{.*}}test_vbsl_u64(<1 x i64>{{.*}}[[v1:%.*]], <1 x i64>{{.*}}[[v2:%.*]], <1 x i64>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL3_I:%.*]] = and <1 x i64>
+  // LLVM:   [[TMP3:%.*]] = xor <1 x i64> {{.*}}, splat (i64 -1)
+  // LLVM:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]],
+  // LLVM:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
+}
+
+float32x2_t test_vbsl_f32(uint32x2_t v1, float32x2_t v2, float32x2_t v3) {
+  return vbsl_f32(v1, v2, v3);
+
+  // Float variant performs the select on the integer bit pattern and
+  // bitcasts the result back to the float vector type.
+  // CIR-LABEL: test_vbsl_f32
+
+  // LLVM: {{.*}}test_vbsl_f32(<2 x i32>{{.*}}[[v1:%.*]], <2 x float>{{.*}}[[v2:%.*]], <2 x float>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL3_I:%.*]] = and <2 x i32>
+  // LLVM:   [[TMP4:%.*]] = xor <2 x i32> {{.*}}, splat (i32 -1)
+  // LLVM:   [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]]
+  // LLVM:   [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]]
+  // LLVM:   [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float>
+}
+
+float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) {
+  return vbsl_f64(v1, v2, v3);
+
+  // Float variant performs the select on the integer bit pattern and
+  // bitcasts the result back to the float vector type.
+  // CIR-LABEL: test_vbsl_f64
+
+  // LLVM: {{.*}}test_vbsl_f64(<1 x i64>{{.*}}[[v1:%.*]], <1 x double>{{.*}}[[v2:%.*]], <1 x double>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL3_I:%.*]] = and <1 x i64>
+  // LLVM:   [[TMP4:%.*]] = xor <1 x i64> {{.*}}, splat (i64 -1)
+  // LLVM:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP4]],
+  // LLVM:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
+  // LLVM:   [[TMP5:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double>
+}
+
+// NYI-LABEL: @test_vbsl_p8(
+// NYI:   [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2
+// NYI:   [[TMP0:%.*]] = xor <8 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// NYI:   [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3
+// NYI:   [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]]
+// NYI:   ret <8 x i8> [[VBSL2_I]]
+// poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) {
+//   return vbsl_p8(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vbsl_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// NYI:   [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8>
+// NYI:   [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2
+// NYI:   [[TMP3:%.*]] = xor <4 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1>
+// NYI:   [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3
+// NYI:   [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// NYI:   ret <4 x i16> [[VBSL5_I]]
+// poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) {
+//   return vbsl_p16(v1, v2, v3);
+// }
+
+int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) {
+  return vbslq_s8(v1, v2, v3);
+
+  // 128-bit (q) bitwise select per element, (v1 & v2) | (~v1 & v3).
+  // CIR-LABEL: vbslq_s8
+  // CIR: [[VBSL_I:%.*]] = cir.binop(and, [[v1:%.*]], [[v2:%.*]]) : !cir.vector<!s8i x 16>
+  // CIR: [[TMP0:%.*]] = cir.unary(not, [[v1]]) : !cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>
+  // CIR: [[VBSL1_I:%.*]] = cir.binop(and, [[TMP0]], [[v3:%.*]]) : !cir.vector<!s8i x 16>
+  // CIR: [[VBSL2_I:%.*]] = cir.binop(or, [[VBSL_I]], [[VBSL1_I]]) : !cir.vector<!s8i x 16>
+
+  // LLVM: {{.*}}test_vbslq_s8(<16 x i8>{{.*}}[[v1:%.*]], <16 x i8>{{.*}}[[v2:%.*]], <16 x i8>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL_I:%.*]] = and <16 x i8> [[v1]], [[v2]]
+  // LLVM:   [[TMP0:%.*]] = xor <16 x i8> [[v1]], splat (i8 -1)
+  // LLVM:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], [[v3]]
+  // LLVM:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
+  // LLVM:   ret <16 x i8> [[VBSL2_I]]
+}
+
+int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) {
+  return vbslq_s16(v1, v2, v3);
+
+  // 128-bit (q) bitwise select per element, (v1 & v2) | (~v1 & v3).
+  // CIR-LABEL: vbslq_s16
+  // CIR: [[VBSL_I:%.*]] = cir.binop(and, [[v1:%.*]], [[v2:%.*]]) : !cir.vector<!s16i x 8>
+  // CIR: [[TMP0:%.*]] = cir.unary(not, [[v1]]) : !cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>
+  // CIR: [[VBSL1_I:%.*]] = cir.binop(and, [[TMP0]], [[v3:%.*]]) : !cir.vector<!s16i x 8>
+  // CIR: [[VBSL2_I:%.*]] = cir.binop(or, [[VBSL_I]], [[VBSL1_I]]) : !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}test_vbslq_s16(<8 x i16>{{.*}}[[v1:%.*]], <8 x i16>{{.*}}[[v2:%.*]], <8 x i16>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL_I:%.*]] = and <8 x i16>
+  // LLVM:   [[TMP0:%.*]] = xor <8 x i16> {{.*}}, splat (i16 -1)
+  // LLVM:   [[VBSL1_I:%.*]] = and <8 x i16> [[TMP0]],
+  // LLVM:   [[VBSL2_I:%.*]] = or <8 x i16> [[VBSL_I]], [[VBSL1_I]]
+}
+
+int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) {
+  return vbslq_s32(v1, v2, v3);
+
+  // 128-bit (q) bitwise select per element, (v1 & v2) | (~v1 & v3).
+  // CIR-LABEL: vbslq_s32
+  // CIR: [[VBSL_I:%.*]] = cir.binop(and, [[v1:%.*]], [[v2:%.*]]) : !cir.vector<!s32i x 4>
+  // CIR: [[TMP0:%.*]] = cir.unary(not, [[v1]]) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  // CIR: [[VBSL1_I:%.*]] = cir.binop(and, [[TMP0]], [[v3:%.*]]) : !cir.vector<!s32i x 4>
+  // CIR: [[VBSL2_I:%.*]] = cir.binop(or, [[VBSL_I]], [[VBSL1_I]]) : !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}test_vbslq_s32(<4 x i32>{{.*}}[[v1:%.*]], <4 x i32>{{.*}}[[v2:%.*]], <4 x i32>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL_I:%.*]] = and <4 x i32>
+  // LLVM:   [[TMP0:%.*]] = xor <4 x i32> {{.*}}, splat (i32 -1)
+  // LLVM:   [[VBSL1_I:%.*]] = and <4 x i32> [[TMP0]],
+  // LLVM:   [[VBSL2_I:%.*]] = or <4 x i32> [[VBSL_I]], [[VBSL1_I]]
+}
+
+int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) {
+  return vbslq_s64(v1, v2, v3);
+
+  // 128-bit (q) bitwise select per element, (v1 & v2) | (~v1 & v3).
+  // CIR-LABEL: vbslq_s64
+  // CIR: [[VBSL_I:%.*]] = cir.binop(and, [[v1:%.*]], [[v2:%.*]]) : !cir.vector<!s64i x 2>
+  // CIR: [[TMP0:%.*]] = cir.unary(not, [[v1]]) : !cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>
+  // CIR: [[VBSL1_I:%.*]] = cir.binop(and, [[TMP0]], [[v3:%.*]]) : !cir.vector<!s64i x 2>
+  // CIR: [[VBSL2_I:%.*]] = cir.binop(or, [[VBSL_I]], [[VBSL1_I]]) : !cir.vector<!s64i x 2>
+
+  // LLVM: {{.*}}test_vbslq_s64(<2 x i64>{{.*}}[[v1:%.*]], <2 x i64>{{.*}}[[v2:%.*]], <2 x i64>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL_I:%.*]] = and <2 x i64>
+  // LLVM:   [[TMP0:%.*]] = xor <2 x i64> {{.*}}, splat (i64 -1)
+  // LLVM:   [[VBSL1_I:%.*]] = and <2 x i64> [[TMP0]],
+  // LLVM:   [[VBSL2_I:%.*]] = or <2 x i64> [[VBSL_I]], [[VBSL1_I]]
+}
+
+uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) {
+  return vbslq_u8(v1, v2, v3);
+
+  // 128-bit (q) bitwise select per element, (v1 & v2) | (~v1 & v3).
+  // CIR-LABEL: vbslq_u8
+  // CIR: [[VBSL_I:%.*]] = cir.binop(and, [[v1:%.*]], [[v2:%.*]]) : !cir.vector<!u8i x 16>
+  // CIR: [[TMP0:%.*]] = cir.unary(not, [[v1]]) : !cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>
+  // CIR: [[VBSL1_I:%.*]] = cir.binop(and, [[TMP0]], [[v3:%.*]]) : !cir.vector<!u8i x 16>
+  // CIR: [[VBSL2_I:%.*]] = cir.binop(or, [[VBSL_I]], [[VBSL1_I]]) : !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}test_vbslq_u8(<16 x i8>{{.*}}[[v1:%.*]], <16 x i8>{{.*}}[[v2:%.*]], <16 x i8>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL_I:%.*]] = and <16 x i8>
+  // LLVM:   [[TMP0:%.*]] = xor <16 x i8> {{.*}}, splat (i8 -1)
+  // LLVM:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]],
+  // LLVM:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
+}
+
+uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) {
+  return vbslq_u16(v1, v2, v3);
+
+  // 128-bit (q) bitwise select per element, (v1 & v2) | (~v1 & v3).
+  // CIR-LABEL: vbslq_u16
+  // CIR: [[VBSL_I:%.*]] = cir.binop(and, [[v1:%.*]], [[v2:%.*]]) : !cir.vector<!u16i x 8>
+  // CIR: [[TMP0:%.*]] = cir.unary(not, [[v1]]) : !cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>
+  // CIR: [[VBSL1_I:%.*]] = cir.binop(and, [[TMP0]], [[v3:%.*]]) : !cir.vector<!u16i x 8>
+  // CIR: [[VBSL2_I:%.*]] = cir.binop(or, [[VBSL_I]], [[VBSL1_I]]) : !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}test_vbslq_u16(<8 x i16>{{.*}}[[v1:%.*]], <8 x i16>{{.*}}[[v2:%.*]], <8 x i16>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL_I:%.*]] = and <8 x i16>
+  // LLVM:   [[TMP0:%.*]] = xor <8 x i16> {{.*}}, splat (i16 -1)
+  // LLVM:   [[VBSL1_I:%.*]] = and <8 x i16> [[TMP0]],
+  // LLVM:   [[VBSL2_I:%.*]] = or <8 x i16> [[VBSL_I]], [[VBSL1_I]]
+}
+
+uint32x4_t test_vbslq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) {
+  return vbslq_u32(v1, v2, v3);
+
+  // 128-bit (q) bitwise select per element, (v1 & v2) | (~v1 & v3).
+  // CIR-LABEL: vbslq_u32
+  // CIR: [[VBSL_I:%.*]] = cir.binop(and, [[v1:%.*]], [[v2:%.*]]) : !cir.vector<!u32i x 4>
+  // CIR: [[TMP0:%.*]] = cir.unary(not, [[v1]]) : !cir.vector<!u32i x 4>, !cir.vector<!u32i x 4>
+  // CIR: [[VBSL1_I:%.*]] = cir.binop(and, [[TMP0]], [[v3:%.*]]) : !cir.vector<!u32i x 4>
+  // CIR: [[VBSL2_I:%.*]] = cir.binop(or, [[VBSL_I]], [[VBSL1_I]]) : !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}test_vbslq_u32(<4 x i32>{{.*}}[[v1:%.*]], <4 x i32>{{.*}}[[v2:%.*]], <4 x i32>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL_I:%.*]] = and <4 x i32>
+  // LLVM:   [[TMP0:%.*]] = xor <4 x i32> {{.*}}, splat (i32 -1)
+  // LLVM:   [[VBSL1_I:%.*]] = and <4 x i32> [[TMP0]],
+  // LLVM:   [[VBSL2_I:%.*]] = or <4 x i32> [[VBSL_I]], [[VBSL1_I]]
+}
+
+uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) {
+  return vbslq_u64(v1, v2, v3);
+
+  // 128-bit (q) bitwise select per element, (v1 & v2) | (~v1 & v3).
+  // CIR-LABEL: vbslq_u64
+  // CIR: [[VBSL_I:%.*]] = cir.binop(and, [[v1:%.*]], [[v2:%.*]]) : !cir.vector<!u64i x 2>
+  // CIR: [[TMP0:%.*]] = cir.unary(not, [[v1]]) : !cir.vector<!u64i x 2>, !cir.vector<!u64i x 2>
+  // CIR: [[VBSL1_I:%.*]] = cir.binop(and, [[TMP0]], [[v3:%.*]]) : !cir.vector<!u64i x 2>
+  // CIR: [[VBSL2_I:%.*]] = cir.binop(or, [[VBSL_I]], [[VBSL1_I]]) : !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}test_vbslq_u64(<2 x i64>{{.*}}[[v1:%.*]], <2 x i64>{{.*}}[[v2:%.*]], <2 x i64>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL_I:%.*]] = and <2 x i64>
+  // LLVM:   [[TMP0:%.*]] = xor <2 x i64> {{.*}}, splat (i64 -1)
+  // LLVM:   [[VBSL1_I:%.*]] = and <2 x i64> [[TMP0]],
+  // LLVM:   [[VBSL2_I:%.*]] = or <2 x i64> [[VBSL_I]], [[VBSL1_I]]
+}
+
+float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) {
+  return vbslq_f32(v1, v2, v3);
+
+  // Float variant performs the select on the integer bit pattern and
+  // bitcasts the result back to the float vector type.
+  // CIR-LABEL: test_vbslq_f32
+
+  // LLVM: {{.*}}test_vbslq_f32(<4 x i32>{{.*}}[[v1:%.*]], <4 x float>{{.*}}[[v2:%.*]], <4 x float>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL3_I:%.*]] = and <4 x i32>
+  // LLVM:   [[TMP4:%.*]] = xor <4 x i32> {{.*}}, splat (i32 -1)
+  // LLVM:   [[VBSL4_I:%.*]] = and <4 x i32> [[TMP4]],
+  // LLVM:   [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]]
+  // LLVM:   [[TMP5:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float>
+}
+
+float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) {
+  return vbslq_f64(v1, v2, v3);
+
+  // Float variant performs the select on the integer bit pattern and
+  // bitcasts the result back to the float vector type.
+  // CIR-LABEL: test_vbslq_f64
+
+  // LLVM: {{.*}}test_vbslq_f64(<2 x i64>{{.*}}[[v1:%.*]], <2 x double>{{.*}}[[v2:%.*]], <2 x double>{{.*}}[[v3:%.*]])
+  // LLVM:   [[VBSL3_I:%.*]] = and <2 x i64>
+  // LLVM:   [[TMP4:%.*]] = xor <2 x i64> {{.*}}, splat (i64 -1)
+  // LLVM:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP4]],
+  // LLVM:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
+  // LLVM:   [[TMP5:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double>
+}
+
+// NYI-LABEL: @test_vbslq_p8(
+// NYI:   [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2
+// NYI:   [[TMP0:%.*]] = xor <16 x i8> %v1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// NYI:   [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3
+// NYI:   [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]]
+// NYI:   ret <16 x i8> [[VBSL2_I]]
+// poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) {
+//   return vbslq_p8(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vbslq_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// NYI:   [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8>
+// NYI:   [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2
+// NYI:   [[TMP3:%.*]] = xor <8 x i16> %v1, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// NYI:   [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3
+// NYI:   [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// NYI:   ret <8 x i16> [[VBSL5_I]]
+// poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) {
+//   return vbslq_p16(v1, v2, v3);
+// }
+
+// NYI-LABEL: @test_vrecps_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
+// NYI:   [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %v1, <2 x float> %v2)
+// NYI:   ret <2 x float> [[VRECPS_V2_I]]
+// float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) {
+//   return vrecps_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vrecpsq_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
+// NYI:   [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %v1, <4 x float> %v2)
+// NYI:   [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
+// NYI:   ret <4 x float> [[VRECPSQ_V2_I]]
+// float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) {
+//   return vrecpsq_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vrecpsq_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
+// NYI:   [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %v1, <2 x double> %v2)
+// NYI:   [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8>
+// NYI:   ret <2 x double> [[VRECPSQ_V2_I]]
+// float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) {
+//   return vrecpsq_f64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vrsqrts_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
+// NYI:   [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %v1, <2 x float> %v2)
+// NYI:   [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
+// NYI:   ret <2 x float> [[VRSQRTS_V2_I]]
+// float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) {
+//   return vrsqrts_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vrsqrtsq_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
+// NYI:   [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %v1, <4 x float> %v2)
+// NYI:   [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
+// NYI:   ret <4 x float> [[VRSQRTSQ_V2_I]]
+// float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) {
+//   return vrsqrtsq_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vrsqrtsq_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
+// NYI:   [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %v1, <2 x double> %v2)
+// NYI:   [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8>
+// NYI:   ret <2 x double> [[VRSQRTSQ_V2_I]]
+// float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) {
+//   return vrsqrtsq_f64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcage_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
+// NYI:   [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2)
+// NYI:   ret <2 x i32> [[VCAGE_V2_I]]
+// uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) {
+//   return vcage_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcage_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// NYI:   [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %a, <1 x double> %b)
+// NYI:   ret <1 x i64> [[VCAGE_V2_I]]
+// uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) {
+//   return vcage_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vcageq_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
+// NYI:   [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2)
+// NYI:   ret <4 x i32> [[VCAGEQ_V2_I]]
+// uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) {
+//   return vcageq_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcageq_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
+// NYI:   [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2)
+// NYI:   ret <2 x i64> [[VCAGEQ_V2_I]]
+// uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) {
+//   return vcageq_f64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcagt_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
+// NYI:   [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2)
+// NYI:   ret <2 x i32> [[VCAGT_V2_I]]
+// uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) {
+//   return vcagt_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcagt_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// NYI:   [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b)
+// NYI:   ret <1 x i64> [[VCAGT_V2_I]]
+// uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) {
+//   return vcagt_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vcagtq_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
+// NYI:   [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2)
+// NYI:   ret <4 x i32> [[VCAGTQ_V2_I]]
+// uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) {
+//   return vcagtq_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcagtq_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
+// NYI:   [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2)
+// NYI:   ret <2 x i64> [[VCAGTQ_V2_I]]
+// uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) {
+//   return vcagtq_f64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcale_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
+// NYI:   [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1)
+// NYI:   ret <2 x i32> [[VCALE_V2_I]]
+// uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) {
+//   return vcale_f32(v1, v2);
+//   // Using registers other than v0, v1 are possible, but would be odd.
+// }
+
+// NYI-LABEL: @test_vcale_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// NYI:   [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %b, <1 x double> %a)
+// NYI:   ret <1 x i64> [[VCALE_V2_I]]
+// uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) {
+//   return vcale_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vcaleq_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
+// NYI:   [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1)
+// NYI:   ret <4 x i32> [[VCALEQ_V2_I]]
+// uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) {
+//   return vcaleq_f32(v1, v2);
+//   // Using registers other than v0, v1 are possible, but would be odd.
+// }
+
+// NYI-LABEL: @test_vcaleq_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
+// NYI:   [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1)
+// NYI:   ret <2 x i64> [[VCALEQ_V2_I]]
+// uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) {
+//   return vcaleq_f64(v1, v2);
+//   // Using registers other than v0, v1 are possible, but would be odd.
+// }
+
+// NYI-LABEL: @test_vcalt_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8>
+// NYI:   [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1)
+// NYI:   ret <2 x i32> [[VCALT_V2_I]]
+// uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) {
+//   return vcalt_f32(v1, v2);
+//   // Using registers other than v0, v1 are possible, but would be odd.
+// }
+
+// NYI-LABEL: @test_vcalt_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// NYI:   [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a)
+// NYI:   ret <1 x i64> [[VCALT_V2_I]]
+// uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) {
+//   return vcalt_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vcaltq_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8>
+// NYI:   [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1)
+// NYI:   ret <4 x i32> [[VCALTQ_V2_I]]
+// uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) {
+//   return vcaltq_f32(v1, v2);
+//   // Using registers other than v0, v1 are possible, but would be odd.
+// }
+
+// NYI-LABEL: @test_vcaltq_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8>
+// NYI:   [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1)
+// NYI:   ret <2 x i64> [[VCALTQ_V2_I]]
+// uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) {
+//   return vcaltq_f64(v1, v2);
+//   // Using registers other than v0, v1 are possible, but would be odd.
+// }
+
+// NYI-LABEL: @test_vtst_s8(
+// NYI:   [[TMP0:%.*]] = and <8 x i8> %v1, %v2
+// NYI:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
+// NYI:   ret <8 x i8> [[VTST_I]]
+// uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) {
+//   return vtst_s8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vtst_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// NYI:   [[TMP2:%.*]] = and <4 x i16> %v1, %v2
+// NYI:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
+// NYI:   ret <4 x i16> [[VTST_I]]
+// uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) {
+//   return vtst_s16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vtst_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
+// NYI:   [[TMP2:%.*]] = and <2 x i32> %v1, %v2
+// NYI:   [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
+// NYI:   ret <2 x i32> [[VTST_I]]
+// uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) {
+//   return vtst_s32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vtst_u8(
+// NYI:   [[TMP0:%.*]] = and <8 x i8> %v1, %v2
+// NYI:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
+// NYI:   ret <8 x i8> [[VTST_I]]
+// uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) {
+//   return vtst_u8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vtst_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// NYI:   [[TMP2:%.*]] = and <4 x i16> %v1, %v2
+// NYI:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
+// NYI:   ret <4 x i16> [[VTST_I]]
+// uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) {
+//   return vtst_u16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vtst_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8>
+// NYI:   [[TMP2:%.*]] = and <2 x i32> %v1, %v2
+// NYI:   [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
+// NYI:   ret <2 x i32> [[VTST_I]]
+// uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) {
+//   return vtst_u32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vtstq_s8(
+// NYI:   [[TMP0:%.*]] = and <16 x i8> %v1, %v2
+// NYI:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
+// NYI:   ret <16 x i8> [[VTST_I]]
+// uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) {
+//   return vtstq_s8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vtstq_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// NYI:   [[TMP2:%.*]] = and <8 x i16> %v1, %v2
+// NYI:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
+// NYI:   ret <8 x i16> [[VTST_I]]
+// uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) {
+//   return vtstq_s16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vtstq_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+// NYI:   [[TMP2:%.*]] = and <4 x i32> %v1, %v2
+// NYI:   [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
+// NYI:   ret <4 x i32> [[VTST_I]]
+// uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) {
+//   return vtstq_s32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vtstq_u8(
+// NYI:   [[TMP0:%.*]] = and <16 x i8> %v1, %v2
+// NYI:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
+// NYI:   ret <16 x i8> [[VTST_I]]
+// uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) {
+//   return vtstq_u8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vtstq_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// NYI:   [[TMP2:%.*]] = and <8 x i16> %v1, %v2
+// NYI:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
+// NYI:   ret <8 x i16> [[VTST_I]]
+// uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) {
+//   return vtstq_u16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vtstq_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+// NYI:   [[TMP2:%.*]] = and <4 x i32> %v1, %v2
+// NYI:   [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
+// NYI:   ret <4 x i32> [[VTST_I]]
+// uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) {
+//   return vtstq_u32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vtstq_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
+// NYI:   [[TMP2:%.*]] = and <2 x i64> %v1, %v2
+// NYI:   [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+// NYI:   ret <2 x i64> [[VTST_I]]
+// uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) {
+//   return vtstq_s64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vtstq_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8>
+// NYI:   [[TMP2:%.*]] = and <2 x i64> %v1, %v2
+// NYI:   [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+// NYI:   ret <2 x i64> [[VTST_I]]
+// uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) {
+//   return vtstq_u64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vtst_p8(
+// NYI:   [[TMP0:%.*]] = and <8 x i8> %v1, %v2
+// NYI:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
+// NYI:   ret <8 x i8> [[VTST_I]]
+// uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) {
+//   return vtst_p8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vtst_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8>
+// NYI:   [[TMP2:%.*]] = and <4 x i16> %v1, %v2
+// NYI:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
+// NYI:   ret <4 x i16> [[VTST_I]]
+// uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) {
+//   return vtst_p16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vtstq_p8(
+// NYI:   [[TMP0:%.*]] = and <16 x i8> %v1, %v2
+// NYI:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
+// NYI:   ret <16 x i8> [[VTST_I]]
+// uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) {
+//   return vtstq_p8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vtstq_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+// NYI:   [[TMP2:%.*]] = and <8 x i16> %v1, %v2
+// NYI:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
+// NYI:   ret <8 x i16> [[VTST_I]]
+// uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) {
+//   return vtstq_p16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vtst_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// NYI:   [[TMP2:%.*]] = and <1 x i64> %a, %b
+// NYI:   [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
+// NYI:   ret <1 x i64> [[VTST_I]]
+// uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) {
+//   return vtst_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vtst_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// NYI:   [[TMP2:%.*]] = and <1 x i64> %a, %b
+// NYI:   [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
+// NYI:   [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
+// NYI:   ret <1 x i64> [[VTST_I]]
+// uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) {
+//   return vtst_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vceq_s8(
+// NYI:   [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// NYI:   ret <8 x i8> [[SEXT_I]]
+// uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) {
+//   return vceq_s8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vceq_s16(
+// NYI:   [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// NYI:   ret <4 x i16> [[SEXT_I]]
+// uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) {
+//   return vceq_s16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vceq_s32(
+// NYI:   [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[SEXT_I]]
+// uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) {
+//   return vceq_s32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vceq_s64(
+// NYI:   [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
+// NYI:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// NYI:   ret <1 x i64> [[SEXT_I]]
+// uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) {
+//   return vceq_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vceq_u64(
+// NYI:   [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
+// NYI:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// NYI:   ret <1 x i64> [[SEXT_I]]
+// uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) {
+//   return vceq_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vceq_f32(
+// NYI:   [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[SEXT_I]]
+// uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) {
+//   return vceq_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vceq_f64(
+// NYI:   [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b
+// NYI:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// NYI:   ret <1 x i64> [[SEXT_I]]
+// uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) {
+//   return vceq_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vceq_u8(
+// NYI:   [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// NYI:   ret <8 x i8> [[SEXT_I]]
+// uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) {
+//   return vceq_u8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vceq_u16(
+// NYI:   [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// NYI:   ret <4 x i16> [[SEXT_I]]
+// uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) {
+//   return vceq_u16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vceq_u32(
+// NYI:   [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[SEXT_I]]
+// uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) {
+//   return vceq_u32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vceq_p8(
+// NYI:   [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// NYI:   ret <8 x i8> [[SEXT_I]]
+// uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) {
+//   return vceq_p8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vceqq_s8(
+// NYI:   [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// NYI:   ret <16 x i8> [[SEXT_I]]
+// uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) {
+//   return vceqq_s8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vceqq_s16(
+// NYI:   [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// NYI:   ret <8 x i16> [[SEXT_I]]
+// uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) {
+//   return vceqq_s16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vceqq_s32(
+// NYI:   [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[SEXT_I]]
+// uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) {
+//   return vceqq_s32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vceqq_f32(
+// NYI:   [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[SEXT_I]]
+// uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) {
+//   return vceqq_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vceqq_u8(
+// NYI:   [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// NYI:   ret <16 x i8> [[SEXT_I]]
+// uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) {
+//   return vceqq_u8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vceqq_u16(
+// NYI:   [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// NYI:   ret <8 x i16> [[SEXT_I]]
+// uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) {
+//   return vceqq_u16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vceqq_u32(
+// NYI:   [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[SEXT_I]]
+// uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) {
+//   return vceqq_u32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vceqq_p8(
+// NYI:   [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// NYI:   ret <16 x i8> [[SEXT_I]]
+// uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) {
+//   return vceqq_p8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vceqq_s64(
+// NYI:   [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[SEXT_I]]
+// uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) {
+//   return vceqq_s64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vceqq_u64(
+// NYI:   [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[SEXT_I]]
+// uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) {
+//   return vceqq_u64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vceqq_f64(
+// NYI:   [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[SEXT_I]]
+// uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) {
+//   return vceqq_f64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcge_s8(
+// NYI:   [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// NYI:   ret <8 x i8> [[SEXT_I]]
+// uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) {
+//   return vcge_s8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcge_s16(
+// NYI:   [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// NYI:   ret <4 x i16> [[SEXT_I]]
+// uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) {
+//   return vcge_s16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcge_s32(
+// NYI:   [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[SEXT_I]]
+// uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) {
+//   return vcge_s32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcge_s64(
+// NYI:   [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b
+// NYI:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// NYI:   ret <1 x i64> [[SEXT_I]]
+// uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) {
+//   return vcge_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vcge_u64(
+// NYI:   [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b
+// NYI:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// NYI:   ret <1 x i64> [[SEXT_I]]
+// uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) {
+//   return vcge_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vcge_f32(
+// NYI:   [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[SEXT_I]]
+// uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) {
+//   return vcge_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcge_f64(
+// NYI:   [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b
+// NYI:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// NYI:   ret <1 x i64> [[SEXT_I]]
+// uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) {
+//   return vcge_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vcge_u8(
+// NYI:   [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// NYI:   ret <8 x i8> [[SEXT_I]]
+// uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) {
+//   return vcge_u8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcge_u16(
+// NYI:   [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// NYI:   ret <4 x i16> [[SEXT_I]]
+// uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) {
+//   return vcge_u16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcge_u32(
+// NYI:   [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[SEXT_I]]
+// uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) {
+//   return vcge_u32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgeq_s8(
+// NYI:   [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// NYI:   ret <16 x i8> [[SEXT_I]]
+// uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) {
+//   return vcgeq_s8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgeq_s16(
+// NYI:   [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// NYI:   ret <8 x i16> [[SEXT_I]]
+// uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) {
+//   return vcgeq_s16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgeq_s32(
+// NYI:   [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[SEXT_I]]
+// uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) {
+//   return vcgeq_s32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgeq_f32(
+// NYI:   [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[SEXT_I]]
+// uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) {
+//   return vcgeq_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgeq_u8(
+// NYI:   [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// NYI:   ret <16 x i8> [[SEXT_I]]
+// uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) {
+//   return vcgeq_u8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgeq_u16(
+// NYI:   [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// NYI:   ret <8 x i16> [[SEXT_I]]
+// uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) {
+//   return vcgeq_u16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgeq_u32(
+// NYI:   [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[SEXT_I]]
+// uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) {
+//   return vcgeq_u32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgeq_s64(
+// NYI:   [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[SEXT_I]]
+// uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) {
+//   return vcgeq_s64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgeq_u64(
+// NYI:   [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[SEXT_I]]
+// uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) {
+//   return vcgeq_u64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgeq_f64(
+// NYI:   [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[SEXT_I]]
+// uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) {
+//   return vcgeq_f64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcle_s8(
+// NYI:   [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// NYI:   ret <8 x i8> [[SEXT_I]]
+// Notes about vcle:
+// LE condition predicate implemented as GE, so check reversed operands.
+// Using registers other than v0 and v1 is possible, but would be odd.
+// uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) {
+//   return vcle_s8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcle_s16(
+// NYI:   [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// NYI:   ret <4 x i16> [[SEXT_I]]
+// uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) {
+//   return vcle_s16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcle_s32(
+// NYI:   [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[SEXT_I]]
+// uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) {
+//   return vcle_s32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcle_s64(
+// NYI:   [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b
+// NYI:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// NYI:   ret <1 x i64> [[SEXT_I]]
+// uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) {
+//   return vcle_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vcle_u64(
+// NYI:   [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b
+// NYI:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// NYI:   ret <1 x i64> [[SEXT_I]]
+// uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) {
+//   return vcle_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vcle_f32(
+// NYI:   [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[SEXT_I]]
+// uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) {
+//   return vcle_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcle_f64(
+// NYI:   [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b
+// NYI:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// NYI:   ret <1 x i64> [[SEXT_I]]
+// uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) {
+//   return vcle_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vcle_u8(
+// NYI:   [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// NYI:   ret <8 x i8> [[SEXT_I]]
+// uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) {
+//   return vcle_u8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcle_u16(
+// NYI:   [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// NYI:   ret <4 x i16> [[SEXT_I]]
+// uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) {
+//   return vcle_u16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcle_u32(
+// NYI:   [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[SEXT_I]]
+// uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) {
+//   return vcle_u32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcleq_s8(
+// NYI:   [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// NYI:   ret <16 x i8> [[SEXT_I]]
+// uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) {
+//   return vcleq_s8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcleq_s16(
+// NYI:   [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// NYI:   ret <8 x i16> [[SEXT_I]]
+// uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) {
+//   return vcleq_s16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcleq_s32(
+// NYI:   [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[SEXT_I]]
+// uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) {
+//   return vcleq_s32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcleq_f32(
+// NYI:   [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[SEXT_I]]
+// uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) {
+//   return vcleq_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcleq_u8(
+// NYI:   [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// NYI:   ret <16 x i8> [[SEXT_I]]
+// uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) {
+//   return vcleq_u8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcleq_u16(
+// NYI:   [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// NYI:   ret <8 x i16> [[SEXT_I]]
+// uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) {
+//   return vcleq_u16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcleq_u32(
+// NYI:   [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[SEXT_I]]
+// uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) {
+//   return vcleq_u32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcleq_s64(
+// NYI:   [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[SEXT_I]]
+// uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) {
+//   return vcleq_s64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcleq_u64(
+// NYI:   [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[SEXT_I]]
+// uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) {
+//   return vcleq_u64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcleq_f64(
+// NYI:   [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[SEXT_I]]
+// uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) {
+//   return vcleq_f64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgt_s8(
+// NYI:   [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// NYI:   ret <8 x i8> [[SEXT_I]]
+// uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) {
+//   return vcgt_s8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgt_s16(
+// NYI:   [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// NYI:   ret <4 x i16> [[SEXT_I]]
+// uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) {
+//   return vcgt_s16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgt_s32(
+// NYI:   [[CMP_I:%.*]] = icmp sgt <2 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[SEXT_I]]
+// uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) {
+//   return vcgt_s32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgt_s64(
+// NYI:   [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b
+// NYI:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// NYI:   ret <1 x i64> [[SEXT_I]]
+// uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) {
+//   return vcgt_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vcgt_u64(
+// NYI:   [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b
+// NYI:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// NYI:   ret <1 x i64> [[SEXT_I]]
+// uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) {
+//   return vcgt_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vcgt_f32(
+// NYI:   [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[SEXT_I]]
+// uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) {
+//   return vcgt_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgt_f64(
+// NYI:   [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b
+// NYI:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// NYI:   ret <1 x i64> [[SEXT_I]]
+// uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) {
+//   return vcgt_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vcgt_u8(
+// NYI:   [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// NYI:   ret <8 x i8> [[SEXT_I]]
+// uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) {
+//   return vcgt_u8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgt_u16(
+// NYI:   [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// NYI:   ret <4 x i16> [[SEXT_I]]
+// uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) {
+//   return vcgt_u16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgt_u32(
+// NYI:   [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[SEXT_I]]
+// uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) {
+//   return vcgt_u32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgtq_s8(
+// NYI:   [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// NYI:   ret <16 x i8> [[SEXT_I]]
+// uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) {
+//   return vcgtq_s8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgtq_s16(
+// NYI:   [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// NYI:   ret <8 x i16> [[SEXT_I]]
+// uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) {
+//   return vcgtq_s16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgtq_s32(
+// NYI:   [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[SEXT_I]]
+// uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) {
+//   return vcgtq_s32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgtq_f32(
+// NYI:   [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[SEXT_I]]
+// uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) {
+//   return vcgtq_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgtq_u8(
+// NYI:   [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// NYI:   ret <16 x i8> [[SEXT_I]]
+// uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) {
+//   return vcgtq_u8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgtq_u16(
+// NYI:   [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// NYI:   ret <8 x i16> [[SEXT_I]]
+// uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) {
+//   return vcgtq_u16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgtq_u32(
+// NYI:   [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[SEXT_I]]
+// uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) {
+//   return vcgtq_u32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgtq_s64(
+// NYI:   [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[SEXT_I]]
+// uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) {
+//   return vcgtq_s64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgtq_u64(
+// NYI:   [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[SEXT_I]]
+// uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) {
+//   return vcgtq_u64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcgtq_f64(
+// NYI:   [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[SEXT_I]]
+// uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) {
+//   return vcgtq_f64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vclt_s8(
+// NYI:   [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// NYI:   ret <8 x i8> [[SEXT_I]]
+// Notes about vclt:
+// LT condition predicate implemented as GT, so check reversed operands.
+// Using registers other than v0 and v1 is possible, but would be odd.
+// uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) {
+//   return vclt_s8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vclt_s16(
+// NYI:   [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// NYI:   ret <4 x i16> [[SEXT_I]]
+// uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) {
+//   return vclt_s16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vclt_s32(
+// NYI:   [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[SEXT_I]]
+// uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) {
+//   return vclt_s32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vclt_s64(
+// NYI:   [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b
+// NYI:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// NYI:   ret <1 x i64> [[SEXT_I]]
+// uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) {
+//   return vclt_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vclt_u64(
+// NYI:   [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b
+// NYI:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// NYI:   ret <1 x i64> [[SEXT_I]]
+// uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) {
+//   return vclt_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vclt_f32(
+// NYI:   [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[SEXT_I]]
+// uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) {
+//   return vclt_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vclt_f64(
+// NYI:   [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b
+// NYI:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
+// NYI:   ret <1 x i64> [[SEXT_I]]
+// uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) {
+//   return vclt_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vclt_u8(
+// NYI:   [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
+// NYI:   ret <8 x i8> [[SEXT_I]]
+// uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) {
+//   return vclt_u8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vclt_u16(
+// NYI:   [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
+// NYI:   ret <4 x i16> [[SEXT_I]]
+// uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) {
+//   return vclt_u16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vclt_u32(
+// NYI:   [[CMP_I:%.*]] = icmp ult <2 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[SEXT_I]]
+// uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) {
+//   return vclt_u32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcltq_s8(
+// NYI:   [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// NYI:   ret <16 x i8> [[SEXT_I]]
+// uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) {
+//   return vcltq_s8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcltq_s16(
+// NYI:   [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// NYI:   ret <8 x i16> [[SEXT_I]]
+// uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) {
+//   return vcltq_s16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcltq_s32(
+// NYI:   [[CMP_I:%.*]] = icmp slt <4 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[SEXT_I]]
+// uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) {
+//   return vcltq_s32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcltq_f32(
+// NYI:   [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[SEXT_I]]
+// uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) {
+//   return vcltq_f32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcltq_u8(
+// NYI:   [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
+// NYI:   ret <16 x i8> [[SEXT_I]]
+// uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) {
+//   return vcltq_u8(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcltq_u16(
+// NYI:   [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
+// NYI:   ret <8 x i16> [[SEXT_I]]
+// uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) {
+//   return vcltq_u16(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcltq_u32(
+// NYI:   [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[SEXT_I]]
+// uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) {
+//   return vcltq_u32(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcltq_s64(
+// NYI:   [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[SEXT_I]]
+// uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) {
+//   return vcltq_s64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcltq_u64(
+// NYI:   [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[SEXT_I]]
+// uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) {
+//   return vcltq_u64(v1, v2);
+// }
+
+// NYI-LABEL: @test_vcltq_f64(
+// NYI:   [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2
+// NYI:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[SEXT_I]]
+// uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) {
+//   return vcltq_f64(v1, v2);
+// }
+
+// Signed halving add, 8 lanes of i8: verifies that vhadd_s8 lowers to the
+// aarch64.neon.shadd intrinsic in both the CIR dialect and the final LLVM IR.
+int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) {
+  return vhadd_s8(v1, v2);
+
+  // CIR-LABEL: vhadd_s8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.shadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}test_vhadd_s8(<8 x i8>{{.*}}[[V1:%.*]], <8 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> [[V1]], <8 x i8> [[V2]])
+  // LLVM: ret <8 x i8> [[VHADD_V_I]]
+}
+
+int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) {
+  return vhadd_s16(v1, v2);
+
+  // CIR-LABEL: vhadd_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.shadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}test_vhadd_s16(<4 x i16>{{.*}}[[V1:%.*]], <4 x i16>{{.*}}[[V2:%.*]])
+  // LLVM:  [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> {{.*}}, <4 x i16> {{.*}})
+  // LLVM:  [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
+}
+
+int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) {
+  return vhadd_s32(v1, v2);
+
+  // CIR-LABEL: vhadd_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.shadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}test_vhadd_s32(<2 x i32>{{.*}}[[V1:%.*]], <2 x i32>{{.*}}[[V2:%.*]])
+  // LLVM:  [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> {{.*}}, <2 x i32> {{.*}})
+  // LLVM:  [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
+}
+
+uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) {
+  return vhadd_u8(v1, v2);
+
+  // CIR-LABEL: vhadd_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uhadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>) -> !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}test_vhadd_u8(<8 x i8>{{.*}}[[V1:%.*]], <8 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> {{.*}}, <8 x i8> {{.*}})
+}
+
+uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) {
+  return vhadd_u16(v1, v2);
+
+  // CIR-LABEL: vhadd_u16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uhadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>) -> !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}test_vhadd_u16(<4 x i16>{{.*}}[[V1:%.*]], <4 x i16>{{.*}}[[V2:%.*]])
+  // LLVM:  [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> {{.*}}, <4 x i16> {{.*}})
+  // LLVM:  [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
+}
+
+uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) {
+  return vhadd_u32(v1, v2);
+
+  // CIR-LABEL: vhadd_u32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uhadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u32i x 2>, !cir.vector<!u32i x 2>) -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}test_vhadd_u32(<2 x i32>{{.*}}[[V1:%.*]], <2 x i32>{{.*}}[[V2:%.*]])
+  // LLVM:  [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> {{.*}}, <2 x i32> {{.*}})
+  // LLVM:  [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
+}
+
+// vhaddq_* tests (128-bit vector variants). Same check shape as the vhadd_*
+// tests: CIR expects cir.llvm.intrinsic "aarch64.neon.{s,u}hadd" with
+// 128-bit !cir.vector types; LLVM expects the matching
+// @llvm.aarch64.neon.{s,u}hadd.* call (plus a bitcast to <16 x i8> for the
+// multi-element widths, mirroring classic codegen output).
+int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) {
+  return vhaddq_s8(v1, v2);
+
+  // CIR-LABEL: vhaddq_s8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.shadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
+
+  // LLVM: {{.*}}test_vhaddq_s8(<16 x i8>{{.*}}[[V1:%.*]], <16 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[VHADD_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> {{.*}}, <16 x i8> {{.*}})
+}
+
+int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) {
+  return vhaddq_s16(v1, v2);
+
+  // CIR-LABEL: vhaddq_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.shadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}test_vhaddq_s16(<8 x i16>{{.*}}[[V1:%.*]], <8 x i16>{{.*}}[[V2:%.*]])
+  // LLVM:  [[VHADD_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> {{.*}}, <8 x i16> {{.*}})
+  // LLVM:  [[VHADD_V3_I:%.*]] = bitcast <8 x i16> [[VHADD_V2_I]] to <16 x i8>
+}
+
+int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) {
+  return vhaddq_s32(v1, v2);
+
+  // CIR-LABEL: vhaddq_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.shadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}test_vhaddq_s32(<4 x i32>{{.*}}[[V1:%.*]], <4 x i32>{{.*}}[[V2:%.*]])
+  // LLVM:  [[VHADD_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> {{.*}}, <4 x i32> {{.*}})
+  // LLVM:  [[VHADD_V3_I:%.*]] = bitcast <4 x i32> [[VHADD_V2_I]] to <16 x i8>
+}
+
+uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
+  return vhaddq_u8(v1, v2);
+
+  // CIR-LABEL: vhaddq_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uhadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>) -> !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}test_vhaddq_u8(<16 x i8>{{.*}}[[V1:%.*]], <16 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[VHADD_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> {{.*}}, <16 x i8> {{.*}})
+}
+
+uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
+  return vhaddq_u16(v1, v2);
+
+  // CIR-LABEL: vhaddq_u16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uhadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}test_vhaddq_u16(<8 x i16>{{.*}}[[V1:%.*]], <8 x i16>{{.*}}[[V2:%.*]])
+  // LLVM:  [[VHADD_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> {{.*}}, <8 x i16> {{.*}})
+  // LLVM:  [[VHADD_V3_I:%.*]] = bitcast <8 x i16> [[VHADD_V2_I]] to <16 x i8>
+}
+
+uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
+  return vhaddq_u32(v1, v2);
+
+  // CIR-LABEL: vhaddq_u32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uhadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u32i x 4>, !cir.vector<!u32i x 4>) -> !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}test_vhaddq_u32(<4 x i32>{{.*}}[[V1:%.*]], <4 x i32>{{.*}}[[V2:%.*]])
+  // LLVM:  [[VHADD_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> {{.*}}, <4 x i32> {{.*}})
+  // LLVM:  [[VHADD_V3_I:%.*]] = bitcast <4 x i32> [[VHADD_V2_I]] to <16 x i8>
+}
+
+// vhsub_* tests (64-bit vector variants). CIR expects
+// cir.llvm.intrinsic "aarch64.neon.shsub" (signed) / "aarch64.neon.uhsub"
+// (unsigned); LLVM expects the @llvm.aarch64.neon.{s,u}hsub.* call.
+int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) {
+  return vhsub_s8(v1, v2);
+
+  // CIR-LABEL: vhsub_s8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.shsub" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}@test_vhsub_s8(<8 x i8>{{.*}}[[v1:%.*]], <8 x i8>{{.*}}[[v2:%.*]])
+  // LLVM:   [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> {{.*}}, <8 x i8> {{.*}})
+}
+
+int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) {
+  return vhsub_s16(v1, v2);
+
+  // CIR-LABEL: vhsub_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.shsub" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}@test_vhsub_s16(<4 x i16>{{.*}}[[v1:%.*]], <4 x i16>{{.*}}[[v2:%.*]])
+  // LLVM:   [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> {{.*}}, <4 x i16> {{.*}})
+  // LLVM:   [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
+}
+
+int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) {
+  return vhsub_s32(v1, v2);
+
+  // CIR-LABEL: vhsub_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.shsub" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}@test_vhsub_s32(<2 x i32>{{.*}}[[v1:%.*]], <2 x i32>{{.*}}[[v2:%.*]])
+  // LLVM:   [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> {{.*}}, <2 x i32> {{.*}})
+  // LLVM:   [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
+}
+
+uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) {
+  return vhsub_u8(v1, v2);
+
+  // CIR-LABEL: vhsub_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uhsub" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>) -> !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}@test_vhsub_u8(<8 x i8>{{.*}}[[v1:%.*]], <8 x i8>{{.*}}[[v2:%.*]])
+  // LLVM:   [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> {{.*}}, <8 x i8> {{.*}})
+}
+
+uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) {
+  return vhsub_u16(v1, v2);
+
+  // CIR-LABEL: vhsub_u16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uhsub" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>) -> !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}@test_vhsub_u16(<4 x i16>{{.*}}[[v1:%.*]], <4 x i16>{{.*}}[[v2:%.*]])
+  // LLVM:   [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16>
+  // LLVM:   [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
+}
+
+uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) {
+  return vhsub_u32(v1, v2);
+
+  // CIR-LABEL: vhsub_u32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uhsub" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u32i x 2>, !cir.vector<!u32i x 2>) -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}@test_vhsub_u32(<2 x i32>{{.*}}[[v1:%.*]], <2 x i32>{{.*}}[[v2:%.*]])
+  // LLVM:   [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32>
+  // LLVM:   [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
+}
+
+// vhsubq_* tests (128-bit vector variants of vhsub). Same check pattern with
+// 128-bit !cir.vector types and the .v16i8/.v8i16/.v4i32 intrinsic suffixes.
+int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) {
+  return vhsubq_s8(v1, v2);
+
+  // CIR-LABEL: vhsubq_s8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.shsub" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
+
+  // LLVM: {{.*}}@test_vhsubq_s8(<16 x i8>{{.*}}[[v1:%.*]], <16 x i8>{{.*}}[[v2:%.*]])
+  // LLVM:   [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8>
+}
+
+int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) {
+  return vhsubq_s16(v1, v2);
+
+  // CIR-LABEL: vhsubq_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.shsub" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}@test_vhsubq_s16(<8 x i16>{{.*}}[[v1:%.*]], <8 x i16>{{.*}}[[v2:%.*]])
+  // LLVM:   [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16>
+  // LLVM:   [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
+}
+
+int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) {
+  return vhsubq_s32(v1, v2);
+
+  // CIR-LABEL: vhsubq_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.shsub" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}@test_vhsubq_s32(<4 x i32>{{.*}}[[v1:%.*]], <4 x i32>{{.*}}[[v2:%.*]])
+  // LLVM:   [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32>
+  // LLVM:   [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
+}
+
+uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) {
+  return vhsubq_u8(v1, v2);
+
+  // CIR-LABEL: vhsubq_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uhsub" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>) -> !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}@test_vhsubq_u8(<16 x i8>{{.*}}[[v1:%.*]], <16 x i8>{{.*}}[[v2:%.*]])
+  // LLVM:   [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8>
+}
+
+uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) {
+  return vhsubq_u16(v1, v2);
+
+  // CIR-LABEL: vhsubq_u16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uhsub" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}@test_vhsubq_u16(<8 x i16>{{.*}}[[v1:%.*]], <8 x i16>{{.*}}[[v2:%.*]])
+  // LLVM:   [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16>
+  // LLVM:   [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
+}
+
+uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) {
+  return vhsubq_u32(v1, v2);
+
+  // CIR-LABEL: vhsubq_u32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uhsub" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u32i x 4>, !cir.vector<!u32i x 4>) -> !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}@test_vhsubq_u32(<4 x i32>{{.*}}[[v1:%.*]], <4 x i32>{{.*}}[[v2:%.*]])
+  // LLVM:   [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32>
+  // LLVM:   [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
+}
+
+// vrhadd_* tests (64-bit vector variants). CIR expects
+// cir.llvm.intrinsic "aarch64.neon.srhadd" (signed) / "aarch64.neon.urhadd"
+// (unsigned); LLVM expects the @llvm.aarch64.neon.{s,u}rhadd.* call.
+int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) {
+  return vrhadd_s8(v1, v2);
+
+  // CIR-LABEL: vrhadd_s8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.srhadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}@test_vrhadd_s8(<8 x i8>{{.*}}[[V1:%.*]], <8 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>
+}
+
+int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) {
+  return vrhadd_s16(v1, v2);
+
+  // CIR-LABEL: vrhadd_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.srhadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}@test_vrhadd_s16(<4 x i16>{{.*}}[[V1:%.*]], <4 x i16>{{.*}}[[V2:%.*]])
+  // LLVM: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>
+  // LLVM: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
+}
+
+int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) {
+  return vrhadd_s32(v1, v2);
+
+  // CIR-LABEL: vrhadd_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.srhadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}@test_vrhadd_s32(<2 x i32>{{.*}}[[V1:%.*]], <2 x i32>{{.*}}[[V2:%.*]])
+  // LLVM: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>
+  // LLVM: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
+}
+
+uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) {
+  return vrhadd_u8(v1, v2);
+
+  // CIR-LABEL: vrhadd_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.urhadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>) -> !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}@test_vrhadd_u8(<8 x i8>{{.*}}[[V1:%.*]], <8 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8>
+}
+
+uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) {
+  return vrhadd_u16(v1, v2);
+
+  // CIR-LABEL: vrhadd_u16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.urhadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>) -> !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}@test_vrhadd_u16(<4 x i16>{{.*}}[[V1:%.*]], <4 x i16>{{.*}}[[V2:%.*]])
+  // LLVM: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16>
+  // LLVM: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
+}
+
+uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) {
+  return vrhadd_u32(v1, v2);
+
+  // CIR-LABEL: vrhadd_u32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.urhadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u32i x 2>, !cir.vector<!u32i x 2>) -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}@test_vrhadd_u32(<2 x i32>{{.*}}[[V1:%.*]], <2 x i32>{{.*}}[[V2:%.*]])
+  // LLVM: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32>
+  // LLVM: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
+}
+
+// vrhaddq_* tests (128-bit vector variants of vrhadd). Same lowering checks
+// as vrhadd_* with 128-bit !cir.vector types; the s8/u8 variants also pin the
+// exact call operands and the ret value in the LLVM output.
+int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) {
+  return vrhaddq_s8(v1, v2);
+
+  // CIR-LABEL: vrhaddq_s8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.srhadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
+
+  // LLVM: {{.*}}@test_vrhaddq_s8(<16 x i8>{{.*}}[[V1:%.*]], <16 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
+  // LLVM: ret <16 x i8> [[VRHADDQ_V_I]]
+}
+
+int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) {
+  return vrhaddq_s16(v1, v2);
+
+  // CIR-LABEL: vrhaddq_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.srhadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}@test_vrhaddq_s16(<8 x i16>{{.*}}[[V1:%.*]], <8 x i16>{{.*}}[[V2:%.*]])
+  // LLVM: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>
+  // LLVM: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
+}
+
+int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) {
+  return vrhaddq_s32(v1, v2);
+
+  // CIR-LABEL: vrhaddq_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.srhadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}@test_vrhaddq_s32(<4 x i32>{{.*}}[[V1:%.*]], <4 x i32>{{.*}}[[V2:%.*]])
+  // LLVM: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>
+  // LLVM: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
+}
+
+uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) {
+  return vrhaddq_u8(v1, v2);
+
+  // CIR-LABEL: vrhaddq_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.urhadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>) -> !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}@test_vrhaddq_u8(<16 x i8>{{.*}}[[V1:%.*]], <16 x i8>{{.*}}[[V2:%.*]])
+  // LLVM: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> [[V1]], <16 x i8> [[V2]])
+  // LLVM: ret <16 x i8> [[VRHADDQ_V_I]]
+}
+
+uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) {
+  return vrhaddq_u16(v1, v2);
+
+  // CIR-LABEL: vrhaddq_u16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.urhadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}@test_vrhaddq_u16(<8 x i16>{{.*}}[[V1:%.*]], <8 x i16>{{.*}}[[V2:%.*]])
+  // LLVM: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>
+  // LLVM: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
+}
+
+uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) {
+  return vrhaddq_u32(v1, v2);
+
+  // CIR-LABEL: vrhaddq_u32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.urhadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u32i x 4>, !cir.vector<!u32i x 4>) -> !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}@test_vrhaddq_u32(<4 x i32>{{.*}}[[V1:%.*]], <4 x i32>{{.*}}[[V2:%.*]])
+  // LLVM: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>
+  // LLVM: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
+}
+
+// vqadd_s8 test: CIR expects cir.llvm.intrinsic "aarch64.neon.sqadd"; LLVM
+// expects @llvm.aarch64.neon.sqadd.v8i8 with the function arguments (%0, %1)
+// and the call result returned directly.
+int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
+  return vqadd_s8(a, b);
+  // CIR-LABEL: vqadd_s8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>
+
+  // LLVM-LABEL: @test_vqadd_s8(
+  // LLVM:   [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %0, <8 x i8> %1)
+  // LLVM:   ret <8 x i8> [[VQADD_V_I]]
+}
+
+// Remaining vqadd_* tests: CIR expects cir.llvm.intrinsic
+// "aarch64.neon.sqadd" (signed) / "aarch64.neon.uqadd" (unsigned); LLVM
+// expects the matching @llvm.aarch64.neon.{s,u}qadd.* call.
+// These seven functions were indented one extra level, unlike every other
+// test function in this file; they are re-aligned to column 0 here. Only
+// leading whitespace changed — no FileCheck directive text was altered.
+int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
+  return vqadd_s16(a, b);
+  // CIR-LABEL: vqadd_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
+
+  // LLVM-LABEL: @test_vqadd_s16(
+  // LLVM:   [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16>
+  // LLVM:   [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
+}
+
+int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
+  return vqadd_s32(a, b);
+  // CIR-LABEL: vqadd_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+
+  // LLVM-LABEL: @test_vqadd_s32(
+  // LLVM:   [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32>
+  // LLVM:   [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
+}
+
+int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
+  return vqadd_s64(a, b);
+  // CIR-LABEL: vqadd_s64
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s64i x 1>, !cir.vector<!s64i x 1>) -> !cir.vector<!s64i x 1>
+
+  // LLVM-LABEL: @test_vqadd_s64(
+  // LLVM:   [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64>
+  // LLVM:   [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
+}
+
+uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
+  return vqadd_u8(a, b);
+  // CIR-LABEL: vqadd_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uqadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>) -> !cir.vector<!u8i x 8>
+
+  // LLVM-LABEL: @test_vqadd_u8(
+  // LLVM:   [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %0, <8 x i8> %1)
+  // LLVM:   ret <8 x i8> [[VQADD_V_I]]
+}
+
+uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
+  return vqadd_u16(a, b);
+  // CIR-LABEL: vqadd_u16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uqadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>) -> !cir.vector<!u16i x 4>
+
+  // LLVM-LABEL: @test_vqadd_u16(
+  // LLVM:   [[VQADD_V_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> {{.*}}, <4 x i16> {{.*}})
+}
+
+uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
+  return vqadd_u32(a, b);
+  // CIR-LABEL: vqadd_u32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uqadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u32i x 2>, !cir.vector<!u32i x 2>) -> !cir.vector<!u32i x 2>
+
+  // LLVM-LABEL: @test_vqadd_u32(
+  // LLVM:   [[VQADD_V_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> {{.*}}, <2 x i32> {{.*}})
+}
+
+uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
+  return vqadd_u64(a, b);
+  // CIR-LABEL: vqadd_u64
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uqadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u64i x 1>, !cir.vector<!u64i x 1>) -> !cir.vector<!u64i x 1>
+
+  // LLVM-LABEL: @test_vqadd_u64(
+  // LLVM:   [[VQADD_V_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> {{.*}}, <1 x i64> {{.*}})
+}
+
+// NYI-LABEL: @test_vqaddq_s8(
+// NYI:   [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
+// NYI:   ret <16 x i8> [[VQADDQ_V_I]]
+// int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
+//   return vqaddq_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vqaddq_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
+// NYI:   [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
+// NYI:   ret <8 x i16> [[VQADDQ_V2_I]]
+// int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
+//   return vqaddq_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vqaddq_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
+// NYI:   [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
+// NYI:   ret <4 x i32> [[VQADDQ_V2_I]]
+// int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
+//   return vqaddq_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vqaddq_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
+// NYI:   [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
+// NYI:   ret <2 x i64> [[VQADDQ_V2_I]]
+// int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
+//   return vqaddq_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vqaddq_u8(
+// NYI:   [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
+// NYI:   ret <16 x i8> [[VQADDQ_V_I]]
+// uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
+//   return vqaddq_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vqaddq_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
+// NYI:   [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
+// NYI:   ret <8 x i16> [[VQADDQ_V2_I]]
+// uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
+//   return vqaddq_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vqaddq_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
+// NYI:   [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
+// NYI:   ret <4 x i32> [[VQADDQ_V2_I]]
+// uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
+//   return vqaddq_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vqaddq_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
+// NYI:   [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
+// NYI:   ret <2 x i64> [[VQADDQ_V2_I]]
+// uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
+//   return vqaddq_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vqsub_s8(
+// NYI:   [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b)
+// NYI:   ret <8 x i8> [[VQSUB_V_I]]
+// int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
+//   return vqsub_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vqsub_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %a, <4 x i16> %b)
+// NYI:   [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
+// NYI:   ret <4 x i16> [[VQSUB_V2_I]]
+// int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
+//   return vqsub_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vqsub_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a, <2 x i32> %b)
+// NYI:   [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
+// NYI:   ret <2 x i32> [[VQSUB_V2_I]]
+// int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
+//   return vqsub_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vqsub_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// NYI:   [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> %a, <1 x i64> %b)
+// NYI:   [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
+// NYI:   ret <1 x i64> [[VQSUB_V2_I]]
+// int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
+//   return vqsub_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vqsub_u8(
+// NYI:   [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b)
+// NYI:   ret <8 x i8> [[VQSUB_V_I]]
+// uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
+//   return vqsub_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vqsub_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %a, <4 x i16> %b)
+// NYI:   [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
+// NYI:   ret <4 x i16> [[VQSUB_V2_I]]
+// uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
+//   return vqsub_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vqsub_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %a, <2 x i32> %b)
+// NYI:   [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
+// NYI:   ret <2 x i32> [[VQSUB_V2_I]]
+// uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
+//   return vqsub_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vqsub_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// NYI:   [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> %a, <1 x i64> %b)
+// NYI:   [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
+// NYI:   ret <1 x i64> [[VQSUB_V2_I]]
+// uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
+//   return vqsub_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vqsubq_s8(
+// NYI:   [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b)
+// NYI:   ret <16 x i8> [[VQSUBQ_V_I]]
+// int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
+//   return vqsubq_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vqsubq_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %a, <8 x i16> %b)
+// NYI:   [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
+// NYI:   ret <8 x i16> [[VQSUBQ_V2_I]]
+// int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
+//   return vqsubq_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vqsubq_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
+// NYI:   [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
+// NYI:   ret <4 x i32> [[VQSUBQ_V2_I]]
+// int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
+//   return vqsubq_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vqsubq_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %b)
+// NYI:   [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
+// NYI:   ret <2 x i64> [[VQSUBQ_V2_I]]
+// int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
+//   return vqsubq_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vqsubq_u8(
+// NYI:   [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b)
+// NYI:   ret <16 x i8> [[VQSUBQ_V_I]]
+// uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
+//   return vqsubq_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vqsubq_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %a, <8 x i16> %b)
+// NYI:   [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
+// NYI:   ret <8 x i16> [[VQSUBQ_V2_I]]
+// uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
+//   return vqsubq_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vqsubq_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %a, <4 x i32> %b)
+// NYI:   [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
+// NYI:   ret <4 x i32> [[VQSUBQ_V2_I]]
+// uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
+//   return vqsubq_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vqsubq_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %a, <2 x i64> %b)
+// NYI:   [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
+// NYI:   ret <2 x i64> [[VQSUBQ_V2_I]]
+// uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
+//   return vqsubq_u64(a, b);
+// }
+
+// vshl/vshlq family of tests. Each one verifies that the Neon vector
+// shift-left intrinsic is emitted as a single `cir.shift(left, ...)`
+// operation in ClangIR and lowered to a plain `shl` instruction in LLVM IR,
+// for every element type (s8..s64, u8..u64) and both 64- and 128-bit widths.
+int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
+  return vshl_s8(a, b);
+
+  // CIR-LABEL: vshl_s8
+  // CIR: {{%.*}} = cir.shift(left, {{%.*}} : !cir.vector<!s8i x 8>, {{%.*}} : !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}test_vshl_s8(<8 x i8>{{.*}}[[A:%.*]], <8 x i8>{{.*}}[[B:%.*]])
+  // LLVM:   [[VSHL_V_I:%.*]] = shl <8 x i8> {{.*}}, {{.*}}
+  // LLVM:   ret <8 x i8> [[VSHL_V_I]]
+}
+
+int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
+  return vshl_s16(a, b);
+
+  // CIR-LABEL: vshl_s16
+  // CIR: {{%.*}} = cir.shift(left, {{%.*}} : !cir.vector<!s16i x 4>, {{%.*}} : !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}test_vshl_s16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
+  // LLVM:   [[VSHL_V2_I:%.*]] = shl <4 x i16>
+}
+
+int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
+  return vshl_s32(a, b);
+
+  // CIR-LABEL: vshl_s32
+  // CIR: {{%.*}} = cir.shift(left, {{%.*}} : !cir.vector<!s32i x 2>, {{%.*}} : !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}test_vshl_s32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[B:%.*]])
+  // LLVM:   [[VSHL_V2_I:%.*]] = shl <2 x i32>
+}
+
+int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
+  return vshl_s64(a, b);
+
+  // CIR-LABEL: vshl_s64
+  // CIR: {{%.*}} = cir.shift(left, {{%.*}} : !cir.vector<!s64i x 1>, {{%.*}} : !cir.vector<!s64i x 1>) -> !cir.vector<!s64i x 1>
+
+  // LLVM: {{.*}}test_vshl_s64(<1 x i64>{{.*}}[[A:%.*]], <1 x i64>{{.*}}[[B:%.*]])
+  // LLVM:   [[VSHL_V2_I:%.*]] = shl <1 x i64>
+}
+
+// Unsigned variants: same lowering, but the ClangIR vector element type is
+// unsigned (!u8i etc.).
+uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
+  return vshl_u8(a, b);
+
+  // CIR-LABEL: vshl_u8
+  // CIR: {{%.*}} = cir.shift(left, {{%.*}} : !cir.vector<!u8i x 8>, {{%.*}} : !cir.vector<!u8i x 8>) -> !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}test_vshl_u8(<8 x i8>{{.*}}[[A:%.*]], <8 x i8>{{.*}}[[B:%.*]])
+  // LLVM:   [[VSHL_V_I:%.*]] = shl <8 x i8>
+}
+
+uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
+  return vshl_u16(a, b);
+
+  // CIR-LABEL: vshl_u16
+  // CIR: {{%.*}} = cir.shift(left, {{%.*}} : !cir.vector<!u16i x 4>, {{%.*}} : !cir.vector<!u16i x 4>) -> !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}test_vshl_u16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
+  // LLVM:   [[VSHL_V2_I:%.*]] = shl <4 x i16>
+}
+
+uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
+  return vshl_u32(a, b);
+
+  // CIR-LABEL: vshl_u32
+  // CIR: cir.shift(left, {{%.*}} : !cir.vector<!u32i x 2>, {{%.*}} : !cir.vector<!u32i x 2>) -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}test_vshl_u32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[B:%.*]])
+  // LLVM:   [[VSHL_V2_I:%.*]] = shl <2 x i32>
+}
+
+uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
+  return vshl_u64(a, b);
+
+  // CIR-LABEL: vshl_u64
+  // CIR: cir.shift(left, {{%.*}} : !cir.vector<!u64i x 1>, {{%.*}} : !cir.vector<!u64i x 1>) -> !cir.vector<!u64i x 1>
+
+  // LLVM: {{.*}}test_vshl_u64(<1 x i64>{{.*}}[[A:%.*]], <1 x i64>{{.*}}[[B:%.*]])
+  // LLVM:   [[VSHL_V2_I:%.*]] = shl <1 x i64>
+}
+
+// 128-bit (q-suffixed) variants of the same shift-left checks.
+int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
+  return vshlq_s8(a, b);
+
+  // CIR-LABEL: vshlq_s8
+  // CIR: cir.shift(left, {{%.*}} : !cir.vector<!s8i x 16>, {{%.*}} : !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
+
+  // LLVM: {{.*}}test_vshlq_s8(<16 x i8>{{.*}}[[A:%.*]], <16 x i8>{{.*}}[[B:%.*]])
+  // LLVM:   [[VSHLQ_V_I:%.*]] = shl <16 x i8>
+}
+
+int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
+  return vshlq_s16(a, b);
+
+  // CIR-LABEL: vshlq_s16
+  // CIR: cir.shift(left, {{%.*}} : !cir.vector<!s16i x 8>, {{%.*}} : !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}test_vshlq_s16(<8 x i16>{{.*}}[[A:%.*]], <8 x i16>{{.*}}[[B:%.*]])
+  // LLVM:   [[VSHLQ_V2_I:%.*]] = shl <8 x i16>
+}
+
+int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
+  return vshlq_s32(a, b);
+
+  // CIR-LABEL: vshlq_s32
+  // CIR: cir.shift(left, {{%.*}} : !cir.vector<!s32i x 4>, {{%.*}} : !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}test_vshlq_s32(<4 x i32>{{.*}}[[A:%.*]], <4 x i32>{{.*}}[[B:%.*]])
+  // LLVM:   [[VSHLQ_V2_I:%.*]] = shl <4 x i32>
+}
+
+int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
+  return vshlq_s64(a, b);
+
+  // CIR-LABEL: vshlq_s64
+  // CIR: cir.shift(left, {{%.*}} : !cir.vector<!s64i x 2>, {{%.*}} : !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+
+  // LLVM: {{.*}}test_vshlq_s64(<2 x i64>{{.*}}[[A:%.*]], <2 x i64>{{.*}}[[B:%.*]])
+  // LLVM:   [[VSHLQ_V2_I:%.*]] = shl <2 x i64>
+}
+
+uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
+  return vshlq_u8(a, b);
+
+  // CIR-LABEL: vshlq_u8
+  // CIR: cir.shift(left, {{%.*}} : !cir.vector<!u8i x 16>, {{%.*}} : !cir.vector<!u8i x 16>) -> !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}test_vshlq_u8(<16 x i8>{{.*}}[[A:%.*]], <16 x i8>{{.*}}[[B:%.*]])
+  // LLVM:   [[VSHLQ_V_I:%.*]] = shl <16 x i8>
+}
+
+uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
+  return vshlq_u16(a, b);
+
+  // CIR-LABEL: vshlq_u16
+  // CIR: cir.shift(left, {{%.*}} : !cir.vector<!u16i x 8>, {{%.*}} : !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}test_vshlq_u16(<8 x i16>{{.*}}[[A:%.*]], <8 x i16>{{.*}}[[B:%.*]])
+  // LLVM:   [[VSHLQ_V2_I:%.*]] = shl <8 x i16>
+}
+
+uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
+  return vshlq_u32(a, b);
+
+  // CIR-LABEL: vshlq_u32
+  // CIR: cir.shift(left, {{%.*}} : !cir.vector<!u32i x 4>, {{%.*}} : !cir.vector<!u32i x 4>) -> !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}test_vshlq_u32(<4 x i32>{{.*}}[[A:%.*]], <4 x i32>{{.*}}[[B:%.*]])
+  // LLVM:   [[VSHLQ_V2_I:%.*]] = shl <4 x i32>
+}
+
+uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
+  return vshlq_u64(a, b);
+
+  // CIR-LABEL: vshlq_u64
+  // CIR: cir.shift(left, {{%.*}} : !cir.vector<!u64i x 2>, {{%.*}} : !cir.vector<!u64i x 2>) -> !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}test_vshlq_u64(<2 x i64>{{.*}}[[A:%.*]], <2 x i64>{{.*}}[[B:%.*]])
+  // LLVM:   [[VSHLQ_V2_I:%.*]] = shl <2 x i64>
+}
+
+// NYI-LABEL: @test_vqshl_s8(
+// NYI:   [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b)
+// NYI:   ret <8 x i8> [[VQSHL_V_I]]
+// int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
+//   return vqshl_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vqshl_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %a, <4 x i16> %b)
+// NYI:   [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
+// NYI:   ret <4 x i16> [[VQSHL_V2_I]]
+// int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
+//   return vqshl_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vqshl_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %a, <2 x i32> %b)
+// NYI:   [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
+// NYI:   ret <2 x i32> [[VQSHL_V2_I]]
+// int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
+//   return vqshl_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vqshl_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// NYI:   [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %a, <1 x i64> %b)
+// NYI:   [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
+// NYI:   ret <1 x i64> [[VQSHL_V2_I]]
+// int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
+//   return vqshl_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vqshl_u8(
+// NYI:   [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b)
+// NYI:   ret <8 x i8> [[VQSHL_V_I]]
+// uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
+//   return vqshl_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vqshl_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %a, <4 x i16> %b)
+// NYI:   [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
+// NYI:   ret <4 x i16> [[VQSHL_V2_I]]
+// uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
+//   return vqshl_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vqshl_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %a, <2 x i32> %b)
+// NYI:   [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
+// NYI:   ret <2 x i32> [[VQSHL_V2_I]]
+// uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
+//   return vqshl_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vqshl_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// NYI:   [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %a, <1 x i64> %b)
+// NYI:   [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
+// NYI:   ret <1 x i64> [[VQSHL_V2_I]]
+// uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
+//   return vqshl_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vqshlq_s8(
+// NYI:   [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b)
+// NYI:   ret <16 x i8> [[VQSHLQ_V_I]]
+// int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
+//   return vqshlq_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vqshlq_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %a, <8 x i16> %b)
+// NYI:   [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
+// NYI:   ret <8 x i16> [[VQSHLQ_V2_I]]
+// int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
+//   return vqshlq_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vqshlq_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %a, <4 x i32> %b)
+// NYI:   [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
+// NYI:   ret <4 x i32> [[VQSHLQ_V2_I]]
+// int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
+//   return vqshlq_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vqshlq_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %a, <2 x i64> %b)
+// NYI:   [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
+// NYI:   ret <2 x i64> [[VQSHLQ_V2_I]]
+// int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
+//   return vqshlq_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vqshlq_u8(
+// NYI:   [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b)
+// NYI:   ret <16 x i8> [[VQSHLQ_V_I]]
+// uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
+//   return vqshlq_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vqshlq_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %a, <8 x i16> %b)
+// NYI:   [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
+// NYI:   ret <8 x i16> [[VQSHLQ_V2_I]]
+// uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
+//   return vqshlq_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vqshlq_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %a, <4 x i32> %b)
+// NYI:   [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
+// NYI:   ret <4 x i32> [[VQSHLQ_V2_I]]
+// uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
+//   return vqshlq_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vqshlq_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %a, <2 x i64> %b)
+// NYI:   [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
+// NYI:   ret <2 x i64> [[VQSHLQ_V2_I]]
+// uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
+//   return vqshlq_u64(a, b);
+// }
+
+// vrshl/vrshlq family of tests. Rounding shift-left has no generic IR
+// equivalent, so each test verifies that the intrinsic is emitted as a
+// `cir.llvm.intrinsic` call naming aarch64.neon.srshl (signed) or
+// aarch64.neon.urshl (unsigned), and lowered to the matching
+// @llvm.aarch64.neon.{s,u}rshl.* call for every element type and width.
+int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
+  return vrshl_s8(a, b);
+
+  // CIR-LABEL: vrshl_s8
+  // CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}test_vrshl_s8(<8 x i8>{{.*}}[[a:%.*]], <8 x i8>{{.*}}[[b:%.*]])
+  // LLVM:   [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> {{.*}}, <8 x i8> {{.*}})
+  // LLVM:   ret <8 x i8> [[VRSHL_V_I]]
+}
+
+int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
+  return vrshl_s16(a, b);
+
+  // CIR-LABEL: vrshl_s16
+  // CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}test_vrshl_s16(<4 x i16>{{.*}}[[a:%.*]], <4 x i16>{{.*}}[[b:%.*]])
+  // LLVM:   [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>
+  // LLVM:   [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
+}
+
+int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
+  return vrshl_s32(a, b);
+
+  // CIR-LABEL: vrshl_s32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}test_vrshl_s32(<2 x i32>{{.*}}[[a:%.*]], <2 x i32>{{.*}}[[b:%.*]])
+  // LLVM:   [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>
+  // LLVM:   [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
+}
+
+int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
+  return vrshl_s64(a, b);
+
+  // CIR-LABEL: vrshl_s64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s64i x 1>, !cir.vector<!s64i x 1>) -> !cir.vector<!s64i x 1>
+
+  // LLVM: {{.*}}test_vrshl_s64(<1 x i64>{{.*}}[[a:%.*]], <1 x i64>{{.*}}[[b:%.*]])
+  // LLVM:   [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>
+  // LLVM:   [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
+}
+
+// Unsigned variants select the urshl intrinsic instead of srshl.
+uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
+  return vrshl_u8(a, b);
+
+  // CIR-LABEL: vrshl_u8
+  // CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>) -> !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}test_vrshl_u8(<8 x i8>{{.*}}[[a:%.*]], <8 x i8>{{.*}}[[b:%.*]])
+  // LLVM: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8>
+}
+
+uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
+  return vrshl_u16(a, b);
+
+  // CIR-LABEL: vrshl_u16
+  // CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>) -> !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}test_vrshl_u16(<4 x i16>{{.*}}[[a:%.*]], <4 x i16>{{.*}}[[b:%.*]])
+  // LLVM:   [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>
+  // LLVM:   [[VRSHL_V3_I:%.*]] = bitcast <4 x i16>
+}
+
+uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
+  return vrshl_u32(a, b);
+
+  // CIR-LABEL: vrshl_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u32i x 2>, !cir.vector<!u32i x 2>) -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}test_vrshl_u32(<2 x i32>{{.*}}[[a:%.*]], <2 x i32>{{.*}}[[b:%.*]])
+  // LLVM:   [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>
+  // LLVM:   [[VRSHL_V3_I:%.*]] = bitcast <2 x i32>
+}
+
+uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
+  return vrshl_u64(a, b);
+
+  // CIR-LABEL: vrshl_u64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u64i x 1>, !cir.vector<!u64i x 1>) -> !cir.vector<!u64i x 1>
+
+  // LLVM: {{.*}}test_vrshl_u64(<1 x i64>{{.*}}[[a:%.*]], <1 x i64>{{.*}}[[b:%.*]])
+  // LLVM:   [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>
+  // LLVM:   [[VRSHL_V3_I:%.*]] = bitcast <1 x i64>
+}
+
+// 128-bit (q-suffixed) variants of the rounding shift-left checks.
+int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
+  return vrshlq_s8(a, b);
+
+  // CIR-LABEL: vrshlq_s8
+  // CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
+
+  // LLVM: {{.*}}test_vrshlq_s8(<16 x i8>{{.*}}[[a:%.*]], <16 x i8>{{.*}}[[b:%.*]])
+  // LLVM:   [[VRSHL_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> {{.*}}, <16 x i8> {{.*}})
+  // LLVM:   ret <16 x i8> [[VRSHL_V_I]]
+}
+
+int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
+  return vrshlq_s16(a, b);
+
+  // CIR-LABEL: vrshlq_s16
+  // CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}test_vrshlq_s16(<8 x i16>{{.*}}[[a:%.*]], <8 x i16>{{.*}}[[b:%.*]])
+  // LLVM:   [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>
+  // LLVM:   [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
+}
+
+int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
+  return vrshlq_s32(a, b);
+
+  // CIR-LABEL: vrshlq_s32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}test_vrshlq_s32(<4 x i32>{{.*}}[[a:%.*]], <4 x i32>{{.*}}[[b:%.*]])
+  // LLVM:   [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>
+  // LLVM:   [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
+}
+
+int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
+  return vrshlq_s64(a, b);
+
+  // CIR-LABEL: vrshlq_s64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+
+  // LLVM: {{.*}}test_vrshlq_s64(<2 x i64>{{.*}}[[a:%.*]], <2 x i64>{{.*}}[[b:%.*]])
+  // LLVM:   [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>
+  // LLVM:   [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
+}
+
+uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
+  return vrshlq_u8(a, b);
+
+  // CIR-LABEL: vrshlq_u8
+  // CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>) -> !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}test_vrshlq_u8(<16 x i8>{{.*}}[[a:%.*]], <16 x i8>{{.*}}[[b:%.*]])
+  // LLVM:   [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> {{.*}}, <16 x i8> {{.*}})
+  // LLVM:   ret <16 x i8> [[VRSHLQ_V_I]]
+}
+
+uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
+  return vrshlq_u16(a, b);
+
+  // CIR-LABEL: vrshlq_u16
+  // CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}test_vrshlq_u16(<8 x i16>{{.*}}[[a:%.*]], <8 x i16>{{.*}}[[b:%.*]])
+  // LLVM:   [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>
+  // LLVM:   [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
+}
+
+uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
+  return vrshlq_u32(a, b);
+
+  // CIR-LABEL: vrshlq_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u32i x 4>, !cir.vector<!u32i x 4>) -> !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}test_vrshlq_u32(<4 x i32>{{.*}}[[a:%.*]], <4 x i32>{{.*}}[[b:%.*]])
+  // LLVM:   [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>
+  // LLVM:   [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
+}
+
+uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
+  return vrshlq_u64(a, b);
+
+  // CIR-LABEL: vrshlq_u64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u64i x 2>, !cir.vector<!u64i x 2>) -> !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}test_vrshlq_u64(<2 x i64>{{.*}}[[a:%.*]], <2 x i64>{{.*}}[[b:%.*]])
+  // LLVM:   [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64>
+  // LLVM:   [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
+}
+
+// NYI-LABEL: @test_vqrshl_s8(
+// NYI:   [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)
+// NYI:   ret <8 x i8> [[VQRSHL_V_I]]
+// int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
+//   return vqrshl_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshl_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %a, <4 x i16> %b)
+// NYI:   [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
+// NYI:   ret <4 x i16> [[VQRSHL_V2_I]]
+// int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
+//   return vqrshl_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshl_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %a, <2 x i32> %b)
+// NYI:   [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
+// NYI:   ret <2 x i32> [[VQRSHL_V2_I]]
+// int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
+//   return vqrshl_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshl_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// NYI:   [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %a, <1 x i64> %b)
+// NYI:   [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
+// NYI:   ret <1 x i64> [[VQRSHL_V2_I]]
+// int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
+//   return vqrshl_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshl_u8(
+// NYI:   [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b)
+// NYI:   ret <8 x i8> [[VQRSHL_V_I]]
+// uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
+//   return vqrshl_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshl_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %a, <4 x i16> %b)
+// NYI:   [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
+// NYI:   ret <4 x i16> [[VQRSHL_V2_I]]
+// uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
+//   return vqrshl_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshl_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %a, <2 x i32> %b)
+// NYI:   [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
+// NYI:   ret <2 x i32> [[VQRSHL_V2_I]]
+// uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
+//   return vqrshl_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshl_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// NYI:   [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %a, <1 x i64> %b)
+// NYI:   [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
+// NYI:   ret <1 x i64> [[VQRSHL_V2_I]]
+// uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
+//   return vqrshl_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshlq_s8(
+// NYI:   [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b)
+// NYI:   ret <16 x i8> [[VQRSHLQ_V_I]]
+// int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
+//   return vqrshlq_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshlq_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %a, <8 x i16> %b)
+// NYI:   [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
+// NYI:   ret <8 x i16> [[VQRSHLQ_V2_I]]
+// int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
+//   return vqrshlq_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshlq_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %a, <4 x i32> %b)
+// NYI:   [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
+// NYI:   ret <4 x i32> [[VQRSHLQ_V2_I]]
+// int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
+//   return vqrshlq_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshlq_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %a, <2 x i64> %b)
+// NYI:   [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
+// NYI:   ret <2 x i64> [[VQRSHLQ_V2_I]]
+// int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
+//   return vqrshlq_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshlq_u8(
+// NYI:   [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b)
+// NYI:   ret <16 x i8> [[VQRSHLQ_V_I]]
+// uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
+//   return vqrshlq_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshlq_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %a, <8 x i16> %b)
+// NYI:   [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
+// NYI:   ret <8 x i16> [[VQRSHLQ_V2_I]]
+// uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
+//   return vqrshlq_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshlq_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %a, <4 x i32> %b)
+// NYI:   [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
+// NYI:   ret <4 x i32> [[VQRSHLQ_V2_I]]
+// uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
+//   return vqrshlq_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshlq_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %a, <2 x i64> %b)
+// NYI:   [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
+// NYI:   ret <2 x i64> [[VQRSHLQ_V2_I]]
+// uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
+//   return vqrshlq_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vsli_n_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// NYI:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// NYI:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// NYI:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0)
+// NYI:   ret <1 x i64> [[VSLI_N2]]
+// poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) {
+//   return vsli_n_p64(a, b, 0);
+// }
+
+// NYI-LABEL: @test_vsliq_n_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// NYI:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0)
+// NYI:   ret <2 x i64> [[VSLI_N2]]
+// poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) {
+//   return vsliq_n_p64(a, b, 0);
+// }
+
+int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
+  return vmax_s8(a, b);
+
+  // CIR-LABEL: vmax_s8
+  // CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!s8i x 8>
+
+  // LLVM-LABEL: test_vmax_s8
+  // LLVM-SAME: (<8 x i8> [[a:%.*]], <8 x i8> [[b:%.*]])
+  // LLVM:    [[VMAX_I:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> {{.*}}, <8 x i8> {{.*}})
+  // LLVM:    ret <8 x i8> [[VMAX_I]]
+}
+
+int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
+  return vmax_s16(a, b);
+
+  // CIR-LABEL: vmax_s16
+  // CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!s16i x 4>
+
+  // LLVM-LABEL: test_vmax_s16
+  // LLVM-SAME: (<4 x i16> [[a:%.*]], <4 x i16> [[b:%.*]])
+  // LLVM:   [[VMAX2_I:%.*]] = call <4 x i16> @llvm.smax.v4i16(<4 x i16>
+}
+
+int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
+  return vmax_s32(a, b);
+
+  // CIR-LABEL: vmax_s32
+  // CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!s32i x 2>
+
+  // LLVM-LABEL: test_vmax_s32
+  // LLVM-SAME: (<2 x i32> [[a:%.*]], <2 x i32> [[b:%.*]])
+  // LLVM:   [[VMAX2_I:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32>
+}
+
+uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
+  return vmax_u8(a, b);
+
+  // CIR-LABEL: vmax_u8
+  // CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!u8i x 8>
+
+  // LLVM-LABEL: test_vmax_u8
+  // LLVM-SAME: (<8 x i8> [[a:%.*]], <8 x i8> [[b:%.*]])
+  // LLVM:    [[VMAX_I:%.*]] = call <8 x i8> @llvm.umax.v8i8(<8 x i8> {{.*}}, <8 x i8> {{.*}})
+  // LLVM:    ret <8 x i8> [[VMAX_I]]
+}
+
+uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
+  // Unsigned i16x4 element-wise max; max binop in ClangIR, umax.v4i16 in LLVM IR.
+  return vmax_u16(a, b);
+
+  // CIR-LABEL: vmax_u16
+  // CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!u16i x 4>
+
+  // LLVM-LABEL: test_vmax_u16
+  // LLVM-SAME: (<4 x i16> [[a:%.*]], <4 x i16> [[b:%.*]])
+  // LLVM:   [[VMAX2_I:%.*]] = call <4 x i16> @llvm.umax.v4i16(<4 x i16>
+}
+
+uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
+  // Unsigned i32x2 element-wise max; max binop in ClangIR, umax.v2i32 in LLVM IR.
+  return vmax_u32(a, b);
+
+  // CIR-LABEL: vmax_u32
+  // CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!u32i x 2>
+
+  // LLVM-LABEL: test_vmax_u32
+  // LLVM-SAME: (<2 x i32> [[a:%.*]], <2 x i32> [[b:%.*]])
+  // LLVM:   [[VMAX2_I:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32>
+}
+
+float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
+  // f32x2 element-wise max; fmaximum op in ClangIR, maximum.v2f32 in LLVM IR.
+  return vmax_f32(a, b);
+
+  // CIR-LABEL: vmax_f32
+  // CIR: cir.fmaximum {{%.*}}, {{%.*}} : !cir.vector<!cir.float x 2>
+
+  // LLVM-LABEL: test_vmax_f32
+  // LLVM-SAME: (<2 x float> [[a:%.*]], <2 x float> [[b:%.*]])
+  // LLVM:   [[VMAX2_I:%.*]] = call <2 x float> @llvm.maximum.v2f32(<2 x float>
+}
+
+int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
+  // Signed i8x16 (quad) element-wise max; max binop in ClangIR, smax.v16i8 in LLVM IR.
+  return vmaxq_s8(a, b);
+
+  // CIR-LABEL: vmaxq_s8
+  // CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!s8i x 16>
+
+  // LLVM-LABEL: test_vmaxq_s8
+  // LLVM-SAME: (<16 x i8> [[a:%.*]], <16 x i8> [[b:%.*]])
+  // LLVM:    [[VMAX_I:%.*]] = call <16 x i8> @llvm.smax.v16i8(<16 x i8> {{.*}}, <16 x i8> {{.*}})
+  // LLVM:    ret <16 x i8> [[VMAX_I]]
+}
+
+int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
+  // Signed i16x8 (quad) element-wise max; max binop in ClangIR, smax.v8i16 in LLVM IR.
+  return vmaxq_s16(a, b);
+
+  // CIR-LABEL: vmaxq_s16
+  // CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!s16i x 8>
+
+  // LLVM-LABEL: test_vmaxq_s16
+  // LLVM-SAME: (<8 x i16> [[a:%.*]], <8 x i16> [[b:%.*]])
+  // LLVM:   [[VMAX2_I:%.*]] = call <8 x i16> @llvm.smax.v8i16(<8 x i16>
+}
+
+int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
+  // Signed i32x4 (quad) element-wise max; max binop in ClangIR, smax.v4i32 in LLVM IR.
+  return vmaxq_s32(a, b);
+
+  // CIR-LABEL: vmaxq_s32
+  // CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!s32i x 4>
+
+  // LLVM-LABEL: test_vmaxq_s32
+  // LLVM-SAME: (<4 x i32> [[a:%.*]], <4 x i32> [[b:%.*]])
+  // LLVM:   [[VMAX2_I:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32>
+}
+
+uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
+  // Unsigned i8x16 (quad) element-wise max; max binop in ClangIR, umax.v16i8 in LLVM IR.
+  return vmaxq_u8(a, b);
+
+  // CIR-LABEL: vmaxq_u8
+  // CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!u8i x 16>
+
+  // LLVM-LABEL: test_vmaxq_u8
+  // LLVM-SAME: (<16 x i8> [[a:%.*]], <16 x i8> [[b:%.*]])
+  // LLVM:    [[VMAX_I:%.*]] = call <16 x i8> @llvm.umax.v16i8(<16 x i8> {{.*}}, <16 x i8> {{.*}})
+  // LLVM:    ret <16 x i8> [[VMAX_I]]
+}
+
+uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
+  // Unsigned i16x8 (quad) element-wise max; max binop in ClangIR, umax.v8i16 in LLVM IR.
+  return vmaxq_u16(a, b);
+
+  // CIR-LABEL: vmaxq_u16
+  // CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!u16i x 8>
+
+  // LLVM-LABEL: test_vmaxq_u16
+  // LLVM-SAME: (<8 x i16> [[a:%.*]], <8 x i16> [[b:%.*]])
+  // LLVM:   [[VMAX2_I:%.*]] = call <8 x i16> @llvm.umax.v8i16(<8 x i16>
+}
+
+uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
+  // Unsigned i32x4 (quad) element-wise max; max binop in ClangIR, umax.v4i32 in LLVM IR.
+  return vmaxq_u32(a, b);
+
+  // CIR-LABEL: vmaxq_u32
+  // CIR: cir.binop(max, {{%.*}}, {{%.*}}) : !cir.vector<!u32i x 4>
+
+  // LLVM-LABEL: test_vmaxq_u32
+  // LLVM-SAME: (<4 x i32> [[a:%.*]], <4 x i32> [[b:%.*]])
+  // LLVM:   [[VMAX2_I:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32>
+}
+
+float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
+  // f32x4 (quad) element-wise max; fmaximum op in ClangIR, maximum.v4f32 in LLVM IR.
+  return vmaxq_f32(a, b);
+
+  // CIR-LABEL: vmaxq_f32
+  // CIR: cir.fmaximum {{%.*}}, {{%.*}} : !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: test_vmaxq_f32
+  // LLVM-SAME: (<4 x float> [[a:%.*]], <4 x float> [[b:%.*]])
+  // LLVM:   [[VMAX2_I:%.*]] = call <4 x float> @llvm.maximum.v4f32(<4 x float>
+}
+
+float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) {
+  // f64x2 (quad) element-wise max; fmaximum op in ClangIR, maximum.v2f64 in LLVM IR.
+  return vmaxq_f64(a, b);
+
+  // CIR-LABEL: vmaxq_f64
+  // CIR: cir.fmaximum {{%.*}}, {{%.*}} : !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: test_vmaxq_f64
+  // LLVM-SAME: (<2 x double> [[a:%.*]], <2 x double> [[b:%.*]])
+  // LLVM:   [[VMAX2_I:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double>
+}
+
+int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
+  // Signed i8x8 element-wise min; unlike vmax, lowered via the target-specific
+  // aarch64.neon.smin intrinsic at both the ClangIR and LLVM levels.
+  return vmin_s8(a, b);
+
+  // CIR-LABEL: vmin_s8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.smin" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}@test_vmin_s8(<8 x i8>{{.*}}[[A:%.*]], <8 x i8>{{.*}}[[B:%.*]])
+  // LLVM: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> {{.*}}, <8 x i8> {{.*}})
+  // LLVM: ret <8 x i8> [[VMIN_I]]
+}
+
+int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
+  // Signed i16x4 element-wise min via the aarch64.neon.smin intrinsic.
+  return vmin_s16(a, b);
+
+  // CIR-LABEL: vmin_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.smin" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}@test_vmin_s16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
+  // LLVM: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16>
+}
+
+int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
+  // Signed i32x2 element-wise min via the aarch64.neon.smin intrinsic.
+  return vmin_s32(a, b);
+
+  // CIR-LABEL: vmin_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.smin" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}@test_vmin_s32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[B:%.*]])
+  // LLVM: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32>
+}
+
+uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
+  // Unsigned i8x8 element-wise min via the aarch64.neon.umin intrinsic.
+  return vmin_u8(a, b);
+
+  // CIR-LABEL: vmin_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.umin" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>) -> !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}@test_vmin_u8(<8 x i8>{{.*}}[[A:%.*]], <8 x i8>{{.*}}[[B:%.*]])
+  // LLVM: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> {{.*}}, <8 x i8> {{.*}})
+  // LLVM: ret <8 x i8> [[VMIN_I]]
+}
+
+uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
+  // Unsigned i16x4 element-wise min via the aarch64.neon.umin intrinsic.
+  return vmin_u16(a, b);
+
+  // CIR-LABEL: vmin_u16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.umin" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>) -> !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}@test_vmin_u16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
+  // LLVM: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16>
+}
+
+uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
+  // Unsigned i32x2 element-wise min via the aarch64.neon.umin intrinsic.
+  return vmin_u32(a, b);
+
+  // CIR-LABEL: vmin_u32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.umin" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u32i x 2>, !cir.vector<!u32i x 2>) -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}@test_vmin_u32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[B:%.*]])
+  // LLVM: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32>
+}
+
+float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
+  // f32x2 element-wise min via the aarch64.neon.fmin intrinsic.
+  return vmin_f32(a, b);
+
+  // CIR-LABEL: vmin_f32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.fmin" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!cir.float x 2>, !cir.vector<!cir.float x 2>) -> !cir.vector<!cir.float x 2>
+
+  // LLVM: {{.*}}@test_vmin_f32(<2 x float>{{.*}}[[A:%.*]], <2 x float>{{.*}}[[B:%.*]])
+  // LLVM: [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float>
+}
+
+float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) {
+  // f64x1 (single-element vector) min via the aarch64.neon.fmin intrinsic.
+  return vmin_f64(a, b);
+
+  // CIR-LABEL: vmin_f64
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.fmin" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!cir.double x 1>, !cir.vector<!cir.double x 1>) -> !cir.vector<!cir.double x 1>
+
+  // LLVM: {{.*}}@test_vmin_f64(<1 x double>{{.*}}[[A:%.*]], <1 x double>{{.*}}[[B:%.*]])
+  // LLVM: [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double>
+}
+
+int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
+  // Signed i8x16 (quad) element-wise min via the aarch64.neon.smin intrinsic.
+  return vminq_s8(a, b);
+
+  // CIR-LABEL: vminq_s8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.smin" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
+
+  // LLVM: {{.*}}@test_vminq_s8(<16 x i8>{{.*}}[[A:%.*]], <16 x i8>{{.*}}[[B:%.*]])
+  // LLVM: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> {{.*}}, <16 x i8> {{.*}})
+  // LLVM: ret <16 x i8> [[VMIN_I]]
+}
+
+int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
+  // Signed i16x8 (quad) element-wise min via the aarch64.neon.smin intrinsic.
+  return vminq_s16(a, b);
+
+  // CIR-LABEL: vminq_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.smin" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}@test_vminq_s16(<8 x i16>{{.*}}[[A:%.*]], <8 x i16>{{.*}}[[B:%.*]])
+  // LLVM: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16>
+}
+
+int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
+  // Signed i32x4 (quad) element-wise min via the aarch64.neon.smin intrinsic.
+  return vminq_s32(a, b);
+
+  // CIR-LABEL: vminq_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.smin" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}@test_vminq_s32(<4 x i32>{{.*}}[[A:%.*]], <4 x i32>{{.*}}[[B:%.*]])
+  // LLVM: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32>
+}
+
+uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
+  // Unsigned i8x16 (quad) element-wise min via the aarch64.neon.umin intrinsic.
+  return vminq_u8(a, b);
+
+  // CIR-LABEL: vminq_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.umin" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>) -> !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}@test_vminq_u8(<16 x i8>{{.*}}[[A:%.*]], <16 x i8>{{.*}}[[B:%.*]])
+  // LLVM: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> {{.*}}, <16 x i8> {{.*}})
+  // LLVM: ret <16 x i8> [[VMIN_I]]
+}
+
+uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
+  // Unsigned i16x8 (quad) element-wise min via the aarch64.neon.umin intrinsic.
+  return vminq_u16(a, b);
+
+  // CIR-LABEL: vminq_u16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.umin" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}@test_vminq_u16(<8 x i16>{{.*}}[[A:%.*]], <8 x i16>{{.*}}[[B:%.*]])
+  // LLVM: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16>
+}
+
+uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
+  // Unsigned i32x4 (quad) element-wise min via the aarch64.neon.umin intrinsic.
+  return vminq_u32(a, b);
+
+  // CIR-LABEL: vminq_u32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.umin" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u32i x 4>, !cir.vector<!u32i x 4>) -> !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}@test_vminq_u32(<4 x i32>{{.*}}[[A:%.*]], <4 x i32>{{.*}}[[B:%.*]])
+  // LLVM: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32>
+}
+
+float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) {
+  // f64x2 (quad) element-wise min via the aarch64.neon.fmin intrinsic.
+  // NOTE(review): there is no vminq_f32 counterpart here, unlike the vmaxq
+  // section above — possibly still unimplemented; confirm before relying on it.
+  return vminq_f64(a, b);
+
+  // CIR-LABEL: vminq_f64
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.fmin" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!cir.double x 2>, !cir.vector<!cir.double x 2>) -> !cir.vector<!cir.double x 2>
+
+  // LLVM: {{.*}}@test_vminq_f64(<2 x double>{{.*}}[[A:%.*]], <2 x double>{{.*}}[[B:%.*]])
+  // LLVM: [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double>
+}
+
+// NYI-LABEL: @test_vmaxnm_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// NYI:   [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b)
+// NYI:   ret <2 x float> [[VMAXNM2_I]]
+// float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
+//   return vmaxnm_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vmaxnmq_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// NYI:   [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b)
+// NYI:   ret <4 x float> [[VMAXNM2_I]]
+// float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
+//   return vmaxnmq_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vmaxnmq_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// NYI:   [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b)
+// NYI:   ret <2 x double> [[VMAXNM2_I]]
+// float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) {
+//   return vmaxnmq_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vminnm_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// NYI:   [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b)
+// NYI:   ret <2 x float> [[VMINNM2_I]]
+// float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
+//   return vminnm_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vminnmq_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// NYI:   [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b)
+// NYI:   ret <4 x float> [[VMINNM2_I]]
+// float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
+//   return vminnmq_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vminnmq_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// NYI:   [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b)
+// NYI:   ret <2 x double> [[VMINNM2_I]]
+// float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) {
+//   return vminnmq_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vpmax_s8(
+// NYI:   [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b)
+// NYI:   ret <8 x i8> [[VPMAX_I]]
+// int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
+//   return vpmax_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vpmax_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %a, <4 x i16> %b)
+// NYI:   ret <4 x i16> [[VPMAX2_I]]
+// int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
+//   return vpmax_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vpmax_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %a, <2 x i32> %b)
+// NYI:   ret <2 x i32> [[VPMAX2_I]]
+// int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
+//   return vpmax_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vpmax_u8(
+// NYI:   [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b)
+// NYI:   ret <8 x i8> [[VPMAX_I]]
+// uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
+//   return vpmax_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vpmax_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %a, <4 x i16> %b)
+// NYI:   ret <4 x i16> [[VPMAX2_I]]
+// uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
+//   return vpmax_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vpmax_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %a, <2 x i32> %b)
+// NYI:   ret <2 x i32> [[VPMAX2_I]]
+// uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
+//   return vpmax_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vpmax_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// NYI:   [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %a, <2 x float> %b)
+// NYI:   ret <2 x float> [[VPMAX2_I]]
+// float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
+//   return vpmax_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vpmaxq_s8(
+// NYI:   [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b)
+// NYI:   ret <16 x i8> [[VPMAX_I]]
+// int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) {
+//   return vpmaxq_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vpmaxq_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %a, <8 x i16> %b)
+// NYI:   ret <8 x i16> [[VPMAX2_I]]
+// int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) {
+//   return vpmaxq_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vpmaxq_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %a, <4 x i32> %b)
+// NYI:   ret <4 x i32> [[VPMAX2_I]]
+// int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) {
+//   return vpmaxq_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vpmaxq_u8(
+// NYI:   [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b)
+// NYI:   ret <16 x i8> [[VPMAX_I]]
+// uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) {
+//   return vpmaxq_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vpmaxq_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %a, <8 x i16> %b)
+// NYI:   ret <8 x i16> [[VPMAX2_I]]
+// uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
+//   return vpmaxq_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vpmaxq_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %a, <4 x i32> %b)
+// NYI:   ret <4 x i32> [[VPMAX2_I]]
+// uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) {
+//   return vpmaxq_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vpmaxq_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// NYI:   [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %a, <4 x float> %b)
+// NYI:   ret <4 x float> [[VPMAX2_I]]
+// float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) {
+//   return vpmaxq_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vpmaxq_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// NYI:   [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %a, <2 x double> %b)
+// NYI:   ret <2 x double> [[VPMAX2_I]]
+// float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) {
+//   return vpmaxq_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vpmin_s8(
+// NYI:   [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b)
+// NYI:   ret <8 x i8> [[VPMIN_I]]
+// int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
+//   return vpmin_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vpmin_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %a, <4 x i16> %b)
+// NYI:   ret <4 x i16> [[VPMIN2_I]]
+// int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
+//   return vpmin_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vpmin_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %a, <2 x i32> %b)
+// NYI:   ret <2 x i32> [[VPMIN2_I]]
+// int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
+//   return vpmin_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vpmin_u8(
+// NYI:   [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b)
+// NYI:   ret <8 x i8> [[VPMIN_I]]
+// uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
+//   return vpmin_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vpmin_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %a, <4 x i16> %b)
+// NYI:   ret <4 x i16> [[VPMIN2_I]]
+// uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
+//   return vpmin_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vpmin_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %a, <2 x i32> %b)
+// NYI:   ret <2 x i32> [[VPMIN2_I]]
+// uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
+//   return vpmin_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vpmin_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// NYI:   [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %a, <2 x float> %b)
+// NYI:   ret <2 x float> [[VPMIN2_I]]
+// float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
+//   return vpmin_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vpminq_s8(
+// NYI:   [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b)
+// NYI:   ret <16 x i8> [[VPMIN_I]]
+// int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) {
+//   return vpminq_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vpminq_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %a, <8 x i16> %b)
+// NYI:   ret <8 x i16> [[VPMIN2_I]]
+// int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) {
+//   return vpminq_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vpminq_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %a, <4 x i32> %b)
+// NYI:   ret <4 x i32> [[VPMIN2_I]]
+// int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) {
+//   return vpminq_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vpminq_u8(
+// NYI:   [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b)
+// NYI:   ret <16 x i8> [[VPMIN_I]]
+// uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) {
+//   return vpminq_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vpminq_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %a, <8 x i16> %b)
+// NYI:   ret <8 x i16> [[VPMIN2_I]]
+// uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) {
+//   return vpminq_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vpminq_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %a, <4 x i32> %b)
+// NYI:   ret <4 x i32> [[VPMIN2_I]]
+// uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) {
+//   return vpminq_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vpminq_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// NYI:   [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %a, <4 x float> %b)
+// NYI:   ret <4 x float> [[VPMIN2_I]]
+// float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) {
+//   return vpminq_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vpminq_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// NYI:   [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %a, <2 x double> %b)
+// NYI:   ret <2 x double> [[VPMIN2_I]]
+// float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) {
+//   return vpminq_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vpmaxnm_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// NYI:   [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %a, <2 x float> %b)
+// NYI:   ret <2 x float> [[VPMAXNM2_I]]
+// float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) {
+//   return vpmaxnm_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vpmaxnmq_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// NYI:   [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %a, <4 x float> %b)
+// NYI:   ret <4 x float> [[VPMAXNM2_I]]
+// float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) {
+//   return vpmaxnmq_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vpmaxnmq_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// NYI:   [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %a, <2 x double> %b)
+// NYI:   ret <2 x double> [[VPMAXNM2_I]]
+// float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) {
+//   return vpmaxnmq_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vpminnm_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// NYI:   [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %a, <2 x float> %b)
+// NYI:   ret <2 x float> [[VPMINNM2_I]]
+// float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) {
+//   return vpminnm_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vpminnmq_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// NYI:   [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %a, <4 x float> %b)
+// NYI:   ret <4 x float> [[VPMINNM2_I]]
+// float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) {
+//   return vpminnmq_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vpminnmq_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// NYI:   [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %a, <2 x double> %b)
+// NYI:   ret <2 x double> [[VPMINNM2_I]]
+// float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) {
+//   return vpminnmq_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vpadd_s8(
+// NYI:   [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b)
+// NYI:   ret <8 x i8> [[VPADD_V_I]]
+// int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
+//   return vpadd_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vpadd_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
+// NYI:   [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
+// NYI:   ret <4 x i16> [[VPADD_V2_I]]
+// int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
+//   return vpadd_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vpadd_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b)
+// NYI:   [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
+// NYI:   ret <2 x i32> [[VPADD_V2_I]]
+// int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
+//   return vpadd_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vpadd_u8(
+// NYI:   [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b)
+// NYI:   ret <8 x i8> [[VPADD_V_I]]
+// uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
+//   return vpadd_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vpadd_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
+// NYI:   [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
+// NYI:   ret <4 x i16> [[VPADD_V2_I]]
+// uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
+//   return vpadd_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vpadd_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b)
+// NYI:   [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
+// NYI:   ret <2 x i32> [[VPADD_V2_I]]
+// uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
+//   return vpadd_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vpadd_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// NYI:   [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %a, <2 x float> %b)
+// NYI:   [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
+// NYI:   ret <2 x float> [[VPADD_V2_I]]
+// float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
+//   return vpadd_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vpaddq_s8(
+// NYI:   [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b)
+// NYI:   ret <16 x i8> [[VPADDQ_V_I]]
+// int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) {
+//   return vpaddq_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vpaddq_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b)
+// NYI:   [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
+// NYI:   ret <8 x i16> [[VPADDQ_V2_I]]
+// int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) {
+//   return vpaddq_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vpaddq_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b)
+// NYI:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
+// NYI:   ret <4 x i32> [[VPADDQ_V2_I]]
+// int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) {
+//   return vpaddq_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vpaddq_u8(
+// NYI:   [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b)
+// NYI:   ret <16 x i8> [[VPADDQ_V_I]]
+// uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) {
+//   return vpaddq_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vpaddq_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b)
+// NYI:   [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8>
+// NYI:   ret <8 x i16> [[VPADDQ_V2_I]]
+// uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) {
+//   return vpaddq_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vpaddq_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b)
+// NYI:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8>
+// NYI:   ret <4 x i32> [[VPADDQ_V2_I]]
+// uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) {
+//   return vpaddq_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vpaddq_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// NYI:   [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %a, <4 x float> %b)
+// NYI:   [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8>
+// NYI:   ret <4 x float> [[VPADDQ_V2_I]]
+// float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) {
+//   return vpaddq_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vpaddq_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// NYI:   [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %a, <2 x double> %b)
+// NYI:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8>
+// NYI:   ret <2 x double> [[VPADDQ_V2_I]]
+// float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) {
+//   return vpaddq_f64(a, b);
+// }
+
+int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
+  return vqdmulh_s16(a, b);
+
+  // CIR-LABEL: vqdmulh_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqdmulh" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}test_vqdmulh_s16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
+  // LLVM:   [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>
+  // LLVM:   [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
+}
+
+int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
+  return vqdmulh_s32(a, b);
+
+  // CIR-LABEL: vqdmulh_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqdmulh" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}test_vqdmulh_s32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[B:%.*]])
+  // LLVM:   [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>
+  // LLVM:   [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
+}
+
+int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
+  return vqdmulhq_s16(a, b);
+
+  // CIR-LABEL: vqdmulhq_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqdmulh" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}test_vqdmulhq_s16(<8 x i16>{{.*}}[[A:%.*]], <8 x i16>{{.*}}[[B:%.*]])
+  // LLVM:   [[VQDMULH_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>
+  // LLVM:   [[VQDMULH_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULH_V2_I]] to <16 x i8>
+}
+
+int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
+  return vqdmulhq_s32(a, b);
+
+  // CIR-LABEL: vqdmulhq_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqdmulh" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}test_vqdmulhq_s32(<4 x i32>{{.*}}[[A:%.*]], <4 x i32>{{.*}}[[B:%.*]])
+  // LLVM:   [[VQDMULH_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>
+  // LLVM:   [[VQDMULH_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULH_V2_I]] to <16 x i8>
+}
+
+int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
+  return vqrdmulh_s16(a, b);
+
+  // CIR-LABEL: vqrdmulh_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqrdmulh" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}test_vqrdmulh_s16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
+  // LLVM:   [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>
+  // LLVM:   [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
+}
+
+int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
+  return vqrdmulh_s32(a, b);
+
+  // CIR-LABEL: vqrdmulh_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqrdmulh" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}test_vqrdmulh_s32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[B:%.*]])
+  // LLVM:   [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>
+  // LLVM:   [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
+}
+
+int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
+  return vqrdmulhq_s16(a, b);
+
+  // CIR-LABEL: vqrdmulhq_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqrdmulh" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}test_vqrdmulhq_s16(<8 x i16>{{.*}}[[A:%.*]], <8 x i16>{{.*}}[[B:%.*]])
+  // LLVM:   [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>
+  // LLVM:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
+}
+
+int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
+  return vqrdmulhq_s32(a, b);
+
+  // CIR-LABEL: vqrdmulhq_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqrdmulh" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}test_vqrdmulhq_s32(<4 x i32>{{.*}}[[A:%.*]], <4 x i32>{{.*}}[[B:%.*]])
+  // LLVM:   [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>
+  // LLVM:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
+}
+
+// NYI-LABEL: @test_vmulx_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// NYI:   [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %b)
+// NYI:   ret <2 x float> [[VMULX2_I]]
+// float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) {
+//   return vmulx_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vmulxq_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// NYI:   [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %b)
+// NYI:   ret <4 x float> [[VMULX2_I]]
+// float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) {
+//   return vmulxq_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vmulxq_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// NYI:   [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %b)
+// NYI:   ret <2 x double> [[VMULX2_I]]
+// float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) {
+//   return vmulxq_f64(a, b);
+// }
+
+
+int8x8_t test_vshl_n_s8(int8x8_t a) {
+  return vshl_n_s8(a, 3);
+
+ // CIR-LABEL: @test_vshl_n_s8
+ // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
+ // CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i]>
+ // CIR: {{.*}} = cir.shift(left, {{.*}} : !cir.vector<!s8i x 8>, [[AMT]] : !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>
+
+ // LLVM: {{.*}}@test_vshl_n_s8(<8 x i8>{{.*}}[[A:%.*]])
+ // LLVM: [[VSHL_N:%.*]] = shl <8 x i8> {{.*}}, splat (i8 3)
+ // LLVM: ret <8 x i8> [[VSHL_N]]
+}
+
+
+int16x4_t test_vshl_n_s16(int16x4_t a) {
+  return vshl_n_s16(a, 3);
+
+ // CIR-LABEL: @test_vshl_n_s16
+ // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i,
+ // CIR-SAME: #cir.int<3> : !s16i]>
+ // CIR: {{.*}} = cir.shift(left, {{.*}} : !cir.vector<!s16i x 4>, [[AMT]] : !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
+
+ // LLVM: {{.*}}@test_vshl_n_s16(<4 x i16>{{.*}}[[A:%.*]])
+ // LLVM: [[VSHL_N:%.*]] = shl <4 x i16> {{.*}}, splat (i16 3)
+}
+
+int32x2_t test_vshl_n_s32(int32x2_t a) {
+  return vshl_n_s32(a, 3);
+
+  // CIR-LABEL: @test_vshl_n_s32
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s32i, #cir.int<3> : !s32i]>
+  // CIR: {{.*}} = cir.shift(left, {{.*}} : !cir.vector<!s32i x 2>, [[AMT]] : !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}@test_vshl_n_s32(<2 x i32>{{.*}}[[A:%.*]])
+  // LLVM: [[VSHL_N:%.*]] = shl <2 x i32> {{.*}}, splat (i32 3)
+}
+
+// NYI-LABEL: @test_vshlq_n_s8(
+// NYI:   [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+// NYI:   ret <16 x i8> [[VSHL_N]]
+int8x16_t test_vshlq_n_s8(int8x16_t a) {
+  return vshlq_n_s8(a, 3);
+
+  // CIR-LABEL: @test_vshlq_n_s8
+  // CIR: {{.*}} = cir.shift(left, {{.*}} : !cir.vector<!s8i x 16>, {{.*}} : !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
+
+  // LLVM: {{.*}}@test_vshlq_n_s8(<16 x i8>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHL_N:%.*]] = shl <16 x i8> {{.*}}, splat (i8 3)
+  // LLVM:   ret <16 x i8> [[VSHL_N]]
+}
+
+int16x8_t test_vshlq_n_s16(int16x8_t a) {
+  return vshlq_n_s16(a, 3);
+
+  // CIR-LABEL: @test_vshlq_n_s16
+  // CIR: {{.*}} = cir.shift(left, {{.*}} : !cir.vector<!s16i x 8>, {{.*}} : !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM:   {{.*}}@test_vshlq_n_s16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHL_N:%.*]] = shl <8 x i16> {{.*}}, splat (i16 3)
+}
+
+
+int32x4_t test_vshlq_n_s32(int32x4_t a) {
+  return vshlq_n_s32(a, 3);
+
+  // CIR-LABEL: @test_vshlq_n_s32
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s32i, #cir.int<3> :
+  // CIR-SAME: !s32i, #cir.int<3> : !s32i, #cir.int<3> : !s32i]>
+  // CIR: {{.*}} = cir.shift(left, {{.*}} : !cir.vector<!s32i x 4>, [[AMT]] :
+  // CIR-SAME: !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM:   {{.*}}@test_vshlq_n_s32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHL_N:%.*]] = shl <4 x i32> {{.*}}, splat (i32 3)
+}
+
+int64x2_t test_vshlq_n_s64(int64x2_t a) {
+  return vshlq_n_s64(a, 3);
+
+  // CIR-LABEL: @test_vshlq_n_s64
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s64i, #cir.int<3> : !s64i]>
+  // CIR: {{.*}} = cir.shift(left, {{.*}} : !cir.vector<!s64i x 2>, [[AMT]] :
+  // CIR-SAME: !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+
+  // LLVM:   {{.*}}@test_vshlq_n_s64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHL_N:%.*]] = shl <2 x i64> {{.*}}, splat (i64 3)
+}
+
+uint8x8_t test_vshl_n_u8(uint8x8_t a) {
+  return vshl_n_u8(a, 3);
+
+  // CIR-LABEL: @test_vshl_n_u8
+  // CIR: {{.*}} = cir.shift(left, {{.*}} : !cir.vector<!u8i x 8>, {{.*}} :
+  // CIR-SAME: !cir.vector<!u8i x 8>) -> !cir.vector<!u8i x 8>
+
+  // LLVM:   {{.*}}@test_vshl_n_u8(<8 x i8>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHL_N:%.*]] = shl <8 x i8> {{.*}}, splat (i8 3)
+  // LLVM:   ret <8 x i8> [[VSHL_N]]
+}
+
+uint16x4_t test_vshl_n_u16(uint16x4_t a) {
+  return vshl_n_u16(a, 3);
+
+  // CIR-LABEL: @test_vshl_n_u16
+  // CIR: {{.*}} = cir.shift(left, {{.*}} : !cir.vector<!u16i x 4>, {{.*}} :
+  // CIR-SAME: !cir.vector<!u16i x 4>) -> !cir.vector<!u16i x 4>
+
+  // LLVM:   {{.*}}@test_vshl_n_u16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHL_N:%.*]] = shl <4 x i16> {{.*}}, splat (i16 3)
+}
+
+uint32x2_t test_vshl_n_u32(uint32x2_t a) {
+  return vshl_n_u32(a, 3);
+
+  // CIR-LABEL: @test_vshl_n_u32
+  // CIR: {{.*}} = cir.shift(left, {{.*}} : !cir.vector<!u32i x 2>, {{.*}} :
+  // CIR-SAME: !cir.vector<!u32i x 2>) -> !cir.vector<!u32i x 2>
+
+  // LLVM:   {{.*}}@test_vshl_n_u32(<2 x i32>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHL_N:%.*]] = shl <2 x i32> {{.*}}, splat (i32 3)
+}
+
+uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
+  return vshlq_n_u8(a, 3);
+
+  // CIR-LABEL: @test_vshlq_n_u8
+  // CIR: {{.*}} = cir.shift(left, {{.*}} : !cir.vector<!u8i x 16>, {{.*}} :
+  // CIR-SAME: !cir.vector<!u8i x 16>) -> !cir.vector<!u8i x 16>
+
+  // LLVM:   {{.*}}@test_vshlq_n_u8(<16 x i8>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHL_N:%.*]] = shl <16 x i8> {{.*}}, splat (i8 3)
+  // LLVM:   ret <16 x i8> [[VSHL_N]]
+}
+
+uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
+  return vshlq_n_u16(a, 3);
+
+  // CIR-LABEL: @test_vshlq_n_u16
+  // CIR: {{.*}} = cir.shift(left, {{.*}} : !cir.vector<!u16i x 8>, {{.*}} :
+  // CIR-SAME: !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>
+
+  // LLVM:   {{.*}}@test_vshlq_n_u16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHL_N:%.*]] = shl <8 x i16> {{.*}}, splat (i16 3)
+}
+
+uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
+  return vshlq_n_u32(a, 3);
+
+  // CIR-LABEL: @test_vshlq_n_u32
+  // CIR: {{.*}} = cir.shift(left, {{.*}} : !cir.vector<!u32i x 4>, {{.*}} :
+  // CIR-SAME: !cir.vector<!u32i x 4>) -> !cir.vector<!u32i x 4>
+
+  // LLVM:   {{.*}}@test_vshlq_n_u32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHL_N:%.*]] = shl <4 x i32> {{.*}}, splat (i32 3)
+}
+
+uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
+  return vshlq_n_u64(a, 3);
+
+  // CIR-LABEL: @test_vshlq_n_u64
+  // CIR: {{.*}} = cir.shift(left, {{.*}} : !cir.vector<!u64i x 2>, {{.*}} :
+  // CIR-SAME: !cir.vector<!u64i x 2>) -> !cir.vector<!u64i x 2>
+
+  // LLVM:   {{.*}}@test_vshlq_n_u64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHL_N:%.*]] = shl <2 x i64> {{.*}}, splat (i64 3)
+}
+
+int8x8_t test_vshr_n_s8(int8x8_t a) {
+  return vshr_n_s8(a, 3);
+
+  // CIR-LABEL: vshr_n_s8
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
+  // CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i]> : !cir.vector<!s8i x 8>
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !cir.vector<!s8i x 8>, [[AMT]] : !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}test_vshr_n_s8(<8 x i8>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHR_N:%.*]] = ashr <8 x i8> {{.*}}, splat (i8 3)
+  // LLVM:   ret <8 x i8> [[VSHR_N]]
+}
+
+int16x4_t test_vshr_n_s16(int16x4_t a) {
+  return vshr_n_s16(a, 3);
+
+  // CIR-LABEL: vshr_n_s16
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s16i, #cir.int<3> : !s16i,
+  // CIR-SAME: #cir.int<3> : !s16i, #cir.int<3> : !s16i]> : !cir.vector<!s16i x 4>
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !cir.vector<!s16i x 4>, [[AMT]] : !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}test_vshr_n_s16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHR_N:%.*]] = ashr <4 x i16> {{.*}}, splat (i16 3)
+}
+
+int32x2_t test_vshr_n_s32(int32x2_t a) {
+  return vshr_n_s32(a, 3);
+
+  // CIR-LABEL: vshr_n_s32
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s32i, #cir.int<3> : !s32i]> : !cir.vector<!s32i x 2>
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !cir.vector<!s32i x 2>, [[AMT]] : !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}test_vshr_n_s32(<2 x i32>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHR_N:%.*]] = ashr <2 x i32> {{.*}}, splat (i32 3)
+}
+
+int64x1_t test_vshr_n_s64(int64x1_t a) {
+  return vshr_n_s64(a, 3);
+
+  // CIR-LABEL: vshr_n_s64
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s64i]> : !cir.vector<!s64i x 1>
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !cir.vector<!s64i x 1>, [[AMT]] : !cir.vector<!s64i x 1>) -> !cir.vector<!s64i x 1>
+
+  // LLVM: {{.*}}test_vshr_n_s64(<1 x i64>{{.*}}[[A:%.*]])
+  // LLVM: [[VSHR_N:%.*]] = ashr <1 x i64> {{.*}}, splat (i64 3)
+}
+
+int8x16_t test_vshrq_n_s8(int8x16_t a) {
+  return vshrq_n_s8(a, 3);
+
+  // CIR-LABEL: vshrq_n_s8
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
+  // CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
+  // CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
+  // CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i]> : !cir.vector<!s8i x 16>
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !cir.vector<!s8i x 16>, [[AMT]] : !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
+
+  // LLVM: {{.*}}test_vshrq_n_s8(<16 x i8>{{.*}}[[A:%.*]])
+  // LLVM: [[VSHR_N:%.*]] = ashr <16 x i8> {{.*}}, splat (i8 3)
+  // LLVM: ret <16 x i8> [[VSHR_N]]
+}
+
+int16x8_t test_vshrq_n_s16(int16x8_t a) {
+  return vshrq_n_s16(a, 3);
+
+  // CIR-LABEL: vshrq_n_s16
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i,
+  // CIR-SAME: #cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i,
+  // CIR-SAME: #cir.int<3> : !s16i]> : !cir.vector<!s16i x 8>
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !cir.vector<!s16i x 8>, [[AMT]] : !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}test_vshrq_n_s16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHR_N:%.*]] = ashr <8 x i16> {{.*}}, splat (i16 3)
+}
+
+int32x4_t test_vshrq_n_s32(int32x4_t a) {
+  return vshrq_n_s32(a, 3);
+
+  // CIR-LABEL: vshrq_n_s32
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s32i, #cir.int<3> : !s32i,
+  // CIR-SAME: #cir.int<3> : !s32i, #cir.int<3> : !s32i]> : !cir.vector<!s32i x 4>
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !cir.vector<!s32i x 4>, [[AMT]] : !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}test_vshrq_n_s32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHR_N:%.*]] = ashr <4 x i32> {{.*}}, splat (i32 3)
+}
+
+// Vector lshr/ashr are undefined when the shift amount is equal to the vector
+// element size. Thus in code gen, for signed input, we make the shift amount
+// one less than the vector element size.
+int32x4_t test_vshrq_n_s32_32(int32x4_t a) {
+  return vshrq_n_s32(a, 32);
+
+  // CIR-LABEL: vshrq_n_s32
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<31> : !s32i, #cir.int<31> : !s32i,
+  // CIR-SAME: #cir.int<31> : !s32i, #cir.int<31> : !s32i]> : !cir.vector<!s32i x 4>
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !cir.vector<!s32i x 4>, [[AMT]] : !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}test_vshrq_n_s32_32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHR_N:%.*]] = ashr <4 x i32> {{.*}}, splat (i32 31)
+}
+
+int64x2_t test_vshrq_n_s64(int64x2_t a) {
+  return vshrq_n_s64(a, 3);
+
+  // CIR-LABEL: vshrq_n_s64
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s64i, #cir.int<3> : !s64i]> : !cir.vector<!s64i x 2>
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !cir.vector<!s64i x 2>, [[AMT]] : !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+
+  // LLVM: {{.*}}test_vshrq_n_s64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHR_N:%.*]] = ashr <2 x i64> {{.*}}, splat (i64 3)
+}
+
+uint8x8_t test_vshr_n_u8(uint8x8_t a) {
+  return vshr_n_u8(a, 3);
+
+  // CIR-LABEL: vshr_n_u8
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !u8i, #cir.int<3> : !u8i, #cir.int<3> : !u8i,
+  // CIR-SAME: #cir.int<3> : !u8i, #cir.int<3> : !u8i, #cir.int<3> : !u8i, #cir.int<3> : !u8i, #cir.int<3> : !u8i]> : !cir.vector<!u8i x 8>
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !cir.vector<!u8i x 8>, [[AMT]] : !cir.vector<!u8i x 8>) -> !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}test_vshr_n_u8(<8 x i8>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHR_N:%.*]] = lshr <8 x i8> {{.*}}, splat (i8 3)
+  // LLVM:   ret <8 x i8> [[VSHR_N]]
+}
+
+uint16x4_t test_vshr_n_u16(uint16x4_t a) {
+  return vshr_n_u16(a, 3);
+
+  // CIR-LABEL: vshr_n_u16
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !u16i, #cir.int<3> : !u16i,
+  // CIR-SAME: #cir.int<3> : !u16i, #cir.int<3> : !u16i]> : !cir.vector<!u16i x 4>
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !cir.vector<!u16i x 4>, [[AMT]] : !cir.vector<!u16i x 4>) -> !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}test_vshr_n_u16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHR_N:%.*]] = lshr <4 x i16> {{.*}}, splat (i16 3)
+}
+
+// Vector lshr/ashr are undefined when the shift amount is equal to the vector
+// element size. Thus in code gen, for unsigned input, we return a zero vector.
+uint16x4_t test_vshr_n_u16_16(uint16x4_t a) {
+  return vshr_n_u16(a, 16);
+
+  // CIR-LABEL: vshr_n_u16
+  // CIR: {{%.*}} = cir.const #cir.int<16> : !s32i
+  // CIR: {{%.*}} = cir.const #cir.zero : !cir.vector<!u16i x 4>
+  // CIR-NOT: cir.shift
+
+  // LLVM: {{.*}}test_vshr_n_u16_16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM: store {{.*}} zeroinitializer
+}
+
+uint32x2_t test_vshr_n_u32(uint32x2_t a) {
+  return vshr_n_u32(a, 3);
+
+  // CIR-LABEL: vshr_n_u32
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !u32i, #cir.int<3> : !u32i]> : !cir.vector<!u32i x 2>
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !cir.vector<!u32i x 2>, [[AMT]] : !cir.vector<!u32i x 2>) -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}test_vshr_n_u32(<2 x i32>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHR_N:%.*]] = lshr <2 x i32> {{.*}}, splat (i32 3)
+}
+
+uint64x1_t test_vshr_n_u64(uint64x1_t a) {
+  return vshr_n_u64(a, 1);
+
+  // CIR-LABEL: vshr_n_u64
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<1> : !u64i]> : !cir.vector<!u64i x 1>
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !cir.vector<!u64i x 1>, [[AMT]] : !cir.vector<!u64i x 1>) -> !cir.vector<!u64i x 1>
+
+  // LLVM: {{.*}}test_vshr_n_u64(<1 x i64>{{.*}}[[A:%.*]])
+  // LLVM: [[VSHR_N:%.*]] = lshr <1 x i64> {{.*}}, splat (i64 1)
+}
+
+uint8x16_t test_vshrq_n_u8(uint8x16_t a) {
+  return vshrq_n_u8(a, 3);
+
+  // CIR-LABEL: vshrq_n_u8
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !u8i, #cir.int<3> : !u8i, #cir.int<3> : !u8i,
+  // CIR-SAME: #cir.int<3> : !u8i, #cir.int<3> : !u8i, #cir.int<3> : !u8i, #cir.int<3> : !u8i, #cir.int<3> : !u8i,
+  // CIR-SAME: #cir.int<3> : !u8i, #cir.int<3> : !u8i, #cir.int<3> : !u8i, #cir.int<3> : !u8i,
+  // CIR-SAME: #cir.int<3> : !u8i, #cir.int<3> : !u8i, #cir.int<3> : !u8i, #cir.int<3> : !u8i]> : !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}test_vshrq_n_u8(<16 x i8>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHR_N:%.*]] = lshr <16 x i8> {{.*}}, splat (i8 3)
+  // LLVM:   ret <16 x i8> [[VSHR_N]]
+}
+
+uint16x8_t test_vshrq_n_u16(uint16x8_t a) {
+  return vshrq_n_u16(a, 3);
+
+  // CIR-LABEL: vshrq_n_u16
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !u16i, #cir.int<3> : !u16i, #cir.int<3> : !u16i,
+  // CIR-SAME: #cir.int<3> : !u16i, #cir.int<3> : !u16i, #cir.int<3> : !u16i, #cir.int<3> : !u16i,
+  // CIR-SAME: #cir.int<3> : !u16i]> : !cir.vector<!u16i x 8>
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !cir.vector<!u16i x 8>, [[AMT]] : !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}test_vshrq_n_u16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHR_N:%.*]] = lshr <8 x i16> {{.*}}, splat (i16 3)
+}
+
+uint32x4_t test_vshrq_n_u32(uint32x4_t a) {
+  return vshrq_n_u32(a, 3);
+
+  // CIR-LABEL: vshrq_n_u32
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !u32i, #cir.int<3> : !u32i,
+  // CIR-SAME: #cir.int<3> : !u32i, #cir.int<3> : !u32i]> : !cir.vector<!u32i x 4>
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !cir.vector<!u32i x 4>, [[AMT]] : !cir.vector<!u32i x 4>) -> !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}test_vshrq_n_u32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHR_N:%.*]] = lshr <4 x i32> {{.*}}, splat (i32 3)
+}
+
+uint64x2_t test_vshrq_n_u64(uint64x2_t a) {
+  return vshrq_n_u64(a, 3);
+
+  // CIR-LABEL: vshrq_n_u64
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !u64i, #cir.int<3> : !u64i]> : !cir.vector<!u64i x 2>
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !cir.vector<!u64i x 2>, [[AMT]] : !cir.vector<!u64i x 2>) -> !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}test_vshrq_n_u64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM:   [[VSHR_N:%.*]] = lshr <2 x i64> {{.*}}, splat (i64 3)
+}
+
+int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
+  return vsra_n_s8(a, b, 3);
+
+  // CIR-LABEL: vsra_n_s8
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VSRA_N:%.*]] = cir.shift(right, {{%.*}}, [[splat]] : !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>
+  // CIR: cir.binop(add, {{%.*}}, [[VSRA_N]]) : !cir.vector<!s8i x 8>
+
+  // LLVM-LABEL: @test_vsra_n_s8(
+  // LLVM: [[VSRA_N:%.*]] = ashr <8 x i8> %1, splat (i8 3)
+  // LLVM: [[TMP0:%.*]] = add <8 x i8> %0, [[VSRA_N]]
+  // LLVM: ret <8 x i8> [[TMP0]]
+}
+
+int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
+  return vsra_n_s16(a, b, 3);
+
+  // CIR-LABEL: vsra_n_s16
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VSRA_N:%.*]] = cir.shift(right, {{%.*}}, [[splat]] : !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
+  // CIR: cir.binop(add, {{%.*}}, [[VSRA_N]]) : !cir.vector<!s16i x 4>
+
+  // LLVM-LABEL: test_vsra_n_s16
+  // LLVM: [[VSRA_N:%.*]] = ashr <4 x i16> {{.*}}, splat (i16 3)
+  // LLVM: [[TMP4:%.*]] = add <4 x i16> {{.*}}, [[VSRA_N]]
+}
+
+
+int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
+  return vsra_n_s32(a, b, 3);
+
+  // CIR-LABEL: vsra_n_s32
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VSRA_N:%.*]] = cir.shift(right, {{%.*}}, [[splat]] : !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+  // CIR: cir.binop(add, {{%.*}}, [[VSRA_N]]) : !cir.vector<!s32i x 2>
+
+  // LLVM-LABEL: test_vsra_n_s32
+  // LLVM: [[VSRA_N:%.*]] = ashr <2 x i32> {{.*}}, splat (i32 3)
+  // LLVM: [[TMP4:%.*]] = add <2 x i32> {{.*}}, [[VSRA_N]]
+}
+
+int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
+  return vsraq_n_s8(a, b, 3);
+
+  // CIR-LABEL: vsraq_n_s8
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VSRA_N:%.*]] = cir.shift(right, {{%.*}}, [[splat]] : !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
+  // CIR: cir.binop(add, {{%.*}}, [[VSRA_N]]) : !cir.vector<!s8i x 16>
+
+  // LLVM-LABEL: test_vsraq_n_s8
+  // LLVM: [[VSRA_N:%.*]] = ashr <16 x i8> %1, splat (i8 3)
+  // LLVM: [[TMP0:%.*]] = add <16 x i8> %0, [[VSRA_N]]
+  // LLVM: ret <16 x i8> [[TMP0]]
+}
+
+int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
+  return vsraq_n_s16(a, b, 3);
+
+  // CIR-LABEL: vsraq_n_s16
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VSRA_N:%.*]] = cir.shift(right, {{%.*}}, [[splat]] : !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+  // CIR: cir.binop(add, {{%.*}}, [[VSRA_N]]) : !cir.vector<!s16i x 8>
+
+  // LLVM-LABEL: test_vsraq_n_s16
+  // LLVM: [[VSRA_N:%.*]] = ashr <8 x i16> {{.*}}, splat (i16 3)
+  // LLVM: [[TMP4:%.*]] = add <8 x i16> {{.*}}, [[VSRA_N]]
+}
+
+int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
+  return vsraq_n_s32(a, b, 3);
+
+  // CIR-LABEL: vsraq_n_s32
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VSRA_N:%.*]] = cir.shift(right, {{%.*}}, [[splat]] : !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+  // CIR: cir.binop(add, {{%.*}}, [[VSRA_N]]) : !cir.vector<!s32i x 4>
+
+  // LLVM-LABEL: test_vsraq_n_s32
+  // LLVM: [[VSRA_N:%.*]] = ashr <4 x i32> {{.*}}, splat (i32 3)
+  // LLVM: [[TMP4:%.*]] = add <4 x i32> {{.*}}, [[VSRA_N]]
+}
+
+int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
+  return vsraq_n_s64(a, b, 3);
+
+  // CIR-LABEL: vsraq_n_s64
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VSRA_N:%.*]] = cir.shift(right, {{%.*}}, [[splat]] : !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+  // CIR: cir.binop(add, {{%.*}}, [[VSRA_N]]) : !cir.vector<!s64i x 2>
+
+  // LLVM-LABEL: test_vsraq_n_s64
+  // LLVM: [[VSRA_N:%.*]] = ashr <2 x i64> {{.*}}, splat (i64 3)
+  // LLVM: [[TMP4:%.*]] = add <2 x i64> {{.*}}, [[VSRA_N]]
+}
+
+uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) {
+  return vsra_n_u8(a, b, 3);
+
+  // CIR-LABEL: vsra_n_u8
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VSRA_N:%.*]] = cir.shift(right, {{%.*}}, [[splat]] : !cir.vector<!u8i x 8>) -> !cir.vector<!u8i x 8>
+  // CIR: cir.binop(add, {{%.*}}, [[VSRA_N]]) : !cir.vector<!u8i x 8>
+
+  // LLVM-LABEL: @test_vsra_n_u8(
+  // LLVM: [[VSRA_N:%.*]] = lshr <8 x i8> %1, splat (i8 3)
+  // LLVM: [[TMP0:%.*]] = add <8 x i8> %0, [[VSRA_N]]
+  // LLVM: ret <8 x i8> [[TMP0]]
+}
+
+uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) {
+  return vsra_n_u16(a, b, 3);
+
+  // CIR-LABEL: vsra_n_u16
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VSRA_N:%.*]] = cir.shift(right, {{%.*}}, [[splat]] : !cir.vector<!u16i x 4>) -> !cir.vector<!u16i x 4>
+  // CIR: cir.binop(add, {{%.*}}, [[VSRA_N]]) : !cir.vector<!u16i x 4>
+
+  // LLVM-LABEL: test_vsra_n_u16
+  // LLVM: [[VSRA_N:%.*]] = lshr <4 x i16> {{.*}}, splat (i16 3)
+  // LLVM: [[TMP4:%.*]] = add <4 x i16> {{.*}}, [[VSRA_N]]
+}
+
+uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) {
+  return vsra_n_u32(a, b, 3);
+
+  // CIR-LABEL: vsra_n_u32
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VSRA_N:%.*]] = cir.shift(right, {{%.*}}, [[splat]] : !cir.vector<!u32i x 2>) -> !cir.vector<!u32i x 2>
+  // CIR: cir.binop(add, {{%.*}}, [[VSRA_N]]) : !cir.vector<!u32i x 2>
+
+  // LLVM-LABEL: test_vsra_n_u32
+  // LLVM: [[VSRA_N:%.*]] = lshr <2 x i32> {{.*}}, splat (i32 3)
+  // LLVM: [[TMP4:%.*]] = add <2 x i32> {{.*}}, [[VSRA_N]]
+}
+
+uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) {
+  return vsraq_n_u8(a, b, 3);
+
+  // CIR-LABEL: vsraq_n_u8
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VSRA_N:%.*]] = cir.shift(right, {{%.*}}, [[splat]] : !cir.vector<!u8i x 16>) -> !cir.vector<!u8i x 16>
+  // CIR: cir.binop(add, {{%.*}}, [[VSRA_N]]) : !cir.vector<!u8i x 16>
+
+  // LLVM-LABEL: test_vsraq_n_u8
+  // LLVM: [[VSRA_N:%.*]] = lshr <16 x i8> %1, splat (i8 3)
+  // LLVM: [[TMP0:%.*]] = add <16 x i8> %0, [[VSRA_N]]
+  // LLVM: ret <16 x i8> [[TMP0]]
+}
+
+uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) {
+  return vsraq_n_u16(a, b, 3);
+
+  // CIR-LABEL: vsraq_n_u16
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VSRA_N:%.*]] = cir.shift(right, {{%.*}}, [[splat]] : !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>
+  // CIR: cir.binop(add, {{%.*}}, [[VSRA_N]]) : !cir.vector<!u16i x 8>
+
+  // LLVM-LABEL: test_vsraq_n_u16
+  // LLVM: [[VSRA_N:%.*]] = lshr <8 x i16> {{.*}}, splat (i16 3)
+  // LLVM: [[TMP4:%.*]] = add <8 x i16> {{.*}}, [[VSRA_N]]
+}
+
+uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) {
+  return vsraq_n_u32(a, b, 3);
+
+  // CIR-LABEL: vsraq_n_u32
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VSRA_N:%.*]] = cir.shift(right, {{%.*}}, [[splat]] : !cir.vector<!u32i x 4>) -> !cir.vector<!u32i x 4>
+  // CIR: cir.binop(add, {{%.*}}, [[VSRA_N]]) : !cir.vector<!u32i x 4>
+
+  // LLVM-LABEL: test_vsraq_n_u32
+  // LLVM: [[VSRA_N:%.*]] = lshr <4 x i32> {{.*}}, splat (i32 3)
+  // LLVM: [[TMP4:%.*]] = add <4 x i32> {{.*}}, [[VSRA_N]]
+}
+
+uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) {
+  return vsraq_n_u64(a, b, 3);
+
+  // CIR-LABEL: vsraq_n_u64
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VSRA_N:%.*]] = cir.shift(right, {{%.*}}, [[splat]] : !cir.vector<!u64i x 2>) -> !cir.vector<!u64i x 2>
+  // CIR: cir.binop(add, {{%.*}}, [[VSRA_N]]) : !cir.vector<!u64i x 2>
+
+  // LLVM-LABEL: test_vsraq_n_u64
+  // LLVM: [[VSRA_N:%.*]] = lshr <2 x i64> {{.*}}, splat (i64 3)
+  // LLVM: [[TMP4:%.*]] = add <2 x i64> {{.*}}, [[VSRA_N]]
+}
+
+int8x8_t test_vrshr_n_s8(int8x8_t a) {
+  // Signed rounding shift right by 3, lowered to the srshl intrinsic with a
+  // splat shift amount of -3 (a negative left shift encodes a right shift).
+  return vrshr_n_s8(a, 3);
+
+  // CIR-LABEL: vrshr_n_s8
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<-3> : !s8i, #cir.int<-3> : !s8i,
+  // CIR-SAME: #cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i,
+  // CIR-SAME: #cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i]> : !cir.vector<!s8i x 8>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}@test_vrshr_n_s8(<8 x i8>{{.*}}[[A:%.*]])
+  // LLVM: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> {{.*}}, <8 x i8> splat (i8 -3))
+  // LLVM: ret <8 x i8> {{.*}}
+}
+
+uint8x8_t test_vrshr_n_u8(uint8x8_t a) {
+  // Unsigned rounding shift right by 3, lowered to the urshl intrinsic; the
+  // shift-amount vector is signed (!s8i) even though the data is unsigned.
+  return vrshr_n_u8(a, 3);
+
+  // CIR-LABEL: vrshr_n_u8
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<-3> : !s8i, #cir.int<-3> : !s8i,
+  // CIR-SAME: #cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i,
+  // CIR-SAME: #cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i]> : !cir.vector<!s8i x 8>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!u8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}@test_vrshr_n_u8(<8 x i8>{{.*}}[[A:%.*]])
+  // LLVM: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> {{.*}}, <8 x i8> splat (i8 -3))
+  // LLVM: ret <8 x i8> {{.*}}
+}
+
+int16x4_t test_vrshr_n_s16(int16x4_t a) {
+  // Signed rounding shift right by 3 on 4 x i16, via srshl with splat(-3).
+  return vrshr_n_s16(a, 3);
+
+  // CIR-LABEL: vrshr_n_s16
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<-3> : !s16i, #cir.int<-3> : !s16i,
+  // CIR-SAME: #cir.int<-3> : !s16i, #cir.int<-3> : !s16i]> : !cir.vector<!s16i x 4>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}@test_vrshr_n_s16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> {{.*}}, <4 x i16> splat (i16 -3))
+}
+
+uint16x4_t test_vrshr_n_u16(uint16x4_t a) {
+  // Unsigned rounding shift right by 3 on 4 x i16, via urshl with splat(-3).
+  return vrshr_n_u16(a, 3);
+
+  // CIR-LABEL: vrshr_n_u16
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<-3> : !s16i, #cir.int<-3> : !s16i,
+  // CIR-SAME: #cir.int<-3> : !s16i, #cir.int<-3> : !s16i]> : !cir.vector<!s16i x 4>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!u16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}@test_vrshr_n_u16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> {{.*}}, <4 x i16> splat (i16 -3))
+}
+
+int32x2_t test_vrshr_n_s32(int32x2_t a) {
+  // Signed rounding shift right by 3 on 2 x i32, via srshl with splat(-3).
+  return vrshr_n_s32(a, 3);
+
+  // CIR-LABEL: vrshr_n_s32
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<-3> : !s32i, #cir.int<-3> : !s32i]> : !cir.vector<!s32i x 2>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}@test_vrshr_n_s32(<2 x i32>{{.*}}[[A:%.*]])
+  // LLVM: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> {{.*}}, <2 x i32> splat (i32 -3))
+}
+
+uint32x2_t test_vrshr_n_u32(uint32x2_t a) {
+  // Unsigned rounding shift right by 3 on 2 x i32, via urshl with splat(-3).
+  return vrshr_n_u32(a, 3);
+
+  // CIR-LABEL: vrshr_n_u32
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<-3> : !s32i, #cir.int<-3> : !s32i]> : !cir.vector<!s32i x 2>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!u32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}@test_vrshr_n_u32(<2 x i32>{{.*}}[[A:%.*]])
+  // LLVM: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> {{.*}}, <2 x i32> splat (i32 -3))
+}
+
+int64x1_t test_vrshr_n_s64(int64x1_t a) {
+  // Signed rounding shift right by 3 on the 1 x i64 case, via srshl(-3).
+  return vrshr_n_s64(a, 3);
+
+  // CIR-LABEL: vrshr_n_s64
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<-3> : !s64i]> : !cir.vector<!s64i x 1>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s64i x 1>, !cir.vector<!s64i x 1>) -> !cir.vector<!s64i x 1>
+
+  // LLVM: {{.*}}@test_vrshr_n_s64(<1 x i64>{{.*}}[[A:%.*]])
+  // LLVM: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> {{.*}}, <1 x i64> splat (i64 -3))
+}
+
+uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
+  // Unsigned rounding shift right by 3 on the 1 x i64 case, via urshl(-3).
+  return vrshr_n_u64(a, 3);
+
+  // CIR-LABEL: vrshr_n_u64
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<-3> : !s64i]> : !cir.vector<!s64i x 1>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!u64i x 1>, !cir.vector<!s64i x 1>) -> !cir.vector<!u64i x 1>
+
+  // LLVM: {{.*}}@test_vrshr_n_u64(<1 x i64>{{.*}}[[A:%.*]])
+  // LLVM: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> {{.*}}, <1 x i64> splat (i64 -3))
+}
+
+int8x16_t test_vrshrq_n_s8(int8x16_t a) {
+  // 128-bit (q) variant: signed rounding shift right by 3 on 16 x i8.
+  return vrshrq_n_s8(a, 3);
+
+  // CIR-LABEL: vrshrq_n_s8
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i,
+  // CIR-SAME: #cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i,
+  // CIR-SAME: #cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i,
+  // CIR-SAME: #cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i]> : !cir.vector<!s8i x 16>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
+
+  // LLVM: {{.*}}@test_vrshrq_n_s8(<16 x i8>{{.*}}[[A:%.*]])
+  // LLVM: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> {{.*}}, <16 x i8> splat (i8 -3))
+  // LLVM: ret <16 x i8> {{.*}}
+}
+
+uint8x16_t test_vrshrq_n_u8(uint8x16_t a) {
+  // 128-bit (q) variant: unsigned rounding shift right by 3 on 16 x i8.
+  return vrshrq_n_u8(a, 3);
+
+  // CIR-LABEL: vrshrq_n_u8
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i,
+  // CIR-SAME: #cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i,
+  // CIR-SAME: #cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i,
+  // CIR-SAME: #cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i, #cir.int<-3> : !s8i]> : !cir.vector<!s8i x 16>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!u8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}@test_vrshrq_n_u8(<16 x i8>{{.*}}[[A:%.*]])
+  // LLVM: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> {{.*}}, <16 x i8> splat (i8 -3))
+  // LLVM: ret <16 x i8> {{.*}}
+}
+
+int16x8_t test_vrshrq_n_s16(int16x8_t a) {
+  // 128-bit (q) variant: signed rounding shift right by 3 on 8 x i16.
+  return vrshrq_n_s16(a, 3);
+
+  // CIR-LABEL: vrshrq_n_s16
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<-3> : !s16i, #cir.int<-3> : !s16i, #cir.int<-3> : !s16i, #cir.int<-3> : !s16i,
+  // CIR-SAME: #cir.int<-3> : !s16i, #cir.int<-3> : !s16i, #cir.int<-3> : !s16i, #cir.int<-3> : !s16i]> : !cir.vector<!s16i x 8>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}@test_vrshrq_n_s16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> {{.*}}, <8 x i16> splat (i16 -3))
+}
+
+uint16x8_t test_vrshrq_n_u16(uint16x8_t a) {
+  // 128-bit (q) variant: unsigned rounding shift right by 3 on 8 x i16.
+  return vrshrq_n_u16(a, 3);
+
+  // CIR-LABEL: vrshrq_n_u16
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<-3> : !s16i, #cir.int<-3> : !s16i, #cir.int<-3> : !s16i, #cir.int<-3> : !s16i,
+  // CIR-SAME: #cir.int<-3> : !s16i, #cir.int<-3> : !s16i, #cir.int<-3> : !s16i, #cir.int<-3> : !s16i]> : !cir.vector<!s16i x 8>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!u16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}@test_vrshrq_n_u16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> {{.*}}, <8 x i16> splat (i16 -3))
+}
+
+int32x4_t test_vrshrq_n_s32(int32x4_t a) {
+  // 128-bit (q) variant: signed rounding shift right by 3 on 4 x i32.
+  return vrshrq_n_s32(a, 3);
+
+  // CIR-LABEL: vrshrq_n_s32
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<-3> : !s32i, #cir.int<-3> : !s32i, #cir.int<-3> : !s32i, #cir.int<-3> : !s32i]> : !cir.vector<!s32i x 4>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}@test_vrshrq_n_s32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> {{.*}}, <4 x i32> splat (i32 -3))
+}
+
+uint32x4_t test_vrshrq_n_u32(uint32x4_t a) {
+  // 128-bit (q) variant: unsigned rounding shift right by 3 on 4 x i32.
+  return vrshrq_n_u32(a, 3);
+
+  // CIR-LABEL: vrshrq_n_u32
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<-3> : !s32i, #cir.int<-3> : !s32i,
+  // CIR-SAME: #cir.int<-3> : !s32i, #cir.int<-3> : !s32i]> : !cir.vector<!s32i x 4>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!u32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}@test_vrshrq_n_u32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> {{.*}}, <4 x i32> splat (i32 -3))
+}
+
+int64x2_t test_vrshrq_n_s64(int64x2_t a) {
+  // 128-bit (q) variant: signed rounding shift right by 3 on 2 x i64.
+  return vrshrq_n_s64(a, 3);
+
+  // CIR-LABEL: vrshrq_n_s64
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<-3> : !s64i, #cir.int<-3> : !s64i]> : !cir.vector<!s64i x 2>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+
+  // LLVM: {{.*}}@test_vrshrq_n_s64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> {{.*}}, <2 x i64> splat (i64 -3))
+}
+
+uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
+  // 128-bit (q) variant: unsigned rounding shift right by 3 on 2 x i64.
+  return vrshrq_n_u64(a, 3);
+
+  // CIR-LABEL: vrshrq_n_u64
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<-3> : !s64i, #cir.int<-3> : !s64i]> : !cir.vector<!s64i x 2>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!u64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}@test_vrshrq_n_u64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> {{.*}}, <2 x i64> splat (i64 -3))
+}
+
+int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
+  // Signed rounding shift-right-accumulate: a + rounding_shr(b, 3), lowered
+  // to the srshl intrinsic with splat(-3) followed by a vector add.
+  return vrsra_n_s8(a, b, 3);
+
+  // CIR-LABEL: vrsra_n_s8
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, [[splat]] : (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>
+  // CIR: cir.binop(add, {{%.*}}, {{.*}}) : !cir.vector<!s8i x 8>
+
+  // LLVM-LABEL: test_vrsra_n_s8
+  // LLVM:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %1, <8 x i8> splat (i8 -3))
+  // LLVM:   [[TMP0:%.*]] = add <8 x i8> %0, {{.*}}
+  // LLVM:   ret <8 x i8> [[TMP0]]
+}
+
+int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
+  // Rounding shift-right-accumulate on 4 x i16; the operands round-trip
+  // through <8 x i8> bitcasts before the srshl + add sequence.
+  return vrsra_n_s16(a, b, 3);
+
+  // CIR-LABEL: vrsra_n_s16
+  // CIR: [[VRSHR_N:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s16i x 4>
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" {{.*}}, [[splat]] : (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s16i x 4>
+  // CIR: [[TMP2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s16i x 4>
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector<!s16i x 4>
+
+  // LLVM-LABEL: test_vrsra_n_s16
+  // LLVM:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> {{.*}}, <4 x i16> splat (i16 -3))
+  // LLVM:   [[TMP2:%.*]] = bitcast <8 x i8> {{.*}} to <4 x i16>
+  // LLVM:   [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
+}
+
+int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
+  // Rounding shift-right-accumulate on 2 x i32; operands round-trip through
+  // <8 x i8> bitcasts before the srshl + add sequence.
+  return vrsra_n_s32(a, b, 3);
+
+  // CIR-LABEL: vrsra_n_s32
+  // CIR: [[VRSHR_N:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s32i x 2>
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" {{.*}}, [[splat]] : (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+  // CIR: [[TMP2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s32i x 2>
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector<!s32i x 2>
+
+  // LLVM-LABEL: test_vrsra_n_s32
+  // LLVM:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> {{.*}}, <2 x i32> splat (i32 -3))
+  // LLVM:   [[TMP2:%.*]] = bitcast <8 x i8> {{.*}} to <2 x i32>
+  // LLVM:   [[TMP3:%.*]] = add <2 x i32> {{.*}}, [[VRSHR_N1]]
+}
+
+int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
+  // 128-bit (q) rounding shift-right-accumulate on 16 x i8: srshl(-3) + add.
+  return vrsraq_n_s8(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_s8
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, [[splat]] : (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
+  // CIR: cir.binop(add, {{%.*}}, {{.*}}) : !cir.vector<!s8i x 16>
+
+  // LLVM-LABEL: test_vrsraq_n_s8
+  // LLVM:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %1, <16 x i8> splat (i8 -3))
+  // LLVM:   [[TMP0:%.*]] = add <16 x i8> %0, {{.*}}
+  // LLVM:   ret <16 x i8> [[TMP0]]
+}
+
+int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
+  // 128-bit (q) rounding shift-right-accumulate on 8 x i16; operands
+  // round-trip through <16 x i8> bitcasts before srshl(-3) + add.
+  return vrsraq_n_s16(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_s16
+  // CIR: [[VRSHR_N:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s16i x 8>
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" {{.*}}, [[splat]] : (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+  // CIR: [[TMP2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s16i x 8>
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector<!s16i x 8>
+
+  // LLVM-LABEL: test_vrsraq_n_s16
+  // LLVM:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> {{.*}}, <8 x i16> splat (i16 -3))
+  // LLVM:   [[TMP2:%.*]] = bitcast <16 x i8> {{.*}} to <8 x i16>
+  // LLVM:   [[TMP3:%.*]] = add <8 x i16> {{.*}}, [[VRSHR_N1]]
+}
+
+int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
+  // 128-bit (q) rounding shift-right-accumulate on 4 x i32; operands
+  // round-trip through <16 x i8> bitcasts before srshl(-3) + add.
+  return vrsraq_n_s32(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_s32
+  // CIR: [[VRSHR_N:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s32i x 4>
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" {{.*}}, [[splat]] : (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+  // CIR: [[TMP2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s32i x 4>
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector<!s32i x 4>
+
+  // LLVM-LABEL: test_vrsraq_n_s32
+  // LLVM:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> {{.*}}, <4 x i32> splat (i32 -3))
+  // LLVM:   [[TMP2:%.*]] = bitcast <16 x i8> {{.*}} to <4 x i32>
+  // LLVM:   [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
+}
+
+int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
+  // 128-bit (q) rounding shift-right-accumulate on 2 x i64; operands
+  // round-trip through <16 x i8> bitcasts before srshl(-3) + add.
+  return vrsraq_n_s64(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_s64
+  // CIR: [[VRSHR_N:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s64i x 2>
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" {{.*}}, [[splat]] : (!cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+  // CIR: [[TMP2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s64i x 2>
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector<!s64i x 2>
+
+  // LLVM-LABEL: test_vrsraq_n_s64
+  // LLVM:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> {{.*}}, <2 x i64> splat (i64 -3))
+  // LLVM:   [[TMP2:%.*]] = bitcast <16 x i8> {{.*}} to <2 x i64>
+  // LLVM:   [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
+}
+
+uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
+  // Unsigned rounding shift-right-accumulate on 8 x i8: urshl(-3) + add.
+  return vrsra_n_u8(a, b, 3);
+
+  // CIR-LABEL: vrsra_n_u8
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, [[splat]] : (!cir.vector<!u8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!u8i x 8>
+  // CIR: cir.binop(add, {{%.*}}, {{.*}}) : !cir.vector<!u8i x 8>
+
+  // LLVM-LABEL: test_vrsra_n_u8
+  // LLVM:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %1, <8 x i8> splat (i8 -3))
+  // LLVM:   [[TMP0:%.*]] = add <8 x i8> %0, {{.*}}
+  // LLVM:   ret <8 x i8> [[TMP0]]
+}
+
+uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
+  // Unsigned rounding shift-right-accumulate on 4 x i16; operands
+  // round-trip through <8 x i8> bitcasts before urshl(-3) + add.
+  return vrsra_n_u16(a, b, 3);
+
+  // CIR-LABEL: vrsra_n_u16
+  // CIR: [[VRSHR_N:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u16i x 4>
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" {{.*}}, [[splat]] : (!cir.vector<!u16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!u16i x 4>
+  // CIR: [[TMP2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u16i x 4>
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector<!u16i x 4>
+
+  // LLVM-LABEL: test_vrsra_n_u16
+  // LLVM:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> {{.*}}, <4 x i16> splat (i16 -3))
+  // LLVM:   [[TMP2:%.*]] = bitcast <8 x i8> {{.*}} to <4 x i16>
+  // LLVM:   [[TMP3:%.*]] = add <4 x i16> {{.*}}, [[VRSHR_N1]]
+}
+
+uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
+  // Unsigned rounding shift-right-accumulate on 2 x i32; operands
+  // round-trip through <8 x i8> bitcasts before urshl(-3) + add.
+  return vrsra_n_u32(a, b, 3);
+
+  // CIR-LABEL: vrsra_n_u32
+  // CIR: [[VRSHR_N:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u32i x 2>
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" {{.*}}, [[splat]] : (!cir.vector<!u32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!u32i x 2>
+  // CIR: [[TMP2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u32i x 2>
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector<!u32i x 2>
+
+  // LLVM-LABEL: test_vrsra_n_u32
+  // LLVM:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> {{.*}}, <2 x i32> splat (i32 -3))
+  // LLVM:   [[TMP2:%.*]] = bitcast <8 x i8> {{.*}} to <2 x i32>
+  // LLVM:   [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
+}
+
+uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
+  // 128-bit (q) unsigned rounding shift-right-accumulate: urshl(-3) + add.
+  return vrsraq_n_u8(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_u8
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" {{%.*}}, [[splat]] : (!cir.vector<!u8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!u8i x 16>
+  // CIR: cir.binop(add, {{%.*}}, {{.*}}) : !cir.vector<!u8i x 16>
+
+  // LLVM-LABEL: test_vrsraq_n_u8
+  // LLVM:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %1, <16 x i8> splat (i8 -3))
+  // LLVM:   [[TMP0:%.*]] = add <16 x i8> %0, {{.*}}
+  // LLVM:   ret <16 x i8> [[TMP0]]
+}
+
+uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
+  // 128-bit (q) unsigned rounding shift-right-accumulate on 8 x i16;
+  // operands round-trip through <16 x i8> bitcasts before urshl(-3) + add.
+  return vrsraq_n_u16(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_u16
+  // CIR: [[VRSHR_N:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u16i x 8>
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" {{.*}}, [[splat]] : (!cir.vector<!u16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!u16i x 8>
+  // CIR: [[TMP2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u16i x 8>
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector<!u16i x 8>
+
+  // LLVM-LABEL: test_vrsraq_n_u16
+  // LLVM:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> {{.*}}, <8 x i16> splat (i16 -3))
+  // LLVM:   [[TMP2:%.*]] = bitcast <16 x i8> {{.*}} to <8 x i16>
+  // LLVM:   [[TMP3:%.*]] = add <8 x i16> {{.*}}, [[VRSHR_N1]]
+}
+
+uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
+  // 128-bit (q) unsigned rounding shift-right-accumulate on 4 x i32;
+  // operands round-trip through <16 x i8> bitcasts before urshl(-3) + add.
+  return vrsraq_n_u32(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_u32
+  // CIR: [[VRSHR_N:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u32i x 4>
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" {{.*}}, [[splat]] : (!cir.vector<!u32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!u32i x 4>
+  // CIR: [[TMP2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u32i x 4>
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector<!u32i x 4>
+
+  // LLVM-LABEL: test_vrsraq_n_u32
+  // LLVM:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> {{.*}}, <4 x i32> splat (i32 -3))
+  // LLVM:   [[TMP2:%.*]] = bitcast <16 x i8> {{.*}} to <4 x i32>
+  // LLVM:   [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
+}
+
+uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
+  // 128-bit (q) unsigned rounding shift-right-accumulate on 2 x i64;
+  // operands round-trip through <16 x i8> bitcasts before urshl(-3) + add.
+  return vrsraq_n_u64(a, b, 3);
+
+  // CIR-LABEL: vrsraq_n_u64
+  // CIR: [[VRSHR_N:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u64i x 2>
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" {{.*}}, [[splat]] : (!cir.vector<!u64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!u64i x 2>
+  // CIR: [[TMP2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u64i x 2>
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector<!u64i x 2>
+
+  // LLVM-LABEL: test_vrsraq_n_u64
+  // LLVM:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> {{.*}}, <2 x i64> splat (i64 -3))
+  // LLVM:   [[TMP2:%.*]] = bitcast <16 x i8> {{.*}} to <2 x i64>
+  // LLVM:   [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
+}
+
+// NYI-LABEL: @test_vsri_n_s8(
+// NYI:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
+// NYI:   ret <8 x i8> [[VSRI_N]]
+// int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
+//   return vsri_n_s8(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsri_n_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// NYI:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// NYI:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
+// NYI:   ret <4 x i16> [[VSRI_N2]]
+// int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
+//   return vsri_n_s16(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsri_n_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// NYI:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// NYI:   [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
+// NYI:   ret <2 x i32> [[VSRI_N2]]
+// int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
+//   return vsri_n_s32(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsriq_n_s8(
+// NYI:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
+// NYI:   ret <16 x i8> [[VSRI_N]]
+// int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
+//   return vsriq_n_s8(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsriq_n_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// NYI:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
+// NYI:   ret <8 x i16> [[VSRI_N2]]
+// int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
+//   return vsriq_n_s16(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsriq_n_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// NYI:   [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
+// NYI:   ret <4 x i32> [[VSRI_N2]]
+// int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
+//   return vsriq_n_s32(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsriq_n_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// NYI:   [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
+// NYI:   ret <2 x i64> [[VSRI_N2]]
+// int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
+//   return vsriq_n_s64(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsri_n_u8(
+// NYI:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
+// NYI:   ret <8 x i8> [[VSRI_N]]
+// uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
+//   return vsri_n_u8(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsri_n_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// NYI:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// NYI:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
+// NYI:   ret <4 x i16> [[VSRI_N2]]
+// uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
+//   return vsri_n_u16(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsri_n_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// NYI:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// NYI:   [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
+// NYI:   ret <2 x i32> [[VSRI_N2]]
+// uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
+//   return vsri_n_u32(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsriq_n_u8(
+// NYI:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
+// NYI:   ret <16 x i8> [[VSRI_N]]
+// uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
+//   return vsriq_n_u8(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsriq_n_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// NYI:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
+// NYI:   ret <8 x i16> [[VSRI_N2]]
+// uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
+//   return vsriq_n_u16(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsriq_n_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// NYI:   [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
+// NYI:   ret <4 x i32> [[VSRI_N2]]
+// uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
+//   return vsriq_n_u32(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsriq_n_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// NYI:   [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
+// NYI:   ret <2 x i64> [[VSRI_N2]]
+// uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
+//   return vsriq_n_u64(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsri_n_p8(
+// NYI:   [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
+// NYI:   ret <8 x i8> [[VSRI_N]]
+// poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
+//   return vsri_n_p8(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsri_n_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// NYI:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// NYI:   [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 15)
+// NYI:   ret <4 x i16> [[VSRI_N2]]
+// poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
+//   return vsri_n_p16(a, b, 15);
+// }
+
+// NYI-LABEL: @test_vsriq_n_p8(
+// NYI:   [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
+// NYI:   ret <16 x i8> [[VSRI_N]]
+// poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
+//   return vsriq_n_p8(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsriq_n_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// NYI:   [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 15)
+// NYI:   ret <8 x i16> [[VSRI_N2]]
+// poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
+//   return vsriq_n_p16(a, b, 15);
+// }
+
+// NYI-LABEL: @test_vsli_n_s8(
+// NYI:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
+// NYI:   ret <8 x i8> [[VSLI_N]]
+// int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
+//   return vsli_n_s8(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsli_n_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// NYI:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// NYI:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
+// NYI:   ret <4 x i16> [[VSLI_N2]]
+// int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
+//   return vsli_n_s16(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsli_n_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// NYI:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// NYI:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
+// NYI:   ret <2 x i32> [[VSLI_N2]]
+// int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
+//   return vsli_n_s32(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsliq_n_s8(
+// NYI:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
+// NYI:   ret <16 x i8> [[VSLI_N]]
+// int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
+//   return vsliq_n_s8(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsliq_n_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// NYI:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
+// NYI:   ret <8 x i16> [[VSLI_N2]]
+// int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
+//   return vsliq_n_s16(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsliq_n_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// NYI:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
+// NYI:   ret <4 x i32> [[VSLI_N2]]
+// int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
+//   return vsliq_n_s32(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsliq_n_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// NYI:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
+// NYI:   ret <2 x i64> [[VSLI_N2]]
+// int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
+//   return vsliq_n_s64(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsli_n_u8(
+// NYI:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
+// NYI:   ret <8 x i8> [[VSLI_N]]
+// uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
+//   return vsli_n_u8(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsli_n_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// NYI:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// NYI:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
+// NYI:   ret <4 x i16> [[VSLI_N2]]
+// uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
+//   return vsli_n_u16(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsli_n_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// NYI:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// NYI:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
+// NYI:   ret <2 x i32> [[VSLI_N2]]
+// uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
+//   return vsli_n_u32(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsliq_n_u8(
+// NYI:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
+// NYI:   ret <16 x i8> [[VSLI_N]]
+// uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
+//   return vsliq_n_u8(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsliq_n_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// NYI:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
+// NYI:   ret <8 x i16> [[VSLI_N2]]
+// uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
+//   return vsliq_n_u16(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsliq_n_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// NYI:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
+// NYI:   ret <4 x i32> [[VSLI_N2]]
+// uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
+//   return vsliq_n_u32(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsliq_n_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
+// NYI:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
+// NYI:   ret <2 x i64> [[VSLI_N2]]
+// uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
+//   return vsliq_n_u64(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsli_n_p8(
+// NYI:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
+// NYI:   ret <8 x i8> [[VSLI_N]]
+// poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
+//   return vsli_n_p8(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsli_n_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// NYI:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// NYI:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 15)
+// NYI:   ret <4 x i16> [[VSLI_N2]]
+// poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
+//   return vsli_n_p16(a, b, 15);
+// }
+
+// NYI-LABEL: @test_vsliq_n_p8(
+// NYI:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
+// NYI:   ret <16 x i8> [[VSLI_N]]
+// poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
+//   return vsliq_n_p8(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vsliq_n_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// NYI:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 15)
+// NYI:   ret <8 x i16> [[VSLI_N2]]
+// poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
+//   return vsliq_n_p16(a, b, 15);
+// }
+
+uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
+  return vqshlu_n_s8(a, 3);
+
+  // CIR-LABEL: vqshlu_n_s8
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
+  // CIR: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i]> : !cir.vector<!s8i x 8>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}@test_vqshlu_n_s8(<8 x i8>{{.*}}[[A:%.*]])
+  // LLVM: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> {{.*}}, <8 x i8> splat (i8 3))
+  // LLVM: ret <8 x i8> [[VQSHLU_N]]
+}
+
+uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
+  return vqshlu_n_s16(a, 3);
+
+  // CIR-LABEL: vqshlu_n_s16
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s16i, #cir.int<3> : !s16i,
+  // CIR-SAME: #cir.int<3> : !s16i, #cir.int<3> : !s16i]> : !cir.vector<!s16i x 4>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}@test_vqshlu_n_s16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> {{.*}}, <4 x i16> splat (i16 3))
+}
+
+uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
+  return vqshlu_n_s32(a, 3);
+
+  // CIR-LABEL: vqshlu_n_s32
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s32i, #cir.int<3> : !s32i]> : !cir.vector<!s32i x 2>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}@test_vqshlu_n_s32(<2 x i32>{{.*}}[[A:%.*]])
+  // LLVM: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> {{.*}}, <2 x i32> splat (i32 3))
+}
+
+uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
+  return vqshluq_n_s8(a, 3);
+
+  // CIR-LABEL: vqshluq_n_s8
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
+  // CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
+  // CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i,
+  // CIR-SAME: #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i, #cir.int<3> : !s8i]> : !cir.vector<!s8i x 16>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!u8i x 16>
+
+  // LLVM: {{.*}}@test_vqshluq_n_s8(<16 x i8>{{.*}}[[A:%.*]])
+  // LLVM: [[VQSHLUQ_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> {{.*}}, <16 x i8> splat (i8 3))
+  // LLVM: ret <16 x i8> [[VQSHLUQ_N]]
+}
+
+uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
+  return vqshluq_n_s16(a, 3);
+
+  // CIR-LABEL: vqshluq_n_s16
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s16i, #cir.int<3> : !s16i,
+  // CIR-SAME: #cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i,
+  // CIR-SAME: #cir.int<3> : !s16i, #cir.int<3> : !s16i]> : !cir.vector<!s16i x 8>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}@test_vqshluq_n_s16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[VQSHLUQ_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> {{.*}}, <8 x i16> splat (i16 3))
+}
+
+uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
+  return vqshluq_n_s32(a, 3);
+
+  // CIR-LABEL: vqshluq_n_s32
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s32i, #cir.int<3> : !s32i,
+  // CIR-SAME: #cir.int<3> : !s32i, #cir.int<3> : !s32i]> : !cir.vector<!s32i x 4>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}@test_vqshluq_n_s32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM: [[VQSHLUQ_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> {{.*}}, <4 x i32> splat (i32 3))
+}
+
+uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
+  return vqshluq_n_s64(a, 3);
+
+  // CIR-LABEL: vqshluq_n_s64
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s64i, #cir.int<3> : !s64i]> : !cir.vector<!s64i x 2>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqshlu" {{%.*}}, [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}@test_vqshluq_n_s64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM: [[VQSHLUQ_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> {{.*}}, <2 x i64> splat (i64 3))
+}
+
+int8x8_t test_vshrn_n_s16(int16x8_t a) {
+  return vshrn_n_s16(a, 3);
+
+  // CIR-LABEL: vshrn_n_s16
+  // CIR: [[TGT:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s16i x 8>
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i,
+  // CIR-SAME: #cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i]> : !cir.vector<!s16i x 8>
+  // CIR: [[RES:%.*]] = cir.shift(right, [[TGT]] : !cir.vector<!s16i x 8>, [[AMT]] : !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+  // CIR: {{%.*}} = cir.cast integral [[RES]] : !cir.vector<!s16i x 8> -> !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}@test_vshrn_n_s16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM:   [[TMP2:%.*]] = ashr <8 x i16> {{.*}}, splat (i16 3)
+  // LLVM:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
+}
+
+int16x4_t test_vshrn_n_s32(int32x4_t a) {
+  return vshrn_n_s32(a, 9);
+
+  // CIR-LABEL: vshrn_n_s32
+  // CIR: [[TGT:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s32i x 4>
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<9> : !s32i, #cir.int<9> : !s32i,
+  // CIR-SAME: #cir.int<9> : !s32i, #cir.int<9> : !s32i]> : !cir.vector<!s32i x 4>
+  // CIR: [[RES:%.*]] = cir.shift(right, [[TGT]] : !cir.vector<!s32i x 4>, [[AMT]] : !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+  // CIR: {{%.*}} = cir.cast integral [[RES]] : !cir.vector<!s32i x 4> -> !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}@test_vshrn_n_s32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM:   [[TMP2:%.*]] = ashr <4 x i32> {{.*}}, splat (i32 9)
+  // LLVM:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+}
+
+int32x2_t test_vshrn_n_s64(int64x2_t a) {
+  return vshrn_n_s64(a, 19);
+
+  // CIR-LABEL: vshrn_n_s64
+  // CIR: [[TGT:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s64i x 2>
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<19> : !s64i, #cir.int<19> : !s64i]> : !cir.vector<!s64i x 2>
+  // CIR: [[RES:%.*]] = cir.shift(right, [[TGT]] : !cir.vector<!s64i x 2>, [[AMT]] : !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+  // CIR: {{%.*}} = cir.cast integral [[RES]] : !cir.vector<!s64i x 2> -> !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}@test_vshrn_n_s64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM:   [[TMP2:%.*]] = ashr <2 x i64> {{.*}}, splat (i64 19)
+  // LLVM:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+}
+
+uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
+  return vshrn_n_u16(a, 3);
+
+  // CIR-LABEL: vshrn_n_u16
+  // CIR: [[TGT:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u16i x 8>
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<3> : !u16i, #cir.int<3> : !u16i, #cir.int<3> : !u16i,
+  // CIR-SAME: #cir.int<3> : !u16i, #cir.int<3> : !u16i, #cir.int<3> : !u16i, #cir.int<3> : !u16i, #cir.int<3> : !u16i]> : !cir.vector<!u16i x 8>
+  // CIR: [[RES:%.*]] = cir.shift(right, [[TGT]] : !cir.vector<!u16i x 8>, [[AMT]] : !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>
+  // CIR: {{%.*}} = cir.cast integral [[RES]] : !cir.vector<!u16i x 8> -> !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}@test_vshrn_n_u16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM:   [[TMP2:%.*]] = lshr <8 x i16> {{.*}}, splat (i16 3)
+  // LLVM:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
+}
+
+uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
+  return vshrn_n_u32(a, 9);
+
+  // CIR-LABEL: vshrn_n_u32
+  // CIR: [[TGT:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u32i x 4>
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<9> : !u32i, #cir.int<9> : !u32i,
+  // CIR-SAME: #cir.int<9> : !u32i, #cir.int<9> : !u32i]> : !cir.vector<!u32i x 4>
+  // CIR: [[RES:%.*]] = cir.shift(right, [[TGT]] : !cir.vector<!u32i x 4>, [[AMT]] : !cir.vector<!u32i x 4>) -> !cir.vector<!u32i x 4>
+  // CIR: {{%.*}} = cir.cast integral [[RES]] : !cir.vector<!u32i x 4> -> !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}@test_vshrn_n_u32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM:   [[TMP2:%.*]] = lshr <4 x i32> {{.*}}, splat (i32 9)
+  // LLVM:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+}
+
+uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
+  return vshrn_n_u64(a, 19);
+
+  // CIR-LABEL: vshrn_n_u64
+  // CIR: [[TGT:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u64i x 2>
+  // CIR: [[AMT:%.*]] = cir.const #cir.const_vector<[#cir.int<19> : !u64i, #cir.int<19> : !u64i]> : !cir.vector<!u64i x 2>
+  // CIR: [[RES:%.*]] = cir.shift(right, [[TGT]] : !cir.vector<!u64i x 2>, [[AMT]] : !cir.vector<!u64i x 2>) -> !cir.vector<!u64i x 2>
+  // CIR: {{%.*}} = cir.cast integral [[RES]] : !cir.vector<!u64i x 2> -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}@test_vshrn_n_u64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM:   [[TMP2:%.*]] = lshr <2 x i64> {{.*}}, splat (i64 19)
+  // LLVM:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+}
+
+// NYI-LABEL: @test_vshrn_high_n_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// NYI:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   ret <16 x i8> [[SHUFFLE_I]]
+// int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) {
+//   return vshrn_high_n_s16(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vshrn_high_n_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
+// NYI:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// NYI:   ret <8 x i16> [[SHUFFLE_I]]
+// int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) {
+//   return vshrn_high_n_s32(a, b, 9);
+// }
+
+// NYI-LABEL: @test_vshrn_high_n_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 19, i64 19>
+// NYI:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// NYI:   ret <4 x i32> [[SHUFFLE_I]]
+// int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) {
+//   return vshrn_high_n_s64(a, b, 19);
+// }
+
+// NYI-LABEL: @test_vshrn_high_n_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// NYI:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   ret <16 x i8> [[SHUFFLE_I]]
+// uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
+//   return vshrn_high_n_u16(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vshrn_high_n_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 9, i32 9, i32 9, i32 9>
+// NYI:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// NYI:   ret <8 x i16> [[SHUFFLE_I]]
+// uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
+//   return vshrn_high_n_u32(a, b, 9);
+// }
+
+// NYI-LABEL: @test_vshrn_high_n_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 19, i64 19>
+// NYI:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// NYI:   ret <4 x i32> [[SHUFFLE_I]]
+// uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
+//   return vshrn_high_n_u64(a, b, 19);
+// }
+
+// NYI-LABEL: @test_vqshrun_n_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
+// NYI:   ret <8 x i8> [[VQSHRUN_N1]]
+// uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
+//   return vqshrun_n_s16(a, 3);
+// }
+
+// NYI-LABEL: @test_vqshrun_n_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
+// NYI:   ret <4 x i16> [[VQSHRUN_N1]]
+// uint16x4_t test_vqshrun_n_s32(int32x4_t a) {
+//   return vqshrun_n_s32(a, 9);
+// }
+
+// NYI-LABEL: @test_vqshrun_n_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
+// NYI:   ret <2 x i32> [[VQSHRUN_N1]]
+// uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
+//   return vqshrun_n_s64(a, 19);
+// }
+
+// NYI-LABEL: @test_vqshrun_high_n_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   ret <16 x i8> [[SHUFFLE_I]]
+// int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) {
+//   return vqshrun_high_n_s16(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vqshrun_high_n_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// NYI:   ret <8 x i16> [[SHUFFLE_I]]
+// int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) {
+//   return vqshrun_high_n_s32(a, b, 9);
+// }
+
+// NYI-LABEL: @test_vqshrun_high_n_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// NYI:   ret <4 x i32> [[SHUFFLE_I]]
+// int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) {
+//   return vqshrun_high_n_s64(a, b, 19);
+// }
+
+// NYI-LABEL: @test_vrshrn_n_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
+// NYI:   ret <8 x i8> [[VRSHRN_N1]]
+// int8x8_t test_vrshrn_n_s16(int16x8_t a) {
+//   return vrshrn_n_s16(a, 3);
+// }
+
+// NYI-LABEL: @test_vrshrn_n_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
+// NYI:   ret <4 x i16> [[VRSHRN_N1]]
+// int16x4_t test_vrshrn_n_s32(int32x4_t a) {
+//   return vrshrn_n_s32(a, 9);
+// }
+
+int32x2_t test_vrshrn_n_s64(int64x2_t a) {
+  return vrshrn_n_s64(a, 19);
+
+  // CIR-LABEL: vrshrn_n_s64
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.rshrn" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s64i x 2>, !s32i) -> !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}test_vrshrn_n_s64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> {{.*}}, i32 19)
+}
+
+uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
+  return vrshrn_n_u16(a, 3);
+
+  // CIR-LABEL: vrshrn_n_u16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.rshrn" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u16i x 8>, !s32i) -> !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}test_vrshrn_n_u16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> {{.*}}, i32 3)
+}
+
+uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
+  return vrshrn_n_u32(a, 9);
+
+  // CIR-LABEL: vrshrn_n_u32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.rshrn" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u32i x 4>, !s32i) -> !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}test_vrshrn_n_u32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> {{.*}}, i32 9)
+}
+
+uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
+  return vrshrn_n_u64(a, 19);
+
+  // CIR-LABEL: vrshrn_n_u64
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.rshrn" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u64i x 2>, !s32i) -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}test_vrshrn_n_u64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> {{.*}}, i32 19)
+}
+
+// NYI-LABEL: @test_vrshrn_high_n_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   ret <16 x i8> [[SHUFFLE_I]]
+// int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
+//   return vrshrn_high_n_s16(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vrshrn_high_n_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// NYI:   ret <8 x i16> [[SHUFFLE_I]]
+// int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
+//   return vrshrn_high_n_s32(a, b, 9);
+// }
+
+// NYI-LABEL: @test_vrshrn_high_n_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// NYI:   ret <4 x i32> [[SHUFFLE_I]]
+// int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
+//   return vrshrn_high_n_s64(a, b, 19);
+// }
+
+// NYI-LABEL: @test_vrshrn_high_n_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   ret <16 x i8> [[SHUFFLE_I]]
+// uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
+//   return vrshrn_high_n_u16(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vrshrn_high_n_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// NYI:   ret <8 x i16> [[SHUFFLE_I]]
+// uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
+//   return vrshrn_high_n_u32(a, b, 9);
+// }
+
+// NYI-LABEL: @test_vrshrn_high_n_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// NYI:   ret <4 x i32> [[SHUFFLE_I]]
+// uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
+//   return vrshrn_high_n_u64(a, b, 19);
+// }
+
+uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
+  return vqrshrun_n_s16(a, 3);
+  // CIR-LABEL: test_vqrshrun_n_s16
+  // CIR: [[INTRN_ARG1:%.*]] = cir.const #cir.int<3> : !s32i
+  // CIR: [[INTRN_ARG0:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s16i x 8>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqrshrun" [[INTRN_ARG0]], [[INTRN_ARG1]] :
+  // CIR-SAME: (!cir.vector<!s16i x 8>, !s32i) -> !cir.vector<!u8i x 8>
+
+  // LLVM-LABEL: @test_vqrshrun_n_s16(
+  // LLVM:   [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> {{.*}}, i32 3)
+  // LLVM:   ret <8 x i8> [[VQRSHRUN_N1]]
+}
+
+uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
+  return vqrshrun_n_s32(a, 9);
+  // CIR-LABEL: test_vqrshrun_n_s32
+  // CIR: [[INTRN_ARG1:%.*]] = cir.const #cir.int<9> : !s32i
+  // CIR: [[INTRN_ARG0:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s32i x 4>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqrshrun" [[INTRN_ARG0]], [[INTRN_ARG1]] :
+  // CIR-SAME: (!cir.vector<!s32i x 4>, !s32i) -> !cir.vector<!u16i x 4>
+
+  // LLVM-LABEL: @test_vqrshrun_n_s32(
+  // LLVM:   [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> {{.*}}, i32 9)
+}
+
+uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
+  return vqrshrun_n_s64(a, 19);
+  // CIR-LABEL: test_vqrshrun_n_s64
+  // CIR: [[INTRN_ARG1:%.*]] = cir.const #cir.int<19> : !s32i
+  // CIR: [[INTRN_ARG0:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s64i x 2>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqrshrun" [[INTRN_ARG0]], [[INTRN_ARG1]] :
+  // CIR-SAME: (!cir.vector<!s64i x 2>, !s32i) -> !cir.vector<!u32i x 2>
+
+  // LLVM-LABEL: @test_vqrshrun_n_s64(
+  // LLVM:   [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> {{.*}}, i32 19)
+}
+
+// NYI-LABEL: @test_vqrshrun_high_n_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   ret <16 x i8> [[SHUFFLE_I]]
+// int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
+//   return vqrshrun_high_n_s16(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vqrshrun_high_n_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// NYI:   ret <8 x i16> [[SHUFFLE_I]]
+// int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
+//   return vqrshrun_high_n_s32(a, b, 9);
+// }
+
+// NYI-LABEL: @test_vqrshrun_high_n_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// NYI:   ret <4 x i32> [[SHUFFLE_I]]
+// int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) {
+//   return vqrshrun_high_n_s64(a, b, 19);
+// }
+
+int8x8_t test_vqshrn_n_s16(int16x8_t a) {
+  return vqshrn_n_s16(a, 3);
+
+  // CIR-LABEL: vqshrn_n_s16
+  // CIR: cir.llvm.intrinsic "aarch64.neon.sqshrn" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 8>, !s32i) -> !cir.vector<!s8i x 8>
+
+  // LLVM:{{.*}}test_vqshrn_n_s16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>
+}
+
+int16x4_t test_vqshrn_n_s32(int32x4_t a) {
+  return vqshrn_n_s32(a, 9);
+
+  // CIR-LABEL: vqshrn_n_s32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.sqshrn" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 4>, !s32i) -> !cir.vector<!s16i x 4>
+
+  // LLVM:{{.*}}test_vqshrn_n_s32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>
+}
+
+int32x2_t test_vqshrn_n_s64(int64x2_t a) {
+  return vqshrn_n_s64(a, 19);
+
+  // CIR-LABEL: vqshrn_n_s64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.sqshrn" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s64i x 2>, !s32i) -> !cir.vector<!s32i x 2>
+
+  // LLVM:{{.*}}test_vqshrn_n_s64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>
+}
+
+uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
+  return vqshrn_n_u16(a, 3);
+
+  // CIR-LABEL: vqshrn_n_u16
+  // CIR: cir.llvm.intrinsic "aarch64.neon.uqshrn" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u16i x 8>, !s32i) -> !cir.vector<!u8i x 8>
+
+  // LLVM:{{.*}}test_vqshrn_n_u16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>
+}
+
+uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
+  return vqshrn_n_u32(a, 9);
+
+  // CIR-LABEL: vqshrn_n_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.uqshrn" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u32i x 4>, !s32i) -> !cir.vector<!u16i x 4>
+
+  // LLVM:{{.*}}test_vqshrn_n_u32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>
+}
+
+uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
+  return vqshrn_n_u64(a, 19);
+
+  // CIR-LABEL: vqshrn_n_u64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.uqshrn" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u64i x 2>, !s32i) -> !cir.vector<!u32i x 2>
+
+  // LLVM:{{.*}}test_vqshrn_n_u64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>
+}
+
+// NYI-LABEL: @test_vqshrn_high_n_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   ret <16 x i8> [[SHUFFLE_I]]
+// int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) {
+//   return vqshrn_high_n_s16(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vqshrn_high_n_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// NYI:   ret <8 x i16> [[SHUFFLE_I]]
+// int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) {
+//   return vqshrn_high_n_s32(a, b, 9);
+// }
+
+// NYI-LABEL: @test_vqshrn_high_n_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// NYI:   ret <4 x i32> [[SHUFFLE_I]]
+// int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) {
+//   return vqshrn_high_n_s64(a, b, 19);
+// }
+
+// NYI-LABEL: @test_vqshrn_high_n_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   ret <16 x i8> [[SHUFFLE_I]]
+// uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
+//   return vqshrn_high_n_u16(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vqshrn_high_n_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// NYI:   ret <8 x i16> [[SHUFFLE_I]]
+// uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
+//   return vqshrn_high_n_u32(a, b, 9);
+// }
+
+// NYI-LABEL: @test_vqshrn_high_n_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// NYI:   ret <4 x i32> [[SHUFFLE_I]]
+// uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
+//   return vqshrn_high_n_u64(a, b, 19);
+// }
+
+int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
+  return vqrshrn_n_s16(a, 3);
+
+  // CIR-LABEL: vqrshrn_n_s16
+  // CIR: [[AMT:%.*]] = cir.const #cir.int<3> : !s32i
+  // CIR: [[VQRSHRN_N:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s16i x 8>
+  // CIR: [[VQRSHRN_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.sqrshrn" [[VQRSHRN_N]], [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s16i x 8>, !s32i) -> !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}test_vqrshrn_n_s16(<8 x i16>{{.*}}[[a:%.*]])
+  // LLVM:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>
+}
+
+int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
+  return vqrshrn_n_s32(a, 9);
+
+  // CIR-LABEL: vqrshrn_n_s32
+  // CIR: [[AMT:%.*]] = cir.const #cir.int<9> : !s32i
+  // CIR: [[VQRSHRN_N:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s32i x 4>
+  // CIR: [[VQRSHRN_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.sqrshrn" [[VQRSHRN_N]], [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s32i x 4>, !s32i) -> !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}test_vqrshrn_n_s32(<4 x i32>{{.*}}[[a:%.*]])
+  // LLVM:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>
+
+}
+
+int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
+  return vqrshrn_n_s64(a, 19);
+
+  // CIR-LABEL: vqrshrn_n_s64
+  // CIR: [[AMT:%.*]] = cir.const #cir.int<19> : !s32i
+  // CIR: [[VQRSHRN_N:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s64i x 2>
+  // CIR: [[VQRSHRN_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.sqrshrn" [[VQRSHRN_N]], [[AMT]] :
+  // CIR-SAME: (!cir.vector<!s64i x 2>, !s32i) -> !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}test_vqrshrn_n_s64(<2 x i64>{{.*}}[[a:%.*]])
+  // LLVM:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>
+}
+
+uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
+  return vqrshrn_n_u16(a, 3);
+
+  // CIR-LABEL: vqrshrn_n_u16
+  // CIR: [[AMT:%.*]] = cir.const #cir.int<3> : !s32i
+  // CIR: [[VQRSHRN_N:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u16i x 8>
+  // CIR: [[VQRSHRN_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.uqrshrn" [[VQRSHRN_N]], [[AMT]] :
+  // CIR-SAME: (!cir.vector<!u16i x 8>, !s32i) -> !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}test_vqrshrn_n_u16(<8 x i16>{{.*}}[[a:%.*]])
+  // LLVM:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>
+}
+
+uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
+  return vqrshrn_n_u32(a, 9);
+
+  // CIR-LABEL: vqrshrn_n_u32
+  // CIR: [[AMT:%.*]] = cir.const #cir.int<9> : !s32i
+  // CIR: [[VQRSHRN_N:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u32i x 4>
+  // CIR: [[VQRSHRN_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.uqrshrn" [[VQRSHRN_N]], [[AMT]] :
+  // CIR-SAME: (!cir.vector<!u32i x 4>, !s32i) -> !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}test_vqrshrn_n_u32(<4 x i32>{{.*}}[[a:%.*]])
+  // LLVM:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>
+}
+
+uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
+  return vqrshrn_n_u64(a, 19);
+
+  // CIR-LABEL: vqrshrn_n_u64
+  // CIR: [[AMT:%.*]] = cir.const #cir.int<19> : !s32i
+  // CIR: [[VQRSHRN_N:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u64i x 2>
+  // CIR: [[VQRSHRN_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.uqrshrn" [[VQRSHRN_N]], [[AMT]] :
+  // CIR-SAME: (!cir.vector<!u64i x 2>, !s32i) -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}test_vqrshrn_n_u64(<2 x i64>{{.*}}[[a:%.*]])
+  // LLVM:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>
+}
+
+// NYI-LABEL: @test_vqrshrn_high_n_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   ret <16 x i8> [[SHUFFLE_I]]
+// int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
+//   return vqrshrn_high_n_s16(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vqrshrn_high_n_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// NYI:   ret <8 x i16> [[SHUFFLE_I]]
+// int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
+//   return vqrshrn_high_n_s32(a, b, 9);
+// }
+
+// NYI-LABEL: @test_vqrshrn_high_n_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// NYI:   ret <4 x i32> [[SHUFFLE_I]]
+// int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
+//   return vqrshrn_high_n_s64(a, b, 19);
+// }
+
+// NYI-LABEL: @test_vqrshrn_high_n_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   ret <16 x i8> [[SHUFFLE_I]]
+// uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
+//   return vqrshrn_high_n_u16(a, b, 3);
+// }
+
+// NYI-LABEL: @test_vqrshrn_high_n_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// NYI:   ret <8 x i16> [[SHUFFLE_I]]
+// uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
+//   return vqrshrn_high_n_u32(a, b, 9);
+// }
+
+// NYI-LABEL: @test_vqrshrn_high_n_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// NYI:   ret <4 x i32> [[SHUFFLE_I]]
+// uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
+//   return vqrshrn_high_n_u64(a, b, 19);
+// }
+
+int16x8_t test_vshll_n_s8(int8x8_t a) {
+  return vshll_n_s8(a, 3);
+
+  // CIR-LABEL: vshll_n_s8
+  // CIR: [[SHIFT_TGT:%.*]] = cir.cast integral {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s16i x 8>
+  // CIR: [[SHIFT_AMT:%.*]] =  cir.const #cir.const_vector<[#cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i,
+  // CIR-SAME: #cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i, #cir.int<3> : !s16i]> : !cir.vector<!s16i x 8>
+  // CIR: {{%.*}} = cir.shift(left, [[SHIFT_TGT]] : !cir.vector<!s16i x 8>, [[SHIFT_AMT]] : !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}@test_vshll_n_s8(<8 x i8>{{.*}}[[A:%.*]])
+  // LLVM:   [[TMP0:%.*]] = sext <8 x i8> {{.*}} to <8 x i16>
+  // LLVM:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3)
+}
+
+int32x4_t test_vshll_n_s16(int16x4_t a) {
+  return vshll_n_s16(a, 9);
+
+  // CIR-LABEL: vshll_n_s16
+  // CIR: [[SHIFT_TGT:%.*]] = cir.cast integral {{%.*}} : !cir.vector<!s16i x 4> -> !cir.vector<!s32i x 4>
+  // CIR: [[SHIFT_AMT:%.*]] =  cir.const #cir.const_vector<[#cir.int<9> : !s32i, #cir.int<9> : !s32i, #cir.int<9> :
+  // CIR-SAME: !s32i, #cir.int<9> : !s32i]> : !cir.vector<!s32i x 4>
+  // CIR: {{%.*}} = cir.shift(left, [[SHIFT_TGT]] : !cir.vector<!s32i x 4>, [[SHIFT_AMT]] : !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}@test_vshll_n_s16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM:   [[TMP2:%.*]] = sext <4 x i16> {{.*}} to <4 x i32>
+  // LLVM:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9)
+}
+
+int64x2_t test_vshll_n_s32(int32x2_t a) {
+  return vshll_n_s32(a, 19);
+
+  // CIR-LABEL: vshll_n_s32
+  // CIR: [[SHIFT_TGT:%.*]] = cir.cast integral {{%.*}} : !cir.vector<!s32i x 2> -> !cir.vector<!s64i x 2>
+  // CIR: [[SHIFT_AMT:%.*]] =  cir.const #cir.const_vector<[#cir.int<19> : !s64i, #cir.int<19> : !s64i]> : !cir.vector<!s64i x 2>
+  // CIR: {{%.*}} = cir.shift(left, [[SHIFT_TGT]] : !cir.vector<!s64i x 2>, [[SHIFT_AMT]] : !cir.vector<!s64i x 2>)
+
+  // LLVM: {{.*}}@test_vshll_n_s32(<2 x i32>{{.*}}[[A:%.*]])
+  // LLVM:   [[TMP2:%.*]] = sext <2 x i32> {{.*}} to <2 x i64>
+  // LLVM:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19)
+}
+
+uint16x8_t test_vshll_n_u8(uint8x8_t a) {
+  return vshll_n_u8(a, 3);
+
+  // CIR-LABEL: vshll_n_u8
+  // CIR: [[SHIFT_TGT:%.*]] = cir.cast integral {{%.*}} : !cir.vector<!u8i x 8> -> !cir.vector<!u16i x 8>
+  // CIR: [[SHIFT_AMT:%.*]] =  cir.const #cir.const_vector<[#cir.int<3> : !u16i, #cir.int<3> : !u16i, #cir.int<3> : !u16i, #cir.int<3> : !u16i,
+  // CIR-SAME: #cir.int<3> : !u16i, #cir.int<3> : !u16i, #cir.int<3> : !u16i, #cir.int<3> : !u16i]> : !cir.vector<!u16i x 8>
+  // CIR: {{%.*}} = cir.shift(left, [[SHIFT_TGT]] : !cir.vector<!u16i x 8>, [[SHIFT_AMT]] : !cir.vector<!u16i x 8>)
+
+  // LLVM: {{.*}}@test_vshll_n_u8(<8 x i8>{{.*}}[[A:%.*]])
+  // LLVM:   [[TMP0:%.*]] = zext <8 x i8> {{.*}} to <8 x i16>
+  // LLVM:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3)
+}
+
+uint32x4_t test_vshll_n_u16(uint16x4_t a) {
+  return vshll_n_u16(a, 9);
+
+  // CIR-LABEL: vshll_n_u16
+  // CIR: [[SHIFT_TGT:%.*]] = cir.cast integral {{%.*}} : !cir.vector<!u16i x 4> -> !cir.vector<!u32i x 4>
+  // CIR: [[SHIFT_AMT:%.*]] =  cir.const #cir.const_vector<[#cir.int<9> : !u32i, #cir.int<9> : !u32i,
+  // CIR-SAME: #cir.int<9> : !u32i, #cir.int<9> : !u32i]> : !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}@test_vshll_n_u16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM:   [[TMP2:%.*]] = zext <4 x i16> {{.*}} to <4 x i32>
+  // LLVM:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9)
+}
+
+uint64x2_t test_vshll_n_u32(uint32x2_t a) {
+  return vshll_n_u32(a, 19);
+
+  // CIR-LABEL: vshll_n_u32
+  // CIR: [[SHIFT_TGT:%.*]] = cir.cast integral {{%.*}} : !cir.vector<!u32i x 2> -> !cir.vector<!u64i x 2>
+  // CIR: [[SHIFT_AMT:%.*]] =  cir.const #cir.const_vector<[#cir.int<19> : !u64i, #cir.int<19> : !u64i]> : !cir.vector<!u64i x 2>
+  // CIR: {{%.*}} = cir.shift(left, [[SHIFT_TGT]] : !cir.vector<!u64i x 2>, [[SHIFT_AMT]] : !cir.vector<!u64i x 2>)
+
+  // LLVM: {{.*}}@test_vshll_n_u32(<2 x i32>{{.*}}[[A:%.*]])
+  // LLVM:   [[TMP2:%.*]] = zext <2 x i32> {{.*}} to <2 x i64>
+  // LLVM:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19)
+}
+
+// NYI-LABEL: @test_vshll_high_n_s8(
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
+// NYI:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// NYI:   ret <8 x i16> [[VSHLL_N]]
+// int16x8_t test_vshll_high_n_s8(int8x16_t a) {
+//   return vshll_high_n_s8(a, 3);
+// }
+
+// NYI-LABEL: @test_vshll_high_n_s16(
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// NYI:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+// NYI:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
+// NYI:   ret <4 x i32> [[VSHLL_N]]
+// int32x4_t test_vshll_high_n_s16(int16x8_t a) {
+//   return vshll_high_n_s16(a, 9);
+// }
+
+// NYI-LABEL: @test_vshll_high_n_s32(
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// NYI:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+// NYI:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
+// NYI:   ret <2 x i64> [[VSHLL_N]]
+// int64x2_t test_vshll_high_n_s32(int32x4_t a) {
+//   return vshll_high_n_s32(a, 19);
+// }
+
+// NYI-LABEL: @test_vshll_high_n_u8(
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
+// NYI:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+// NYI:   ret <8 x i16> [[VSHLL_N]]
+// uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
+//   return vshll_high_n_u8(a, 3);
+// }
+
+// NYI-LABEL: @test_vshll_high_n_u16(
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// NYI:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+// NYI:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 9, i32 9, i32 9, i32 9>
+// NYI:   ret <4 x i32> [[VSHLL_N]]
+// uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
+//   return vshll_high_n_u16(a, 9);
+// }
+
+// NYI-LABEL: @test_vshll_high_n_u32(
+// NYI:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// NYI:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+// NYI:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 19, i64 19>
+// NYI:   ret <2 x i64> [[VSHLL_N]]
+// uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
+//   return vshll_high_n_u32(a, 19);
+// }
+
+int16x8_t test_vmovl_s8(int8x8_t a) {
+  return vmovl_s8(a);
+
+  // CIR-LABEL: vmovl_s8
+  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}test_vmovl_s8(<8 x i8>{{.*}}[[A:%.*]])
+  // LLVM: [[VMOVL_I:%.*]] = sext <8 x i8> {{.*}} to <8 x i16>
+}
+
+int32x4_t test_vmovl_s16(int16x4_t a) {
+  return vmovl_s16(a);
+
+  // CIR-LABEL: vmovl_s16
+  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !cir.vector<!s16i x 4> -> !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}test_vmovl_s16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[VMOVL_I:%.*]] = sext <4 x i16> {{.*}} to <4 x i32>
+}
+
+int64x2_t test_vmovl_s32(int32x2_t a) {
+  return vmovl_s32(a);
+
+  // CIR-LABEL: vmovl_s32
+  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !cir.vector<!s32i x 2> -> !cir.vector<!s64i x 2>
+
+  // LLVM: {{.*}}test_vmovl_s32(<2 x i32>{{.*}}[[A:%.*]])
+  // LLVM: [[VMOVL_I:%.*]] = sext <2 x i32> {{.*}} to <2 x i64>
+}
+
+uint16x8_t test_vmovl_u8(uint8x8_t a) {
+  return vmovl_u8(a);
+
+  // CIR-LABEL: vmovl_u8
+  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !cir.vector<!u8i x 8> -> !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}test_vmovl_u8(<8 x i8>{{.*}}[[A:%.*]])
+  // LLVM: [[VMOVL_I:%.*]] = zext <8 x i8> {{.*}} to <8 x i16>
+}
+
+uint32x4_t test_vmovl_u16(uint16x4_t a) {
+  return vmovl_u16(a);
+
+  // CIR-LABEL: vmovl_u16
+  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !cir.vector<!u16i x 4> -> !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}test_vmovl_u16(<4 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[VMOVL_I:%.*]] = zext <4 x i16> {{.*}} to <4 x i32>
+}
+
+uint64x2_t test_vmovl_u32(uint32x2_t a) {
+  return vmovl_u32(a);
+
+  // CIR-LABEL: vmovl_u32
+  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !cir.vector<!u32i x 2> -> !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}test_vmovl_u32(<2 x i32>{{.*}}[[A:%.*]])
+  // LLVM: [[VMOVL_I:%.*]] = zext <2 x i32> {{.*}} to <2 x i64>
+}
+
+// NYI-LABEL: @test_vmovl_high_s8(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// int16x8_t test_vmovl_high_s8(int8x16_t a) {
+//   return vmovl_high_s8(a);
+// }
+
+// NYI-LABEL: @test_vmovl_high_s16(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP1]]
+// int32x4_t test_vmovl_high_s16(int16x8_t a) {
+//   return vmovl_high_s16(a);
+// }
+
+// NYI-LABEL: @test_vmovl_high_s32(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP1]]
+// int64x2_t test_vmovl_high_s32(int32x4_t a) {
+//   return vmovl_high_s32(a);
+// }
+
+// NYI-LABEL: @test_vmovl_high_u8(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// uint16x8_t test_vmovl_high_u8(uint8x16_t a) {
+//   return vmovl_high_u8(a);
+// }
+
+// NYI-LABEL: @test_vmovl_high_u16(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP1]]
+// uint32x4_t test_vmovl_high_u16(uint16x8_t a) {
+//   return vmovl_high_u16(a);
+// }
+
+// NYI-LABEL: @test_vmovl_high_u32(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP1]]
+// uint64x2_t test_vmovl_high_u32(uint32x4_t a) {
+//   return vmovl_high_u32(a);
+// }
+
+// NYI-LABEL: @test_vcvt_n_f32_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// NYI:   [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
+// NYI:   ret <2 x float> [[VCVT_N1]]
+// float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
+//   return vcvt_n_f32_s32(a, 31);
+// }
+
+// NYI-LABEL: @test_vcvtq_n_f32_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
+// NYI:   ret <4 x float> [[VCVT_N1]]
+// float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
+//   return vcvtq_n_f32_s32(a, 31);
+// }
+
+// NYI-LABEL: @test_vcvtq_n_f64_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
+// NYI:   ret <2 x double> [[VCVT_N1]]
+// float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) {
+//   return vcvtq_n_f64_s64(a, 50);
+// }
+
+// NYI-LABEL: @test_vcvt_n_f32_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// NYI:   [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
+// NYI:   ret <2 x float> [[VCVT_N1]]
+// float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
+//   return vcvt_n_f32_u32(a, 31);
+// }
+
+// NYI-LABEL: @test_vcvtq_n_f32_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
+// NYI:   ret <4 x float> [[VCVT_N1]]
+// float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
+//   return vcvtq_n_f32_u32(a, 31);
+// }
+
+// NYI-LABEL: @test_vcvtq_n_f64_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
+// NYI:   ret <2 x double> [[VCVT_N1]]
+// float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) {
+//   return vcvtq_n_f64_u64(a, 50);
+// }
+
+// NYI-LABEL: @test_vcvt_n_s32_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// NYI:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// NYI:   [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
+// NYI:   ret <2 x i32> [[VCVT_N1]]
+// int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
+//   return vcvt_n_s32_f32(a, 31);
+// }
+
+// NYI-LABEL: @test_vcvtq_n_s32_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// NYI:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// NYI:   [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
+// NYI:   ret <4 x i32> [[VCVT_N1]]
+// int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
+//   return vcvtq_n_s32_f32(a, 31);
+// }
+
+// NYI-LABEL: @test_vcvtq_n_s64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// NYI:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// NYI:   [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
+// NYI:   ret <2 x i64> [[VCVT_N1]]
+// int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) {
+//   return vcvtq_n_s64_f64(a, 50);
+// }
+
+// NYI-LABEL: @test_vcvt_n_u32_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// NYI:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
+// NYI:   [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
+// NYI:   ret <2 x i32> [[VCVT_N1]]
+// uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
+//   return vcvt_n_u32_f32(a, 31);
+// }
+
+// NYI-LABEL: @test_vcvtq_n_u32_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// NYI:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
+// NYI:   [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
+// NYI:   ret <4 x i32> [[VCVT_N1]]
+// uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
+//   return vcvtq_n_u32_f32(a, 31);
+// }
+
+// NYI-LABEL: @test_vcvtq_n_u64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// NYI:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
+// NYI:   [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
+// NYI:   ret <2 x i64> [[VCVT_N1]]
+// uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) {
+//   return vcvtq_n_u64_f64(a, 50);
+// }
+
+// NYI-LABEL: @test_vaddl_s8(
+// NYI:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
+// NYI:   [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
+// NYI:   [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// NYI:   ret <8 x i16> [[ADD_I]]
+// int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
+//   return vaddl_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vaddl_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
+// NYI:   [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// NYI:   ret <4 x i32> [[ADD_I]]
+// int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
+//   return vaddl_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vaddl_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
+// NYI:   [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// NYI:   ret <2 x i64> [[ADD_I]]
+// int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
+//   return vaddl_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vaddl_u8(
+// NYI:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
+// NYI:   [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
+// NYI:   [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// NYI:   ret <8 x i16> [[ADD_I]]
+// uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
+//   return vaddl_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vaddl_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
+// NYI:   [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// NYI:   ret <4 x i32> [[ADD_I]]
+// uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
+//   return vaddl_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vaddl_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
+// NYI:   [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// NYI:   ret <2 x i64> [[ADD_I]]
+// uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
+//   return vaddl_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vaddl_high_s8(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// NYI:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
+// NYI:   [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
+// NYI:   ret <8 x i16> [[ADD_I]]
+// int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) {
+//   return vaddl_high_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vaddl_high_s16(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
+// NYI:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// NYI:   [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
+// NYI:   [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
+// NYI:   ret <4 x i32> [[ADD_I]]
+// int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) {
+//   return vaddl_high_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vaddl_high_s32(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
+// NYI:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// NYI:   [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
+// NYI:   [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
+// NYI:   ret <2 x i64> [[ADD_I]]
+// int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) {
+//   return vaddl_high_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vaddl_high_u8(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// NYI:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
+// NYI:   [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
+// NYI:   ret <8 x i16> [[ADD_I]]
+// uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) {
+//   return vaddl_high_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vaddl_high_u16(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
+// NYI:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// NYI:   [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
+// NYI:   [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
+// NYI:   ret <4 x i32> [[ADD_I]]
+// uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) {
+//   return vaddl_high_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vaddl_high_u32(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
+// NYI:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// NYI:   [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
+// NYI:   [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
+// NYI:   ret <2 x i64> [[ADD_I]]
+// uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) {
+//   return vaddl_high_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vaddw_s8(
+// NYI:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
+// NYI:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
+// NYI:   ret <8 x i16> [[ADD_I]]
+// int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
+//   return vaddw_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vaddw_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
+// NYI:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
+// NYI:   ret <4 x i32> [[ADD_I]]
+// int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
+//   return vaddw_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vaddw_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
+// NYI:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
+// NYI:   ret <2 x i64> [[ADD_I]]
+// int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
+//   return vaddw_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vaddw_u8(
+// NYI:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
+// NYI:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
+// NYI:   ret <8 x i16> [[ADD_I]]
+// uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
+//   return vaddw_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vaddw_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
+// NYI:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
+// NYI:   ret <4 x i32> [[ADD_I]]
+// uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
+//   return vaddw_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vaddw_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
+// NYI:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
+// NYI:   ret <2 x i64> [[ADD_I]]
+// uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
+//   return vaddw_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vaddw_high_s8(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// NYI:   [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
+// NYI:   ret <8 x i16> [[ADD_I]]
+// int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) {
+//   return vaddw_high_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vaddw_high_s16(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
+// NYI:   [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]]
+// NYI:   ret <4 x i32> [[ADD_I]]
+// int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) {
+//   return vaddw_high_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vaddw_high_s32(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
+// NYI:   [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]]
+// NYI:   ret <2 x i64> [[ADD_I]]
+// int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) {
+//   return vaddw_high_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vaddw_high_u8(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// NYI:   [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]]
+// NYI:   ret <8 x i16> [[ADD_I]]
+// uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) {
+//   return vaddw_high_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vaddw_high_u16(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
+// NYI:   [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]]
+// NYI:   ret <4 x i32> [[ADD_I]]
+// uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) {
+//   return vaddw_high_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vaddw_high_u32(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
+// NYI:   [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]]
+// NYI:   ret <2 x i64> [[ADD_I]]
+// uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) {
+//   return vaddw_high_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vsubl_s8(
+// NYI:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
+// NYI:   [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
+// NYI:   [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// NYI:   ret <8 x i16> [[SUB_I]]
+// int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
+//   return vsubl_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vsubl_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
+// NYI:   [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// NYI:   ret <4 x i32> [[SUB_I]]
+// int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
+//   return vsubl_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vsubl_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
+// NYI:   [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// NYI:   ret <2 x i64> [[SUB_I]]
+// int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
+//   return vsubl_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vsubl_u8(
+// NYI:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
+// NYI:   [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
+// NYI:   [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// NYI:   ret <8 x i16> [[SUB_I]]
+// uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
+//   return vsubl_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vsubl_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
+// NYI:   [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// NYI:   ret <4 x i32> [[SUB_I]]
+// uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
+//   return vsubl_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vsubl_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
+// NYI:   [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
+// NYI:   ret <2 x i64> [[SUB_I]]
+// uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
+//   return vsubl_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vsubl_high_s8(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// NYI:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
+// NYI:   [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
+// NYI:   ret <8 x i16> [[SUB_I]]
+// int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) {
+//   return vsubl_high_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vsubl_high_s16(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
+// NYI:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// NYI:   [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
+// NYI:   [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
+// NYI:   ret <4 x i32> [[SUB_I]]
+// int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) {
+//   return vsubl_high_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vsubl_high_s32(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
+// NYI:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// NYI:   [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
+// NYI:   [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
+// NYI:   ret <2 x i64> [[SUB_I]]
+// int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) {
+//   return vsubl_high_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vsubl_high_u8(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// NYI:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
+// NYI:   [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
+// NYI:   ret <8 x i16> [[SUB_I]]
+// uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) {
+//   return vsubl_high_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vsubl_high_u16(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
+// NYI:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// NYI:   [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
+// NYI:   [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
+// NYI:   ret <4 x i32> [[SUB_I]]
+// uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) {
+//   return vsubl_high_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vsubl_high_u32(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
+// NYI:   [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
+// NYI:   [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
+// NYI:   [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]]
+// NYI:   ret <2 x i64> [[SUB_I]]
+// uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) {
+//   return vsubl_high_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vsubw_s8(
+// NYI:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
+// NYI:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
+// NYI:   ret <8 x i16> [[SUB_I]]
+// int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
+//   return vsubw_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vsubw_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
+// NYI:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
+// NYI:   ret <4 x i32> [[SUB_I]]
+// int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
+//   return vsubw_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vsubw_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
+// NYI:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
+// NYI:   ret <2 x i64> [[SUB_I]]
+// int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
+//   return vsubw_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vsubw_u8(
+// NYI:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
+// NYI:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
+// NYI:   ret <8 x i16> [[SUB_I]]
+// uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
+//   return vsubw_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vsubw_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
+// NYI:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
+// NYI:   ret <4 x i32> [[SUB_I]]
+// uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
+//   return vsubw_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vsubw_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
+// NYI:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
+// NYI:   ret <2 x i64> [[SUB_I]]
+// uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
+//   return vsubw_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vsubw_high_s8(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// NYI:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
+// NYI:   ret <8 x i16> [[SUB_I]]
+// int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) {
+//   return vsubw_high_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vsubw_high_s16(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
+// NYI:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
+// NYI:   ret <4 x i32> [[SUB_I]]
+// int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) {
+//   return vsubw_high_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vsubw_high_s32(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
+// NYI:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
+// NYI:   ret <2 x i64> [[SUB_I]]
+// int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) {
+//   return vsubw_high_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vsubw_high_u8(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
+// NYI:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]]
+// NYI:   ret <8 x i16> [[SUB_I]]
+// uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) {
+//   return vsubw_high_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vsubw_high_u16(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
+// NYI:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]]
+// NYI:   ret <4 x i32> [[SUB_I]]
+// uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) {
+//   return vsubw_high_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vsubw_high_u32(
+// NYI:   [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
+// NYI:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]]
+// NYI:   ret <2 x i64> [[SUB_I]]
+// uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) {
+//   return vsubw_high_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vaddhn_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
+// NYI:   [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// NYI:   [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
+// NYI:   ret <8 x i8> [[VADDHN2_I]]
+// int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
+//   return vaddhn_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vaddhn_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
+// NYI:   [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
+// NYI:   [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
+// NYI:   ret <4 x i16> [[VADDHN2_I]]
+// int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
+//   return vaddhn_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vaddhn_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
+// NYI:   [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
+// NYI:   [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[VADDHN2_I]]
+// int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
+//   return vaddhn_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vaddhn_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
+// NYI:   [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// NYI:   [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
+// NYI:   ret <8 x i8> [[VADDHN2_I]]
+// uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
+//   return vaddhn_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vaddhn_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
+// NYI:   [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
+// NYI:   [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
+// NYI:   ret <4 x i16> [[VADDHN2_I]]
+// uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
+//   return vaddhn_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vaddhn_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
+// NYI:   [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
+// NYI:   [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[VADDHN2_I]]
+// uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
+//   return vaddhn_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vaddhn_high_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
+// NYI:   [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// NYI:   [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   ret <16 x i8> [[SHUFFLE_I_I]]
+// int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
+//   return vaddhn_high_s16(r, a, b);
+// }
+
+// NYI-LABEL: @test_vaddhn_high_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
+// NYI:   [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
+// NYI:   [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// NYI:   ret <8 x i16> [[SHUFFLE_I_I]]
+// int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
+//   return vaddhn_high_s32(r, a, b);
+// }
+
+// NYI-LABEL: @test_vaddhn_high_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
+// NYI:   [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
+// NYI:   [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// NYI:   ret <4 x i32> [[SHUFFLE_I_I]]
+// int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
+//   return vaddhn_high_s64(r, a, b);
+// }
+
+// NYI-LABEL: @test_vaddhn_high_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b
+// NYI:   [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// NYI:   [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   ret <16 x i8> [[SHUFFLE_I_I]]
+// uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
+//   return vaddhn_high_u16(r, a, b);
+// }
+
+// NYI-LABEL: @test_vaddhn_high_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b
+// NYI:   [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
+// NYI:   [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// NYI:   ret <8 x i16> [[SHUFFLE_I_I]]
+// uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
+//   return vaddhn_high_u32(r, a, b);
+// }
+
+// NYI-LABEL: @test_vaddhn_high_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b
+// NYI:   [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], <i64 32, i64 32>
+// NYI:   [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// NYI:   ret <4 x i32> [[SHUFFLE_I_I]]
+// uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
+//   return vaddhn_high_u64(r, a, b);
+// }
+
+// NYI-LABEL: @test_vraddhn_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+// NYI:   ret <8 x i8> [[VRADDHN_V2_I]]
+// int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
+//   return vraddhn_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vraddhn_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+// NYI:   [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
+// NYI:   ret <4 x i16> [[VRADDHN_V2_I]]
+// int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
+//   return vraddhn_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vraddhn_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+// NYI:   [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
+// NYI:   ret <2 x i32> [[VRADDHN_V2_I]]
+// int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
+//   return vraddhn_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vraddhn_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+// NYI:   ret <8 x i8> [[VRADDHN_V2_I]]
+// uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
+//   return vraddhn_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vraddhn_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+// NYI:   [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
+// NYI:   ret <4 x i16> [[VRADDHN_V2_I]]
+// uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
+//   return vraddhn_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vraddhn_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+// NYI:   [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
+// NYI:   ret <2 x i32> [[VRADDHN_V2_I]]
+// uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
+//   return vraddhn_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vraddhn_high_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   ret <16 x i8> [[SHUFFLE_I_I]]
+// int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
+//   return vraddhn_high_s16(r, a, b);
+// }
+
+// NYI-LABEL: @test_vraddhn_high_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+// NYI:   [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// NYI:   ret <8 x i16> [[SHUFFLE_I_I]]
+// int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
+//   return vraddhn_high_s32(r, a, b);
+// }
+
+// NYI-LABEL: @test_vraddhn_high_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+// NYI:   [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// NYI:   ret <4 x i32> [[SHUFFLE_I_I]]
+// int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
+//   return vraddhn_high_s64(r, a, b);
+// }
+
+// NYI-LABEL: @test_vraddhn_high_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   ret <16 x i8> [[SHUFFLE_I_I]]
+// uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
+//   return vraddhn_high_u16(r, a, b);
+// }
+
+// NYI-LABEL: @test_vraddhn_high_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+// NYI:   [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// NYI:   ret <8 x i16> [[SHUFFLE_I_I]]
+// uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
+//   return vraddhn_high_u32(r, a, b);
+// }
+
+// NYI-LABEL: @test_vraddhn_high_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+// NYI:   [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// NYI:   ret <4 x i32> [[SHUFFLE_I_I]]
+// uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
+//   return vraddhn_high_u64(r, a, b);
+// }
+
+// NYI-LABEL: @test_vsubhn_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
+// NYI:   [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// NYI:   [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
+// NYI:   ret <8 x i8> [[VSUBHN2_I]]
+// int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
+//   return vsubhn_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vsubhn_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
+// NYI:   [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
+// NYI:   [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
+// NYI:   ret <4 x i16> [[VSUBHN2_I]]
+// int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
+//   return vsubhn_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vsubhn_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
+// NYI:   [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
+// NYI:   [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[VSUBHN2_I]]
+// int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
+//   return vsubhn_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vsubhn_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
+// NYI:   [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// NYI:   [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
+// NYI:   ret <8 x i8> [[VSUBHN2_I]]
+// uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
+//   return vsubhn_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vsubhn_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
+// NYI:   [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
+// NYI:   [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
+// NYI:   ret <4 x i16> [[VSUBHN2_I]]
+// uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
+//   return vsubhn_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vsubhn_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
+// NYI:   [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
+// NYI:   [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
+// NYI:   ret <2 x i32> [[VSUBHN2_I]]
+// uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
+//   return vsubhn_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vsubhn_high_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
+// NYI:   [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// NYI:   [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   ret <16 x i8> [[SHUFFLE_I_I]]
+// int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
+//   return vsubhn_high_s16(r, a, b);
+// }
+
+// NYI-LABEL: @test_vsubhn_high_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
+// NYI:   [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
+// NYI:   [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// NYI:   ret <8 x i16> [[SHUFFLE_I_I]]
+// int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
+//   return vsubhn_high_s32(r, a, b);
+// }
+
+// NYI-LABEL: @test_vsubhn_high_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
+// NYI:   [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
+// NYI:   [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// NYI:   ret <4 x i32> [[SHUFFLE_I_I]]
+// int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
+//   return vsubhn_high_s64(r, a, b);
+// }
+
+// NYI-LABEL: @test_vsubhn_high_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b
+// NYI:   [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// NYI:   [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   ret <16 x i8> [[SHUFFLE_I_I]]
+// uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
+//   return vsubhn_high_u16(r, a, b);
+// }
+
+// NYI-LABEL: @test_vsubhn_high_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b
+// NYI:   [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], <i32 16, i32 16, i32 16, i32 16>
+// NYI:   [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// NYI:   ret <8 x i16> [[SHUFFLE_I_I]]
+// uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
+//   return vsubhn_high_u32(r, a, b);
+// }
+
+// NYI-LABEL: @test_vsubhn_high_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b
+// NYI:   [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], <i64 32, i64 32>
+// NYI:   [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// NYI:   ret <4 x i32> [[SHUFFLE_I_I]]
+// uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
+//   return vsubhn_high_u64(r, a, b);
+// }
+
+// NYI-LABEL: @test_vrsubhn_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+// NYI:   ret <8 x i8> [[VRSUBHN_V2_I]]
+// int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
+//   return vrsubhn_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vrsubhn_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+// NYI:   [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
+// NYI:   ret <4 x i16> [[VRSUBHN_V2_I]]
+// int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
+//   return vrsubhn_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vrsubhn_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+// NYI:   [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
+// NYI:   ret <2 x i32> [[VRSUBHN_V2_I]]
+// int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
+//   return vrsubhn_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vrsubhn_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+// NYI:   ret <8 x i8> [[VRSUBHN_V2_I]]
+// uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
+//   return vrsubhn_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vrsubhn_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+// NYI:   [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
+// NYI:   ret <4 x i16> [[VRSUBHN_V2_I]]
+// uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
+//   return vrsubhn_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vrsubhn_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+// NYI:   [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
+// NYI:   ret <2 x i32> [[VRSUBHN_V2_I]]
+// uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
+//   return vrsubhn_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vrsubhn_high_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   ret <16 x i8> [[SHUFFLE_I_I]]
+// int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) {
+//   return vrsubhn_high_s16(r, a, b);
+// }
+
+// NYI-LABEL: @test_vrsubhn_high_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+// NYI:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// NYI:   ret <8 x i16> [[SHUFFLE_I_I]]
+// int16x8_t test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) {
+//   return vrsubhn_high_s32(r, a, b);
+// }
+
+// NYI-LABEL: @test_vrsubhn_high_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+// NYI:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// NYI:   ret <4 x i32> [[SHUFFLE_I_I]]
+// int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) {
+//   return vrsubhn_high_s64(r, a, b);
+// }
+
+// NYI-LABEL: @test_vrsubhn_high_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   ret <16 x i8> [[SHUFFLE_I_I]]
+// uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
+//   return vrsubhn_high_u16(r, a, b);
+// }
+
+// NYI-LABEL: @test_vrsubhn_high_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
+// NYI:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// NYI:   ret <8 x i16> [[SHUFFLE_I_I]]
+// uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
+//   return vrsubhn_high_u32(r, a, b);
+// }
+
+// NYI-LABEL: @test_vrsubhn_high_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
+// NYI:   [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8>
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// NYI:   ret <4 x i32> [[SHUFFLE_I_I]]
+// uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
+//   return vrsubhn_high_u64(r, a, b);
+// }
+
+// NYI-LABEL: @test_vabdl_s8(
+// NYI:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b)
+// NYI:   [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
+// NYI:   ret <8 x i16> [[VMOVL_I_I]]
+// int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
+//   return vabdl_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vabdl_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b)
+// NYI:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[VMOVL_I_I]]
+// int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
+//   return vabdl_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vabdl_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b)
+// NYI:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[VMOVL_I_I]]
+// int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
+//   return vabdl_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vabdl_u8(
+// NYI:   [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b)
+// NYI:   [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16>
+// NYI:   ret <8 x i16> [[VMOVL_I_I]]
+// uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
+//   return vabdl_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vabdl_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b)
+// NYI:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[VMOVL_I_I]]
+// uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
+//   return vabdl_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vabdl_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b)
+// NYI:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8>
+// NYI:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[VMOVL_I_I]]
+// uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
+//   return vabdl_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vabal_s8(
+// NYI:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c)
+// NYI:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
+// NYI:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
+// NYI:   ret <8 x i16> [[ADD_I]]
+// int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
+//   return vabal_s8(a, b, c);
+// }
+
+// NYI-LABEL: @test_vabal_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// NYI:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c)
+// NYI:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
+// NYI:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
+// NYI:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
+// NYI:   ret <4 x i32> [[ADD_I]]
+// int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
+//   return vabal_s16(a, b, c);
+// }
+
+// NYI-LABEL: @test_vabal_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// NYI:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c)
+// NYI:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
+// NYI:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
+// NYI:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
+// NYI:   ret <2 x i64> [[ADD_I]]
+// int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
+//   return vabal_s32(a, b, c);
+// }
+
+// NYI-LABEL: @test_vabal_u8(
+// NYI:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c)
+// NYI:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
+// NYI:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
+// NYI:   ret <8 x i16> [[ADD_I]]
+// uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
+//   return vabal_u8(a, b, c);
+// }
+
+// NYI-LABEL: @test_vabal_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// NYI:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c)
+// NYI:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
+// NYI:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
+// NYI:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
+// NYI:   ret <4 x i32> [[ADD_I]]
+// uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
+//   return vabal_u16(a, b, c);
+// }
+
+// NYI-LABEL: @test_vabal_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// NYI:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c)
+// NYI:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
+// NYI:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
+// NYI:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
+// NYI:   ret <2 x i64> [[ADD_I]]
+// uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
+//   return vabal_u32(a, b, c);
+// }
+
+// NYI-LABEL: @test_vabdl_high_s8(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
+// NYI:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
+// NYI:   ret <8 x i16> [[VMOVL_I_I_I]]
+// int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
+//   return vabdl_high_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vabdl_high_s16(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
+// NYI:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
+// NYI:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[VMOVL_I_I_I]]
+// int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
+//   return vabdl_high_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vabdl_high_s32(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
+// NYI:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
+// NYI:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[VMOVL_I_I_I]]
+// int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
+//   return vabdl_high_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vabdl_high_u8(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
+// NYI:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
+// NYI:   ret <8 x i16> [[VMOVL_I_I_I]]
+// uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
+//   return vabdl_high_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vabdl_high_u16(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
+// NYI:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
+// NYI:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
+// NYI:   ret <4 x i32> [[VMOVL_I_I_I]]
+// uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
+//   return vabdl_high_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vabdl_high_u32(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
+// NYI:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
+// NYI:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
+// NYI:   ret <2 x i64> [[VMOVL_I_I_I]]
+// uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
+//   return vabdl_high_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vabal_high_s8(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
+// NYI:   [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
+// NYI:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
+// NYI:   ret <8 x i16> [[ADD_I_I]]
+// int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
+//   return vabal_high_s8(a, b, c);
+// }
+
+// NYI-LABEL: @test_vabal_high_s16(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
+// NYI:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
+// NYI:   [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
+// NYI:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
+// NYI:   ret <4 x i32> [[ADD_I_I]]
+// int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
+//   return vabal_high_s16(a, b, c);
+// }
+
+// NYI-LABEL: @test_vabal_high_s32(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
+// NYI:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
+// NYI:   [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
+// NYI:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
+// NYI:   ret <2 x i64> [[ADD_I_I]]
+// int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
+//   return vabal_high_s32(a, b, c);
+// }
+
+// NYI-LABEL: @test_vabal_high_u8(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
+// NYI:   [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
+// NYI:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
+// NYI:   ret <8 x i16> [[ADD_I_I]]
+// uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
+//   return vabal_high_u8(a, b, c);
+// }
+
+// NYI-LABEL: @test_vabal_high_u16(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
+// NYI:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
+// NYI:   [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
+// NYI:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
+// NYI:   ret <4 x i32> [[ADD_I_I]]
+// uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
+//   return vabal_high_u16(a, b, c);
+// }
+
+// NYI-LABEL: @test_vabal_high_u32(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
+// NYI:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
+// NYI:   [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
+// NYI:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
+// NYI:   ret <2 x i64> [[ADD_I_I]]
+// uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
+//   return vabal_high_u32(a, b, c);
+// }
+
+// Signed widening multiply, 8-bit lanes: verifies vmull_s8 lowers to the
+// "aarch64.neon.smull" intrinsic at the CIR level and to
+// @llvm.aarch64.neon.smull.v8i16 in the final LLVM IR.
+int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
+  return vmull_s8(a, b);
+
+  // CIR-LABEL: vmull_s8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.smull" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}test_vmull_s8(<8 x i8>{{.*}}[[A:%.*]], <8 x i8>{{.*}}[[B:%.*]])
+  // LLVM: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> {{.*}}, <8 x i8> {{.*}})
+}
+
+// Signed widening multiply, 16-bit lanes: vmull_s16 must produce the
+// "aarch64.neon.smull" CIR intrinsic and @llvm.aarch64.neon.smull.v4i32.
+int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
+  return vmull_s16(a, b);
+
+  // CIR-LABEL: vmull_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.smull" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}test_vmull_s16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
+  // LLVM: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> {{.*}}, <4 x i16> {{.*}})
+}
+
+// Signed widening multiply, 32-bit lanes: vmull_s32 must produce the
+// "aarch64.neon.smull" CIR intrinsic and @llvm.aarch64.neon.smull.v2i64.
+int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
+  return vmull_s32(a, b);
+
+  // CIR-LABEL: vmull_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.smull" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s64i x 2>
+
+  // LLVM: {{.*}}test_vmull_s32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[B:%.*]])
+  // LLVM: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> {{.*}}, <2 x i32> {{.*}})
+}
+
+// Unsigned widening multiply, 8-bit lanes: vmull_u8 selects the unsigned
+// "aarch64.neon.umull" intrinsic (note unsigned !u8i/!u16i CIR vector types).
+uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
+  return vmull_u8(a, b);
+
+  // CIR-LABEL: vmull_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.umull" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u8i x 8>, !cir.vector<!u8i x 8>) -> !cir.vector<!u16i x 8>
+
+  // LLVM: {{.*}}test_vmull_u8(<8 x i8>{{.*}}[[A:%.*]], <8 x i8>{{.*}}[[B:%.*]])
+  // LLVM: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> {{.*}}, <8 x i8> {{.*}})
+}
+
+// Unsigned widening multiply, 16-bit lanes: vmull_u16 must map to the
+// "aarch64.neon.umull" intrinsic and @llvm.aarch64.neon.umull.v4i32.
+uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
+  return vmull_u16(a, b);
+
+  // CIR-LABEL: vmull_u16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.umull" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u16i x 4>, !cir.vector<!u16i x 4>) -> !cir.vector<!u32i x 4>
+
+  // LLVM: {{.*}}test_vmull_u16(<4 x i16>{{.*}}[[A:%.*]], <4 x i16>{{.*}}[[B:%.*]])
+  // LLVM: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> {{.*}}, <4 x i16> {{.*}})
+}
+
+// Unsigned widening multiply, 32-bit lanes: vmull_u32 must map to the
+// "aarch64.neon.umull" intrinsic and @llvm.aarch64.neon.umull.v2i64.
+uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
+  return vmull_u32(a, b);
+
+  // CIR-LABEL: vmull_u32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.umull" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!u32i x 2>, !cir.vector<!u32i x 2>) -> !cir.vector<!u64i x 2>
+
+  // LLVM: {{.*}}test_vmull_u32(<2 x i32>{{.*}}[[A:%.*]], <2 x i32>{{.*}}[[B:%.*]])
+  // LLVM: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> {{.*}}, <2 x i32> {{.*}})
+}
+
+// NYI-LABEL: @test_vmull_high_s8(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
+// NYI:   ret <8 x i16> [[VMULL_I_I]]
+// int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) {
+//   return vmull_high_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vmull_high_s16(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
+// NYI:   ret <4 x i32> [[VMULL2_I_I]]
+// int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) {
+//   return vmull_high_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vmull_high_s32(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
+// NYI:   ret <2 x i64> [[VMULL2_I_I]]
+// int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) {
+//   return vmull_high_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vmull_high_u8(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
+// NYI:   ret <8 x i16> [[VMULL_I_I]]
+// uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) {
+//   return vmull_high_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vmull_high_u16(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
+// NYI:   ret <4 x i32> [[VMULL2_I_I]]
+// uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) {
+//   return vmull_high_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vmull_high_u32(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
+// NYI:   ret <2 x i64> [[VMULL2_I_I]]
+// uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) {
+//   return vmull_high_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vmlal_s8(
+// NYI:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
+// NYI:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
+// NYI:   ret <8 x i16> [[ADD_I]]
+// int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
+//   return vmlal_s8(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlal_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// NYI:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
+// NYI:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
+// NYI:   ret <4 x i32> [[ADD_I]]
+// int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
+//   return vmlal_s16(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlal_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// NYI:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
+// NYI:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
+// NYI:   ret <2 x i64> [[ADD_I]]
+// int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
+//   return vmlal_s32(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlal_u8(
+// NYI:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
+// NYI:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
+// NYI:   ret <8 x i16> [[ADD_I]]
+// uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
+//   return vmlal_u8(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlal_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// NYI:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
+// NYI:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
+// NYI:   ret <4 x i32> [[ADD_I]]
+// uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
+//   return vmlal_u16(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlal_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// NYI:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
+// NYI:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
+// NYI:   ret <2 x i64> [[ADD_I]]
+// uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
+//   return vmlal_u32(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlal_high_s8(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
+// NYI:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
+// NYI:   ret <8 x i16> [[ADD_I_I]]
+// int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
+//   return vmlal_high_s8(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlal_high_s16(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
+// NYI:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
+// NYI:   ret <4 x i32> [[ADD_I_I]]
+// int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
+//   return vmlal_high_s16(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlal_high_s32(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
+// NYI:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
+// NYI:   ret <2 x i64> [[ADD_I_I]]
+// int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
+//   return vmlal_high_s32(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlal_high_u8(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
+// NYI:   [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
+// NYI:   ret <8 x i16> [[ADD_I_I]]
+// uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
+//   return vmlal_high_u8(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlal_high_u16(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
+// NYI:   [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
+// NYI:   ret <4 x i32> [[ADD_I_I]]
+// uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
+//   return vmlal_high_u16(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlal_high_u32(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
+// NYI:   [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
+// NYI:   ret <2 x i64> [[ADD_I_I]]
+// uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
+//   return vmlal_high_u32(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlsl_s8(
+// NYI:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
+// NYI:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
+// NYI:   ret <8 x i16> [[SUB_I]]
+// int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
+//   return vmlsl_s8(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlsl_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// NYI:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
+// NYI:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
+// NYI:   ret <4 x i32> [[SUB_I]]
+// int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
+//   return vmlsl_s16(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlsl_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// NYI:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
+// NYI:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
+// NYI:   ret <2 x i64> [[SUB_I]]
+// int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
+//   return vmlsl_s32(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlsl_u8(
+// NYI:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
+// NYI:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
+// NYI:   ret <8 x i16> [[SUB_I]]
+// uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
+//   return vmlsl_u8(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlsl_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
+// NYI:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
+// NYI:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
+// NYI:   ret <4 x i32> [[SUB_I]]
+// uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
+//   return vmlsl_u16(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlsl_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
+// NYI:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
+// NYI:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
+// NYI:   ret <2 x i64> [[SUB_I]]
+// uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
+//   return vmlsl_u32(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlsl_high_s8(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
+// NYI:   [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
+// NYI:   ret <8 x i16> [[SUB_I_I]]
+// int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
+//   return vmlsl_high_s8(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlsl_high_s16(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
+// NYI:   [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
+// NYI:   ret <4 x i32> [[SUB_I_I]]
+// int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
+//   return vmlsl_high_s16(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlsl_high_s32(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
+// NYI:   [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
+// NYI:   ret <2 x i64> [[SUB_I_I]]
+// int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
+//   return vmlsl_high_s32(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlsl_high_u8(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
+// NYI:   [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]]
+// NYI:   ret <8 x i16> [[SUB_I_I]]
+// uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
+//   return vmlsl_high_u8(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlsl_high_u16(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
+// NYI:   [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]]
+// NYI:   ret <4 x i32> [[SUB_I_I]]
+// uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
+//   return vmlsl_high_u16(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmlsl_high_u32(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
+// NYI:   [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]]
+// NYI:   ret <2 x i64> [[SUB_I_I]]
+// uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
+//   return vmlsl_high_u32(a, b, c);
+// }
+
+// NYI-LABEL: @test_vqdmull_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
+// NYI:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
+// NYI:   ret <4 x i32> [[VQDMULL_V2_I]]
+// int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
+//   return vqdmull_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vqdmull_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
+// NYI:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
+// NYI:   ret <2 x i64> [[VQDMULL_V2_I]]
+// int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
+//   return vqdmull_s32(a, b);
+// }
+
+// vqdmlal_s16 expands to two intrinsics: the widening multiply
+// "aarch64.neon.sqdmull" (v4i16 -> v4i32) whose result feeds the saturating
+// accumulate "aarch64.neon.sqadd" with the v4i32 accumulator `a`; the LLVM
+// checks confirm the sqdmull result is the second operand of sqadd.
+int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
+  return vqdmlal_s16(a, b, c);
+
+  // CIR-LABEL: vqdmlal_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqdmull" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s32i x 4>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}test_vqdmlal_s16(<4 x i32>{{.*}}[[a:%.*]], <4 x i16>{{.*}}[[b:%.*]], <4 x i16>{{.*}}[[c:%.*]])
+  // LLVM:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> {{.*}}, <4 x i16> {{.*}})
+  // LLVM:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> {{.*}}, <4 x i32> [[VQDMLAL2_I]])
+}
+
+// 32-bit-lane counterpart of the vqdmlal_s16 case above: sqdmull widens
+// v2i32 -> v2i64, then sqadd accumulates into the v2i64 operand `a`.
+int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
+  return vqdmlal_s32(a, b, c);
+
+  // CIR-LABEL: vqdmlal_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqdmull" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s64i x 2>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqadd" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+
+  // LLVM: {{.*}}test_vqdmlal_s32(<2 x i64>{{.*}}[[a:%.*]], <2 x i32>{{.*}}[[b:%.*]], <2 x i32>{{.*}}[[c:%.*]])
+  // LLVM:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> {{.*}}, <2 x i32> {{.*}})
+  // LLVM:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> {{.*}}, <2 x i64> [[VQDMLAL2_I]])
+}
+
+
+// Subtract variant: vqdmlsl_s16 uses the same "aarch64.neon.sqdmull"
+// widening multiply but pairs it with "aarch64.neon.sqsub" instead of sqadd.
+int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
+  return vqdmlsl_s16(a, b, c);
+
+  // CIR-LABEL: vqdmlsl_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqdmull" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s16i x 4>, !cir.vector<!s16i x 4>) -> !cir.vector<!s32i x 4>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqsub" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM: {{.*}}test_vqdmlsl_s16(<4 x i32>{{.*}}[[a:%.*]], <4 x i16>{{.*}}[[b:%.*]], <4 x i16>{{.*}}[[c:%.*]])
+  // LLVM:   [[VQDMLSL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> {{.*}}, <4 x i16> {{.*}})
+  // LLVM:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> {{.*}}, <4 x i32> [[VQDMLSL2_I]])
+}
+
+// 32-bit-lane subtract variant: sqdmull widens v2i32 -> v2i64, then sqsub
+// subtracts the product from the v2i64 accumulator `a`.
+int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
+  return vqdmlsl_s32(a, b, c);
+
+  // CIR-LABEL: vqdmlsl_s32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqdmull" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s32i x 2>, !cir.vector<!s32i x 2>) -> !cir.vector<!s64i x 2>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqsub" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+
+  // LLVM: {{.*}}test_vqdmlsl_s32(<2 x i64>{{.*}}[[a:%.*]], <2 x i32>{{.*}}[[b:%.*]], <2 x i32>{{.*}}[[c:%.*]])
+  // LLVM:   [[VQDMLSL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> {{.*}}, <2 x i32> {{.*}})
+  // LLVM:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> {{.*}}, <2 x i64> [[VQDMLSL2_I]])
+}
+
+// NYI-LABEL: @test_vqdmull_high_s16(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
+// NYI:   [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8>
+// NYI:   ret <4 x i32> [[VQDMULL_V2_I_I]]
+// int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) {
+//   return vqdmull_high_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vqdmull_high_s32(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
+// NYI:   [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8>
+// NYI:   ret <2 x i64> [[VQDMULL_V2_I_I]]
+// int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) {
+//   return vqdmull_high_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vqdmlal_high_s16(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
+// NYI:   [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]])
+// NYI:   ret <4 x i32> [[VQDMLAL_V3_I_I]]
+// int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
+//   return vqdmlal_high_s16(a, b, c);
+// }
+
+// NYI-LABEL: @test_vqdmlal_high_s32(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
+// NYI:   [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]])
+// NYI:   ret <2 x i64> [[VQDMLAL_V3_I_I]]
+// int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
+//   return vqdmlal_high_s32(a, b, c);
+// }
+
+// NYI-LABEL: @test_vqdmlsl_high_s16(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
+// NYI:   [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]])
+// NYI:   ret <4 x i32> [[VQDMLSL_V3_I_I]]
+// int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
+//   return vqdmlsl_high_s16(a, b, c);
+// }
+
+// NYI-LABEL: @test_vqdmlsl_high_s32(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
+// NYI:   [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
+// NYI:   [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
+// NYI:   [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]])
+// NYI:   ret <2 x i64> [[VQDMLSL_V3_I_I]]
+// int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
+//   return vqdmlsl_high_s32(a, b, c);
+// }
+
+poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
+  return vmull_p8(a, b);
+
+  // CIR-LABEL: vmull_p8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.pmull" {{%.*}}, {{%.*}} :
+  // CIR-SAME: (!cir.vector<!s8i x 8>, !cir.vector<!s8i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM: {{.*}}test_vmull_p8(<8 x i8>{{.*}}[[A:%.*]], <8 x i8>{{.*}}[[B:%.*]])
+  // LLVM: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> {{.*}}, <8 x i8> {{.*}})
+}
+
+// NYI-LABEL: @test_vmull_high_p8(
+// NYI:   [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// NYI:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
+// NYI:   ret <8 x i16> [[VMULL_I_I]]
+// poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) {
+//   return vmull_high_p8(a, b);
+// }
+
+int64_t test_vaddd_s64(int64_t a, int64_t b) {
+  return vaddd_s64(a, b);
+
+  // CIR-LABEL: vaddd_s64
+  // CIR: {{%.*}} = cir.binop(add, {{%.*}}, {{%.*}}) : !s64i
+
+  // LLVM-LABEL: @test_vaddd_s64
+  // LLVM-SAME: (i64 [[a:%.]], i64 [[b:%.]])
+  // LLVM:   [[VADDD_I:%.*]]  = add i64 {{.*}}, {{.*}}
+  // LLVM:   ret i64 [[VADDD_I]]
+}
+
+uint64_t test_vaddd_u64(uint64_t a, uint64_t b) {
+   return vaddd_u64(a, b);
+
+  // CIR-LABEL: vaddd_u64
+  // CIR: {{%.*}} = cir.binop(add, {{%.*}}, {{%.*}}) : !u64i
+
+  // LLVM-LABEL: @test_vaddd_u64
+  // LLVM-SAME: (i64 [[a:%.]], i64 [[b:%.]])
+  // LLVM:   [[VADDD_I:%.*]]  = add i64 {{.*}}, {{.*}}
+  // LLVM:   ret i64 [[VADDD_I]]
+}
+
+int64_t test_vsubd_s64(int64_t a, int64_t b) {
+  return vsubd_s64(a, b);
+
+  // CIR-LABEL: vsubd_s64
+  // CIR: {{%.*}} = cir.binop(sub, {{%.*}}, {{%.*}}) : !s64i
+
+  // LLVM-LABEL: @test_vsubd_s64
+  // LLVM-SAME: (i64 [[a:%.]], i64 [[b:%.]])
+  // LLVM:   [[VSUBD_I:%.*]]  = sub i64 {{.*}}, {{.*}}
+  // LLVM:   ret i64 [[VSUBD_I]]
+}
+
+uint64_t test_vsubd_u64(uint64_t a, uint64_t b) {
+  return vsubd_u64(a, b);
+
+  // CIR-LABEL: vsubd_u64
+  // CIR: {{%.*}} = cir.binop(sub, {{%.*}}, {{%.*}}) : !u64i
+
+  // LLVM-LABEL: @test_vsubd_u64
+  // LLVM-SAME: (i64 [[a:%.]], i64 [[b:%.]])
+  // LLVM:   [[VSUBD_I:%.*]]  = sub i64 {{.*}}, {{.*}}
+  // LLVM:   ret i64 [[VSUBD_I]]
+}
+
+// NYI-LABEL: @test_vqaddb_s8(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
+// NYI:   [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0
+// NYI:   ret i8 [[TMP2]]
+// int8_t test_vqaddb_s8(int8_t a, int8_t b) {
+//   return vqaddb_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vqaddh_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
+// NYI:   [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0
+// NYI:   ret i16 [[TMP2]]
+// int16_t test_vqaddh_s16(int16_t a, int16_t b) {
+//   return vqaddh_s16(a, b);
+// }
+
+int32_t test_vqadds_s32(int32_t a, int32_t b) {
+  return vqadds_s32(a, b);
+
+  // CIR: vqadds_s32
+  // CIR: cir.binop(add, {{%.*}}, {{%.*}}) sat : !s32i
+
+  // LLVM:{{.*}}test_vqadds_s32(i32{{.*}}[[a:%.*]], i32{{.*}}[[b:%.*]])
+  // LLVM:   [[VQADDS_S32_I:%.*]] = call i32 @llvm.sadd.sat.i32(i32 {{.*}}, i32 {{.*}})
+  // LLVM:   ret i32 [[VQADDS_S32_I]]
+}
+
+int64_t test_vqaddd_s64(int64_t a, int64_t b) {
+  return vqaddd_s64(a, b);
+
+  // CIR: vqaddd_s64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.sqadd" {{%.*}}, {{%.*}} : (!s64i, !s64i) -> !s64i
+
+  // LLVM-LABEL: @test_vqaddd_s64
+  // LLVM-SAME: (i64{{.*}}[[a:%.*]], i64{{.*}}[[b:%.*]])
+  // LLVM:   [[VQADD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 {{.*}}, i64 {{.*}})
+  // LLVM:   ret i64 [[VQADD_S64_I]]
+}
+
+uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) {
+  return vqaddd_u64(a, b);
+
+  // CIR: vqaddd_u64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.uqadd" {{%.*}}, {{%.*}} : (!u64i, !u64i) -> !u64i
+
+  // LLVM-LABEL: @test_vqaddd_u64
+  // LLVM-SAME: (i64{{.*}}[[a:%.*]], i64{{.*}}[[b:%.*]])
+  // LLVM:   [[VQADD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 {{.*}}, i64 {{.*}})
+  // LLVM:   ret i64 [[VQADD_U64_I]]
+}
+
+// NYI-LABEL: @test_vqaddb_u8(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
+// NYI:   [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0
+// NYI:   ret i8 [[TMP2]]
+// uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) {
+//   return vqaddb_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vqaddh_u16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
+// NYI:   [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0
+// NYI:   ret i16 [[TMP2]]
+// uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) {
+//   return vqaddh_u16(a, b);
+// }
+
+uint32_t test_vqadds_u32(uint32_t a, uint32_t b) {
+  return vqadds_u32(a, b);
+
+  // CIR: vqadds_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.uqadd" {{%.*}}, {{%.*}} : (!u32i, !u32i) -> !u32i
+
+  // LLVM-LABEL: @test_vqadds_u32
+  // LLVM-SAME: (i32{{.*}}[[a:%.*]], i32{{.*}}[[b:%.*]])
+  // LLVM:   [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 {{.*}}, i32 {{.*}})
+  // LLVM:   ret i32 [[VQADDS_U32_I]]
+}
+
+
+// NYI-LABEL: @test_vqsubb_s8(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
+// NYI:   [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0
+// NYI:   ret i8 [[TMP2]]
+// int8_t test_vqsubb_s8(int8_t a, int8_t b) {
+//   return vqsubb_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vqsubh_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
+// NYI:   [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0
+// NYI:   ret i16 [[TMP2]]
+// int16_t test_vqsubh_s16(int16_t a, int16_t b) {
+//   return vqsubh_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vqsubs_s32(
+// NYI:   [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b)
+// NYI:   ret i32 [[VQSUBS_S32_I]]
+int32_t test_vqsubs_s32(int32_t a, int32_t b) {
+  return vqsubs_s32(a, b);
+
+  // CIR: vqsubs_s32
+  // CIR: cir.binop(sub, {{%.*}}, {{%.*}}) sat : !s32i
+
+  // LLVM:{{.*}}test_vqsubs_s32(i32{{.*}}[[a:%.*]], i32{{.*}}[[b:%.*]])
+  // LLVM:   [[VQSUBS_S32_I:%.*]] = call i32 @llvm.ssub.sat.i32(i32 {{.*}}, i32 {{.*}})
+  // LLVM:   ret i32 [[VQSUBS_S32_I]]
+}
+
+int64_t test_vqsubd_s64(int64_t a, int64_t b) {
+  return vqsubd_s64(a, b);
+
+  // CIR: vqsubd_s64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.sqsub" {{%.*}}, {{%.*}} : (!s64i, !s64i) -> !s64i
+
+  // LLVM-LABEL: @test_vqsubd_s64
+  // LLVM-SAME: (i64{{.*}}[[a:%.*]], i64{{.*}}[[b:%.*]])
+  // LLVM:   [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 {{.*}}, i64 {{.*}})
+  // LLVM:   ret i64 [[VQSUBD_S64_I]]
+}
+
+// NYI-LABEL: @test_vqsubb_u8(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
+// NYI:   [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0
+// NYI:   ret i8 [[TMP2]]
+// uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) {
+//   return vqsubb_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vqsubh_u16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
+// NYI:   [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0
+// NYI:   ret i16 [[TMP2]]
+// uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) {
+//   return vqsubh_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vqsubs_u32(
+// NYI:   [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b)
+// NYI:   ret i32 [[VQSUBS_U32_I]]
+// uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) {
+//   return vqsubs_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vqsubd_u64(
+// NYI:   [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b)
+// NYI:   ret i64 [[VQSUBD_U64_I]]
+// uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) {
+//   return vqsubd_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vshld_s64(
+// NYI:   [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b)
+// NYI:   ret i64 [[VSHLD_S64_I]]
+// int64_t test_vshld_s64(int64_t a, int64_t b) {
+//   return vshld_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vshld_u64(
+// NYI:   [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b)
+// NYI:   ret i64 [[VSHLD_U64_I]]
+// uint64_t test_vshld_u64(uint64_t a, int64_t b) {
+//   return vshld_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vqshlb_s8(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
+// NYI:   [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0
+// NYI:   ret i8 [[TMP2]]
+// int8_t test_vqshlb_s8(int8_t a, int8_t b) {
+//   return vqshlb_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vqshlh_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
+// NYI:   [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0
+// NYI:   ret i16 [[TMP2]]
+// int16_t test_vqshlh_s16(int16_t a, int16_t b) {
+//   return vqshlh_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vqshls_s32(
+// NYI:   [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b)
+// NYI:   ret i32 [[VQSHLS_S32_I]]
+// int32_t test_vqshls_s32(int32_t a, int32_t b) {
+//   return vqshls_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vqshld_s64(
+// NYI:   [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b)
+// NYI:   ret i64 [[VQSHLD_S64_I]]
+// int64_t test_vqshld_s64(int64_t a, int64_t b) {
+//   return vqshld_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vqshlb_u8(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
+// NYI:   [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0
+// NYI:   ret i8 [[TMP2]]
+// uint8_t test_vqshlb_u8(uint8_t a, int8_t b) {
+//   return vqshlb_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vqshlh_u16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
+// NYI:   [[VQSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0
+// NYI:   ret i16 [[TMP2]]
+// uint16_t test_vqshlh_u16(uint16_t a, int16_t b) {
+//   return vqshlh_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vqshls_u32(
+// NYI:   [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b)
+// NYI:   ret i32 [[VQSHLS_U32_I]]
+// uint32_t test_vqshls_u32(uint32_t a, int32_t b) {
+//   return vqshls_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vqshld_u64(
+// NYI:   [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b)
+// NYI:   ret i64 [[VQSHLD_U64_I]]
+// uint64_t test_vqshld_u64(uint64_t a, int64_t b) {
+//   return vqshld_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vrshld_s64(
+// NYI:   [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b)
+// NYI:   ret i64 [[VRSHLD_S64_I]]
+// int64_t test_vrshld_s64(int64_t a, int64_t b) {
+//   return vrshld_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vrshld_u64(
+// NYI:   [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b)
+// NYI:   ret i64 [[VRSHLD_U64_I]]
+// uint64_t test_vrshld_u64(uint64_t a, int64_t b) {
+//   return vrshld_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshlb_s8(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
+// NYI:   [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0
+// NYI:   ret i8 [[TMP2]]
+// int8_t test_vqrshlb_s8(int8_t a, int8_t b) {
+//   return vqrshlb_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshlh_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
+// NYI:   [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0
+// NYI:   ret i16 [[TMP2]]
+// int16_t test_vqrshlh_s16(int16_t a, int16_t b) {
+//   return vqrshlh_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshls_s32(
+// NYI:   [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b)
+// NYI:   ret i32 [[VQRSHLS_S32_I]]
+// int32_t test_vqrshls_s32(int32_t a, int32_t b) {
+//   return vqrshls_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshld_s64(
+// NYI:   [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b)
+// NYI:   ret i64 [[VQRSHLD_S64_I]]
+// int64_t test_vqrshld_s64(int64_t a, int64_t b) {
+//   return vqrshld_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshlb_u8(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
+// NYI:   [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0
+// NYI:   ret i8 [[TMP2]]
+// uint8_t test_vqrshlb_u8(uint8_t a, int8_t b) {
+//   return vqrshlb_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshlh_u16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
+// NYI:   [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0
+// NYI:   ret i16 [[TMP2]]
+// uint16_t test_vqrshlh_u16(uint16_t a, int16_t b) {
+//   return vqrshlh_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshls_u32(
+// NYI:   [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b)
+// NYI:   ret i32 [[VQRSHLS_U32_I]]
+// uint32_t test_vqrshls_u32(uint32_t a, int32_t b) {
+//   return vqrshls_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vqrshld_u64(
+// NYI:   [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b)
+// NYI:   ret i64 [[VQRSHLD_U64_I]]
+// uint64_t test_vqrshld_u64(uint64_t a, int64_t b) {
+//   return vqrshld_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vpaddd_s64(
+// NYI:   [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
+// NYI:   ret i64 [[VPADDD_S64_I]]
+// int64_t test_vpaddd_s64(int64x2_t a) {
+//   return vpaddd_s64(a);
+// }
+
+// NYI-LABEL: @test_vpadds_f32(
+// NYI:   [[LANE0_I:%.*]] = extractelement <2 x float> %a, i64 0
+// NYI:   [[LANE1_I:%.*]] = extractelement <2 x float> %a, i64 1
+// NYI:   [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
+// NYI:   ret float [[VPADDD_I]]
+// float32_t test_vpadds_f32(float32x2_t a) {
+//   return vpadds_f32(a);
+// }
+
+// NYI-LABEL: @test_vpaddd_f64(
+// NYI:   [[LANE0_I:%.*]] = extractelement <2 x double> %a, i64 0
+// NYI:   [[LANE1_I:%.*]] = extractelement <2 x double> %a, i64 1
+// NYI:   [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
+// NYI:   ret double [[VPADDD_I]]
+// float64_t test_vpaddd_f64(float64x2_t a) {
+//   return vpaddd_f64(a);
+// }
+
+// NYI-LABEL: @test_vpmaxnms_f32(
+// NYI:   [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
+// NYI:   ret float [[VPMAXNMS_F32_I]]
+// float32_t test_vpmaxnms_f32(float32x2_t a) {
+//   return vpmaxnms_f32(a);
+// }
+
+// NYI-LABEL: @test_vpmaxnmqd_f64(
+// NYI:   [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
+// NYI:   ret double [[VPMAXNMQD_F64_I]]
+// float64_t test_vpmaxnmqd_f64(float64x2_t a) {
+//   return vpmaxnmqd_f64(a);
+// }
+
+// NYI-LABEL: @test_vpmaxs_f32(
+// NYI:   [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
+// NYI:   ret float [[VPMAXS_F32_I]]
+// float32_t test_vpmaxs_f32(float32x2_t a) {
+//   return vpmaxs_f32(a);
+// }
+
+// NYI-LABEL: @test_vpmaxqd_f64(
+// NYI:   [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
+// NYI:   ret double [[VPMAXQD_F64_I]]
+// float64_t test_vpmaxqd_f64(float64x2_t a) {
+//   return vpmaxqd_f64(a);
+// }
+
+// NYI-LABEL: @test_vpminnms_f32(
+// NYI:   [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
+// NYI:   ret float [[VPMINNMS_F32_I]]
+// float32_t test_vpminnms_f32(float32x2_t a) {
+//   return vpminnms_f32(a);
+// }
+
+// NYI-LABEL: @test_vpminnmqd_f64(
+// NYI:   [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
+// NYI:   ret double [[VPMINNMQD_F64_I]]
+// float64_t test_vpminnmqd_f64(float64x2_t a) {
+//   return vpminnmqd_f64(a);
+// }
+
+// NYI-LABEL: @test_vpmins_f32(
+// NYI:   [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
+// NYI:   ret float [[VPMINS_F32_I]]
+// float32_t test_vpmins_f32(float32x2_t a) {
+//   return vpmins_f32(a);
+// }
+
+// NYI-LABEL: @test_vpminqd_f64(
+// NYI:   [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
+// NYI:   ret double [[VPMINQD_F64_I]]
+// float64_t test_vpminqd_f64(float64x2_t a) {
+//   return vpminqd_f64(a);
+// }
+
+// NYI-LABEL: @test_vqdmulhh_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
+// NYI:   [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0
+// NYI:   ret i16 [[TMP2]]
+// int16_t test_vqdmulhh_s16(int16_t a, int16_t b) {
+//   return vqdmulhh_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vqdmulhs_s32(
+// NYI:   [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b)
+// NYI:   ret i32 [[VQDMULHS_S32_I]]
+// int32_t test_vqdmulhs_s32(int32_t a, int32_t b) {
+//   return vqdmulhs_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vqrdmulhh_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
+// NYI:   [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0
+// NYI:   ret i16 [[TMP2]]
+// int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) {
+//   return vqrdmulhh_s16(a, b);
+// }
+
+int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) {
+  return vqrdmulhs_s32(a, b);
+
+  // CIR-LABEL: vqrdmulhs_s32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.sqrdmulh" {{%.*}}, {{%.*}} : (!s32i, !s32i) -> !s32i
+
+  // LLVM: {{.*}}test_vqrdmulhs_s32(i32{{.*}}[[a:%.*]], i32{{.*}}[[b:%.*]])
+  // LLVM:   [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 {{.*}}, i32 {{.*}})
+  // LLVM:   ret i32 [[VQRDMULHS_S32_I]]
+}
+
+// NYI-LABEL: @test_vmulxs_f32(
+// NYI:   [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b)
+// NYI:   ret float [[VMULXS_F32_I]]
+// float32_t test_vmulxs_f32(float32_t a, float32_t b) {
+//   return vmulxs_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vmulxd_f64(
+// NYI:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b)
+// NYI:   ret double [[VMULXD_F64_I]]
+// float64_t test_vmulxd_f64(float64_t a, float64_t b) {
+//   return vmulxd_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vmulx_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// NYI:   [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> %a, <1 x double> %b)
+// NYI:   ret <1 x double> [[VMULX2_I]]
+// float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) {
+//   return vmulx_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vrecpss_f32(
+// NYI:   [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b)
+// NYI:   ret float [[VRECPS_I]]
+// float32_t test_vrecpss_f32(float32_t a, float32_t b) {
+//   return vrecpss_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vrecpsd_f64(
+// NYI:   [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b)
+// NYI:   ret double [[VRECPS_I]]
+// float64_t test_vrecpsd_f64(float64_t a, float64_t b) {
+//   return vrecpsd_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vrsqrtss_f32(
+// NYI:   [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b)
+// NYI:   ret float [[VRSQRTSS_F32_I]]
+// float32_t test_vrsqrtss_f32(float32_t a, float32_t b) {
+//   return vrsqrtss_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vrsqrtsd_f64(
+// NYI:   [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b)
+// NYI:   ret double [[VRSQRTSD_F64_I]]
+// float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) {
+//   return vrsqrtsd_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vcvts_f32_s32(
+// NYI:   [[TMP0:%.*]] = sitofp i32 %a to float
+// NYI:   ret float [[TMP0]]
+// float32_t test_vcvts_f32_s32(int32_t a) {
+//   return vcvts_f32_s32(a);
+// }
+
+// NYI-LABEL: @test_vcvtd_f64_s64(
+// NYI:   [[TMP0:%.*]] = sitofp i64 %a to double
+// NYI:   ret double [[TMP0]]
+// float64_t test_vcvtd_f64_s64(int64_t a) {
+//   return vcvtd_f64_s64(a);
+// }
+
+// NYI-LABEL: @test_vcvts_f32_u32(
+// NYI:   [[TMP0:%.*]] = uitofp i32 %a to float
+// NYI:   ret float [[TMP0]]
+// float32_t test_vcvts_f32_u32(uint32_t a) {
+//   return vcvts_f32_u32(a);
+// }
+
+// NYI-LABEL: @test_vcvtd_f64_u64(
+// NYI:   [[TMP0:%.*]] = uitofp i64 %a to double
+// NYI:   ret double [[TMP0]]
+// float64_t test_vcvtd_f64_u64(uint64_t a) {
+//   return vcvtd_f64_u64(a);
+// }
+
+// NYI-LABEL: @test_vrecpes_f32(
+// NYI:   [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a)
+// NYI:   ret float [[VRECPES_F32_I]]
+// float32_t test_vrecpes_f32(float32_t a) {
+//   return vrecpes_f32(a);
+// }
+
+// NYI-LABEL: @test_vrecped_f64(
+// NYI:   [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a)
+// NYI:   ret double [[VRECPED_F64_I]]
+// float64_t test_vrecped_f64(float64_t a) {
+//   return vrecped_f64(a);
+// }
+
+// NYI-LABEL: @test_vrecpxs_f32(
+// NYI:   [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a)
+// NYI:   ret float [[VRECPXS_F32_I]]
+// float32_t test_vrecpxs_f32(float32_t a) {
+//   return vrecpxs_f32(a);
+// }
+
+// NYI-LABEL: @test_vrecpxd_f64(
+// NYI:   [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a)
+// NYI:   ret double [[VRECPXD_F64_I]]
+// float64_t test_vrecpxd_f64(float64_t a) {
+//   return vrecpxd_f64(a);
+// }
+
+// NYI-LABEL: @test_vrsqrte_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %a)
+// NYI:   ret <2 x i32> [[VRSQRTE_V1_I]]
+// uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
+//   return vrsqrte_u32(a);
+// }
+
+// NYI-LABEL: @test_vrsqrteq_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %a)
+// NYI:   ret <4 x i32> [[VRSQRTEQ_V1_I]]
+// uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
+//   return vrsqrteq_u32(a);
+// }
+
+// NYI-LABEL: @test_vrsqrtes_f32(
+// NYI:   [[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a)
+// NYI:   ret float [[VRSQRTES_F32_I]]
+// float32_t test_vrsqrtes_f32(float32_t a) {
+//   return vrsqrtes_f32(a);
+// }
+
+// NYI-LABEL: @test_vrsqrted_f64(
+// NYI:   [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a)
+// NYI:   ret double [[VRSQRTED_F64_I]]
+// float64_t test_vrsqrted_f64(float64_t a) {
+//   return vrsqrted_f64(a);
+// }
+
+uint8x16_t test_vld1q_u8(uint8_t const *a) {
+  return vld1q_u8(a);
+  // CIR-LABEL: @test_vld1q_u8
+  // CIR: %[[CAST:.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u8i x 16>>
+  // CIR: cir.load align(1) %[[CAST]] : !cir.ptr<!cir.vector<!u8i x 16>>, !cir.vector<!u8i x 16>
+
+  // LLVM-LABEL: @test_vld1q_u8
+  // LLVM:   [[TMP1:%.*]] = load <16 x i8>, ptr %0, align 1
+}
+
+uint16x8_t test_vld1q_u16(uint16_t const *a) {
+  return vld1q_u16(a);
+  // CIR-LABEL: @test_vld1q_u16
+  // CIR: %[[CAST:.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u16i x 8>>
+  // CIR: cir.load align(2) %[[CAST]] : !cir.ptr<!cir.vector<!u16i x 8>>, !cir.vector<!u16i x 8>
+
+  // LLVM-LABEL: @test_vld1q_u16
+  // LLVM:   [[TMP1:%.*]] = load <8 x i16>, ptr %0, align 2
+}
+
+uint32x4_t test_vld1q_u32(uint32_t const *a) {
+  return vld1q_u32(a);
+  // CIR-LABEL: @test_vld1q_u32
+  // CIR: %[[CAST:.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u32i x 4>>
+  // CIR: cir.load align(4) %[[CAST]] : !cir.ptr<!cir.vector<!u32i x 4>>, !cir.vector<!u32i x 4>
+
+  // LLVM-LABEL: @test_vld1q_u32
+  // LLVM:   [[TMP1:%.*]] = load <4 x i32>, ptr %0, align 4
+}
+
+uint64x2_t test_vld1q_u64(uint64_t const *a) {
+  return vld1q_u64(a);
+  // CIR-LABEL: @test_vld1q_u64
+  // CIR: %[[CAST:.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u64i x 2>>
+  // CIR: cir.load align(8) %[[CAST]] : !cir.ptr<!cir.vector<!u64i x 2>>, !cir.vector<!u64i x 2>
+
+  // LLVM-LABEL: @test_vld1q_u64
+  // LLVM:   [[TMP1:%.*]] = load <2 x i64>, ptr %0, align 8
+}
+
+int8x16_t test_vld1q_s8(int8_t const *a) {
+  return vld1q_s8(a);
+  // CIR-LABEL: @test_vld1q_s8
+  // CIR: %[[CAST:.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!s8i x 16>>
+  // CIR: cir.load align(1) %[[CAST]] : !cir.ptr<!cir.vector<!s8i x 16>>, !cir.vector<!s8i x 16>
+
+  // LLVM-LABEL: @test_vld1q_s8
+  // LLVM:   [[TMP1:%.*]] = load <16 x i8>, ptr %0, align 1
+}
+
+int16x8_t test_vld1q_s16(int16_t const *a) {
+  return vld1q_s16(a);
+  // CIR-LABEL: @test_vld1q_s16
+  // CIR: %[[CAST:.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!s16i x 8>>
+  // CIR: cir.load align(2) %[[CAST]] : !cir.ptr<!cir.vector<!s16i x 8>>, !cir.vector<!s16i x 8>
+
+  // LLVM-LABEL: @test_vld1q_s16
+  // LLVM:   [[TMP1:%.*]] = load <8 x i16>, ptr %0, align 2
+}
+
+int32x4_t test_vld1q_s32(int32_t const *a) {
+  return vld1q_s32(a);
+  // CIR-LABEL: @test_vld1q_s32
+  // CIR: %[[CAST:.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!s32i x 4>>
+  // CIR: cir.load align(4) %[[CAST]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+
+  // LLVM-LABEL: @test_vld1q_s32
+  // LLVM:   [[TMP1:%.*]] = load <4 x i32>, ptr %0, align 4
+}
+
+int64x2_t test_vld1q_s64(int64_t const *a) {
+  return vld1q_s64(a);
+  // CIR-LABEL: @test_vld1q_s64
+  // CIR: %[[CAST:.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!s64i x 2>>
+  // CIR: cir.load align(8) %[[CAST]] : !cir.ptr<!cir.vector<!s64i x 2>>, !cir.vector<!s64i x 2>
+
+  // LLVM-LABEL: @test_vld1q_s64
+  // LLVM:   [[TMP1:%.*]] = load <2 x i64>, ptr %0, align 8
+}
+
+// NYI-LABEL: @test_vld1q_f16(
+// NYI:   [[TMP2:%.*]] = load <8 x half>, ptr %a, align 2
+// NYI:   ret <8 x half> [[TMP2]]
+// float16x8_t test_vld1q_f16(float16_t const *a) {
+//   return vld1q_f16(a);
+// }
+
+// NYI-LABEL: @test_vld1q_f32(
+// NYI:   [[TMP2:%.*]] = load <4 x float>, ptr %a, align 4
+// NYI:   ret <4 x float> [[TMP2]]
+// float32x4_t test_vld1q_f32(float32_t const *a) {
+//   return vld1q_f32(a);
+// }
+
+// NYI-LABEL: @test_vld1q_f64(
+// NYI:   [[TMP2:%.*]] = load <2 x double>, ptr %a, align 8
+// NYI:   ret <2 x double> [[TMP2]]
+// float64x2_t test_vld1q_f64(float64_t const *a) {
+//   return vld1q_f64(a);
+// }
+
+// NYI-LABEL: @test_vld1q_p8(
+// NYI:   [[TMP1:%.*]] = load <16 x i8>, ptr %a, align 1
+// NYI:   ret <16 x i8> [[TMP1]]
+// poly8x16_t test_vld1q_p8(poly8_t const *a) {
+//   return vld1q_p8(a);
+// }
+
+// NYI-LABEL: @test_vld1q_p16(
+// NYI:   [[TMP2:%.*]] = load <8 x i16>, ptr %a, align 2
+// NYI:   ret <8 x i16> [[TMP2]]
+// poly16x8_t test_vld1q_p16(poly16_t const *a) {
+//   return vld1q_p16(a);
+// }
+
+// NYI-LABEL: @test_vld1_u8(
+// NYI:   [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
+// NYI:   ret <8 x i8> [[TMP1]]
+// uint8x8_t test_vld1_u8(uint8_t const *a) {
+//   return vld1_u8(a);
+// }
+
+// NYI-LABEL: @test_vld1_u16(
+// NYI:   [[TMP2:%.*]] = load <4 x i16>, ptr %a, align 2
+// NYI:   ret <4 x i16> [[TMP2]]
+// uint16x4_t test_vld1_u16(uint16_t const *a) {
+//   return vld1_u16(a);
+// }
+
+// NYI-LABEL: @test_vld1_u32(
+// NYI:   [[TMP2:%.*]] = load <2 x i32>, ptr %a, align 4
+// NYI:   ret <2 x i32> [[TMP2]]
+// uint32x2_t test_vld1_u32(uint32_t const *a) {
+//   return vld1_u32(a);
+// }
+
+// NYI-LABEL: @test_vld1_u64(
+// NYI:   [[TMP2:%.*]] = load <1 x i64>, ptr %a, align 8
+// NYI:   ret <1 x i64> [[TMP2]]
+// uint64x1_t test_vld1_u64(uint64_t const *a) {
+//   return vld1_u64(a);
+// }
+
+// NYI-LABEL: @test_vld1_s8(
+// NYI:   [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
+// NYI:   ret <8 x i8> [[TMP1]]
+// int8x8_t test_vld1_s8(int8_t const *a) {
+//   return vld1_s8(a);
+// }
+
+// NYI-LABEL: @test_vld1_s16(
+// NYI:   [[TMP2:%.*]] = load <4 x i16>, ptr %a, align 2
+// NYI:   ret <4 x i16> [[TMP2]]
+// int16x4_t test_vld1_s16(int16_t const *a) {
+//   return vld1_s16(a);
+// }
+
+// NYI-LABEL: @test_vld1_s32(
+// NYI:   [[TMP2:%.*]] = load <2 x i32>, ptr %a, align 4
+// NYI:   ret <2 x i32> [[TMP2]]
+// int32x2_t test_vld1_s32(int32_t const *a) {
+//   return vld1_s32(a);
+// }
+
+// NYI-LABEL: @test_vld1_s64(
+// NYI:   [[TMP2:%.*]] = load <1 x i64>, ptr %a, align 8
+// NYI:   ret <1 x i64> [[TMP2]]
+// int64x1_t test_vld1_s64(int64_t const *a) {
+//   return vld1_s64(a);
+// }
+
+// NYI-LABEL: @test_vld1_f16(
+// NYI:   [[TMP2:%.*]] = load <4 x half>, ptr %a, align 2
+// NYI:   ret <4 x half> [[TMP2]]
+// float16x4_t test_vld1_f16(float16_t const *a) {
+//   return vld1_f16(a);
+// }
+
+// NYI-LABEL: @test_vld1_f32(
+// NYI:   [[TMP2:%.*]] = load <2 x float>, ptr %a, align 4
+// NYI:   ret <2 x float> [[TMP2]]
+// float32x2_t test_vld1_f32(float32_t const *a) {
+//   return vld1_f32(a);
+// }
+
+// NYI-LABEL: @test_vld1_f64(
+// NYI:   [[TMP2:%.*]] = load <1 x double>, ptr %a, align 8
+// NYI:   ret <1 x double> [[TMP2]]
+// float64x1_t test_vld1_f64(float64_t const *a) {
+//   return vld1_f64(a);
+// }
+
+// NYI-LABEL: @test_vld1_p8(
+// NYI:   [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
+// NYI:   ret <8 x i8> [[TMP1]]
+// poly8x8_t test_vld1_p8(poly8_t const *a) {
+//   return vld1_p8(a);
+// }
+
+// NYI-LABEL: @test_vld1_p16(
+// NYI:   [[TMP2:%.*]] = load <4 x i16>, ptr %a, align 2
+// NYI:   ret <4 x i16> [[TMP2]]
+// poly16x4_t test_vld1_p16(poly16_t const *a) {
+//   return vld1_p16(a);
+// }
+
+// NYI-LABEL: @test_vld1_u8_void(
+// NYI:   [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
+// NYI:   ret <8 x i8> [[TMP1]]
+// uint8x8_t test_vld1_u8_void(void *a) {
+//   return vld1_u8(a);
+// }
+
+// NYI-LABEL: @test_vld1_u16_void(
+// NYI:   [[TMP1:%.*]] = load <4 x i16>, ptr %a, align 1
+// NYI:   ret <4 x i16> [[TMP1]]
+// uint16x4_t test_vld1_u16_void(void *a) {
+//   return vld1_u16(a);
+// }
+
+// NYI-LABEL: @test_vld1_u32_void(
+// NYI:   [[TMP1:%.*]] = load <2 x i32>, ptr %a, align 1
+// NYI:   ret <2 x i32> [[TMP1]]
+// uint32x2_t test_vld1_u32_void(void *a) {
+//   return vld1_u32(a);
+// }
+
+// NYI-LABEL: @test_vld1_u64_void(
+// NYI:   [[TMP1:%.*]] = load <1 x i64>, ptr %a, align 1
+// NYI:   ret <1 x i64> [[TMP1]]
+// uint64x1_t test_vld1_u64_void(void *a) {
+//   return vld1_u64(a);
+// }
+
+// NYI-LABEL: @test_vld1_s8_void(
+// NYI:   [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
+// NYI:   ret <8 x i8> [[TMP1]]
+// int8x8_t test_vld1_s8_void(void *a) {
+//   return vld1_s8(a);
+// }
+
+// NYI-LABEL: @test_vld1_s16_void(
+// NYI:   [[TMP1:%.*]] = load <4 x i16>, ptr %a, align 1
+// NYI:   ret <4 x i16> [[TMP1]]
+// int16x4_t test_vld1_s16_void(void *a) {
+//   return vld1_s16(a);
+// }
+
+// NYI-LABEL: @test_vld1_s32_void(
+// NYI:   [[TMP1:%.*]] = load <2 x i32>, ptr %a, align 1
+// NYI:   ret <2 x i32> [[TMP1]]
+// int32x2_t test_vld1_s32_void(void *a) {
+//   return vld1_s32(a);
+// }
+
+// NYI-LABEL: @test_vld1_s64_void(
+// NYI:   [[TMP1:%.*]] = load <1 x i64>, ptr %a, align 1
+// NYI:   ret <1 x i64> [[TMP1]]
+// int64x1_t test_vld1_s64_void(void *a) {
+//   return vld1_s64(a);
+// }
+
+// NYI-LABEL: @test_vld1_f16_void(
+// NYI:   [[TMP1:%.*]] = load <4 x half>, ptr %a, align 1
+// NYI:   ret <4 x half> [[TMP1]]
+// float16x4_t test_vld1_f16_void(void *a) {
+//   return vld1_f16(a);
+// }
+
+// NYI-LABEL: @test_vld1_f32_void(
+// NYI:   [[TMP1:%.*]] = load <2 x float>, ptr %a, align 1
+// NYI:   ret <2 x float> [[TMP1]]
+// float32x2_t test_vld1_f32_void(void *a) {
+//   return vld1_f32(a);
+// }
+
+// NYI-LABEL: @test_vld1_f64_void(
+// NYI:   [[TMP1:%.*]] = load <1 x double>, ptr %a, align 1
+// NYI:   ret <1 x double> [[TMP1]]
+// float64x1_t test_vld1_f64_void(void *a) {
+//   return vld1_f64(a);
+// }
+
+// NYI-LABEL: @test_vld1_p8_void(
+// NYI:   [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1
+// NYI:   ret <8 x i8> [[TMP1]]
+// poly8x8_t test_vld1_p8_void(void *a) {
+//   return vld1_p8(a);
+// }
+
+// NYI-LABEL: @test_vld1_p16_void(
+// NYI:   [[TMP1:%.*]] = load <4 x i16>, ptr %a, align 1
+// NYI:   ret <4 x i16> [[TMP1]]
+// poly16x4_t test_vld1_p16_void(void *a) {
+//   return vld1_p16(a);
+// }
+
+// NYI-LABEL: @test_vld2q_u8(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
+// NYI:   [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %a)
+// NYI:   store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.uint8x16x2_t [[TMP5]]
+// uint8x16x2_t test_vld2q_u8(uint8_t const *a) {
+//   return vld2q_u8(a);
+// }
+
+// NYI-LABEL: @test_vld2q_u16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// NYI:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %a)
+// NYI:   store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.uint16x8x2_t [[TMP6]]
+// uint16x8x2_t test_vld2q_u16(uint16_t const *a) {
+//   return vld2q_u16(a);
+// }
+
+// NYI-LABEL: @test_vld2q_u32(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// NYI:   [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %a)
+// NYI:   store { <4 x i32>, <4 x i32> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.uint32x4x2_t [[TMP6]]
+// uint32x4x2_t test_vld2q_u32(uint32_t const *a) {
+//   return vld2q_u32(a);
+// }
+
+// NYI-LABEL: @test_vld2q_u64(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
+// NYI:   [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %a)
+// NYI:   store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.uint64x2x2_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.uint64x2x2_t [[TMP6]]
+// uint64x2x2_t test_vld2q_u64(uint64_t const *a) {
+//   return vld2q_u64(a);
+// }
+
+// NYI-LABEL: @test_vld2q_s8(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
+// NYI:   [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %a)
+// NYI:   store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP5:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.int8x16x2_t [[TMP5]]
+// int8x16x2_t test_vld2q_s8(int8_t const *a) {
+//   return vld2q_s8(a);
+// }
+
+// NYI-LABEL: @test_vld2q_s16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
+// NYI:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %a)
+// NYI:   store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.int16x8x2_t [[TMP6]]
+// int16x8x2_t test_vld2q_s16(int16_t const *a) {
+//   return vld2q_s16(a);
+// }
+
+// NYI-LABEL: @test_vld2q_s32(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
+// NYI:   [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %a)
+// NYI:   store { <4 x i32>, <4 x i32> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.int32x4x2_t [[TMP6]]
+// int32x4x2_t test_vld2q_s32(int32_t const *a) {
+//   return vld2q_s32(a);
+// }
+
+// NYI-LABEL: @test_vld2q_s64(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
+// NYI:   [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %a)
+// NYI:   store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.int64x2x2_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.int64x2x2_t [[TMP6]]
+// int64x2x2_t test_vld2q_s64(int64_t const *a) {
+//   return vld2q_s64(a);
+// }
+
+// NYI-LABEL: @test_vld2q_f16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
+// NYI:   [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2.v8f16.p0(ptr %a)
+// NYI:   store { <8 x half>, <8 x half> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float16x8x2_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.float16x8x2_t [[TMP6]]
+// float16x8x2_t test_vld2q_f16(float16_t const *a) {
+//   return vld2q_f16(a);
+// }
+
+// NYI-LABEL: @test_vld2q_f32(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
+// NYI:   [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %a)
+// NYI:   store { <4 x float>, <4 x float> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.float32x4x2_t [[TMP6]]
+// float32x4x2_t test_vld2q_f32(float32_t const *a) {
+//   return vld2q_f32(a);
+// }
+
+// NYI-LABEL: @test_vld2q_f64(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
+// NYI:   [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0(ptr %a)
+// NYI:   store { <2 x double>, <2 x double> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float64x2x2_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.float64x2x2_t [[TMP6]]
+// float64x2x2_t test_vld2q_f64(float64_t const *a) {
+//   return vld2q_f64(a);
+// }
+
+// NYI-LABEL: @test_vld2q_p8(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
+// NYI:   [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %a)
+// NYI:   store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.poly8x16x2_t [[TMP5]]
+// poly8x16x2_t test_vld2q_p8(poly8_t const *a) {
+//   return vld2q_p8(a);
+// }
+
+// NYI-LABEL: @test_vld2q_p16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// NYI:   [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %a)
+// NYI:   store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.poly16x8x2_t [[TMP6]]
+// poly16x8x2_t test_vld2q_p16(poly16_t const *a) {
+//   return vld2q_p16(a);
+// }
+
+// NYI-LABEL: @test_vld2_u8(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// NYI:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %a)
+// NYI:   store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
+// NYI:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.uint8x8x2_t [[TMP5]]
+// uint8x8x2_t test_vld2_u8(uint8_t const *a) {
+//   return vld2_u8(a);
+// }
+
+// NYI-LABEL: @test_vld2_u16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// NYI:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %a)
+// NYI:   store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.uint16x4x2_t [[TMP6]]
+// uint16x4x2_t test_vld2_u16(uint16_t const *a) {
+//   return vld2_u16(a);
+// }
+
+// NYI-LABEL: @test_vld2_u32(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// NYI:   [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr %a)
+// NYI:   store { <2 x i32>, <2 x i32> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.uint32x2x2_t [[TMP6]]
+// uint32x2x2_t test_vld2_u32(uint32_t const *a) {
+//   return vld2_u32(a);
+// }
+
+// NYI-LABEL: @test_vld2_u64(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
+// NYI:   [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %a)
+// NYI:   store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.uint64x1x2_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.uint64x1x2_t [[TMP6]]
+// uint64x1x2_t test_vld2_u64(uint64_t const *a) {
+//   return vld2_u64(a);
+// }
+
+// NYI-LABEL: @test_vld2_s8(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
+// NYI:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %a)
+// NYI:   store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
+// NYI:   [[TMP5:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.int8x8x2_t [[TMP5]]
+// int8x8x2_t test_vld2_s8(int8_t const *a) {
+//   return vld2_s8(a);
+// }
+
+// NYI-LABEL: @test_vld2_s16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
+// NYI:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %a)
+// NYI:   store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.int16x4x2_t [[TMP6]]
+// int16x4x2_t test_vld2_s16(int16_t const *a) {
+//   return vld2_s16(a);
+// }
+
+// NYI-LABEL: @test_vld2_s32(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
+// NYI:   [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr %a)
+// NYI:   store { <2 x i32>, <2 x i32> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.int32x2x2_t [[TMP6]]
+// int32x2x2_t test_vld2_s32(int32_t const *a) {
+//   return vld2_s32(a);
+// }
+
+// NYI-LABEL: @test_vld2_s64(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
+// NYI:   [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %a)
+// NYI:   store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.int64x1x2_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.int64x1x2_t [[TMP6]]
+// int64x1x2_t test_vld2_s64(int64_t const *a) {
+//   return vld2_s64(a);
+// }
+
+// NYI-LABEL: @test_vld2_f16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
+// NYI:   [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0(ptr %a)
+// NYI:   store { <4 x half>, <4 x half> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float16x4x2_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.float16x4x2_t [[TMP6]]
+// float16x4x2_t test_vld2_f16(float16_t const *a) {
+//   return vld2_f16(a);
+// }
+
+// NYI-LABEL: @test_vld2_f32(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
+// NYI:   [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0(ptr %a)
+// NYI:   store { <2 x float>, <2 x float> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.float32x2x2_t [[TMP6]]
+// float32x2x2_t test_vld2_f32(float32_t const *a) {
+//   return vld2_f32(a);
+// }
+
+// NYI-LABEL: @test_vld2_f64(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
+// NYI:   [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0(ptr %a)
+// NYI:   store { <1 x double>, <1 x double> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float64x1x2_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.float64x1x2_t [[TMP6]]
+// float64x1x2_t test_vld2_f64(float64_t const *a) {
+//   return vld2_f64(a);
+// }
+
+// NYI-LABEL: @test_vld2_p8(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// NYI:   [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %a)
+// NYI:   store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
+// NYI:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.poly8x8x2_t [[TMP5]]
+// poly8x8x2_t test_vld2_p8(poly8_t const *a) {
+//   return vld2_p8(a);
+// }
+
+// NYI-LABEL: @test_vld2_p16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// NYI:   [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %a)
+// NYI:   store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.poly16x4x2_t [[TMP6]]
+// poly16x4x2_t test_vld2_p16(poly16_t const *a) {
+//   return vld2_p16(a);
+// }
+
+// NYI-LABEL: @test_vld3q_u8(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
+// NYI:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %a)
+// NYI:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
+// NYI:   [[TMP5:%.*]] = load %struct.uint8x16x3_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.uint8x16x3_t [[TMP5]]
+// uint8x16x3_t test_vld3q_u8(uint8_t const *a) {
+//   return vld3q_u8(a);
+// }
+
+// NYI-LABEL: @test_vld3q_u16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// NYI:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %a)
+// NYI:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.uint16x8x3_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.uint16x8x3_t [[TMP6]]
+// uint16x8x3_t test_vld3q_u16(uint16_t const *a) {
+//   return vld3q_u16(a);
+// }
+
+// NYI-LABEL: @test_vld3q_u32(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// NYI:   [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %a)
+// NYI:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.uint32x4x3_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.uint32x4x3_t [[TMP6]]
+// uint32x4x3_t test_vld3q_u32(uint32_t const *a) {
+//   return vld3q_u32(a);
+// }
+
+// NYI-LABEL: @test_vld3q_u64(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
+// NYI:   [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %a)
+// NYI:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.uint64x2x3_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.uint64x2x3_t [[TMP6]]
+// uint64x2x3_t test_vld3q_u64(uint64_t const *a) {
+//   return vld3q_u64(a);
+// }
+
+// NYI-LABEL: @test_vld3q_s8(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
+// NYI:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %a)
+// NYI:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
+// NYI:   [[TMP5:%.*]] = load %struct.int8x16x3_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.int8x16x3_t [[TMP5]]
+// int8x16x3_t test_vld3q_s8(int8_t const *a) {
+//   return vld3q_s8(a);
+// }
+
+// NYI-LABEL: @test_vld3q_s16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
+// NYI:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %a)
+// NYI:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.int16x8x3_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.int16x8x3_t [[TMP6]]
+// int16x8x3_t test_vld3q_s16(int16_t const *a) {
+//   return vld3q_s16(a);
+// }
+
+// NYI-LABEL: @test_vld3q_s32(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
+// NYI:   [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %a)
+// NYI:   store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.int32x4x3_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.int32x4x3_t [[TMP6]]
+// int32x4x3_t test_vld3q_s32(int32_t const *a) {
+//   return vld3q_s32(a);
+// }
+
+// NYI-LABEL: @test_vld3q_s64(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
+// NYI:   [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %a)
+// NYI:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.int64x2x3_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.int64x2x3_t [[TMP6]]
+// int64x2x3_t test_vld3q_s64(int64_t const *a) {
+//   return vld3q_s64(a);
+// }
+
+// NYI-LABEL: @test_vld3q_f16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
+// NYI:   [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3.v8f16.p0(ptr %a)
+// NYI:   store { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float16x8x3_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.float16x8x3_t [[TMP6]]
+// float16x8x3_t test_vld3q_f16(float16_t const *a) {
+//   return vld3q_f16(a);
+// }
+
+// NYI-LABEL: @test_vld3q_f32(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
+// NYI:   [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0(ptr %a)
+// NYI:   store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float32x4x3_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.float32x4x3_t [[TMP6]]
+// float32x4x3_t test_vld3q_f32(float32_t const *a) {
+//   return vld3q_f32(a);
+// }
+
+// NYI-LABEL: @test_vld3q_f64(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
+// NYI:   [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0(ptr %a)
+// NYI:   store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float64x2x3_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.float64x2x3_t [[TMP6]]
+// float64x2x3_t test_vld3q_f64(float64_t const *a) {
+//   return vld3q_f64(a);
+// }
+
+// NYI-LABEL: @test_vld3q_p8(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
+// NYI:   [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %a)
+// NYI:   store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
+// NYI:   [[TMP5:%.*]] = load %struct.poly8x16x3_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.poly8x16x3_t [[TMP5]]
+// poly8x16x3_t test_vld3q_p8(poly8_t const *a) {
+//   return vld3q_p8(a);
+// }
+
+// NYI-LABEL: @test_vld3q_p16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// NYI:   [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %a)
+// NYI:   store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.poly16x8x3_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.poly16x8x3_t [[TMP6]]
+// poly16x8x3_t test_vld3q_p16(poly16_t const *a) {
+//   return vld3q_p16(a);
+// }
+
+// NYI-LABEL: @test_vld3_u8(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// NYI:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %a)
+// NYI:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
+// NYI:   [[TMP5:%.*]] = load %struct.uint8x8x3_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.uint8x8x3_t [[TMP5]]
+// uint8x8x3_t test_vld3_u8(uint8_t const *a) {
+//   return vld3_u8(a);
+// }
+
+// NYI-LABEL: @test_vld3_u16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// NYI:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %a)
+// NYI:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.uint16x4x3_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.uint16x4x3_t [[TMP6]]
+// uint16x4x3_t test_vld3_u16(uint16_t const *a) {
+//   return vld3_u16(a);
+// }
+
+// NYI-LABEL: @test_vld3_u32(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// NYI:   [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr %a)
+// NYI:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.uint32x2x3_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.uint32x2x3_t [[TMP6]]
+// uint32x2x3_t test_vld3_u32(uint32_t const *a) {
+//   return vld3_u32(a);
+// }
+
+// NYI-LABEL: @test_vld3_u64(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
+// NYI:   [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %a)
+// NYI:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.uint64x1x3_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.uint64x1x3_t [[TMP6]]
+// uint64x1x3_t test_vld3_u64(uint64_t const *a) {
+//   return vld3_u64(a);
+// }
+
+// NYI-LABEL: @test_vld3_s8(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
+// NYI:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %a)
+// NYI:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
+// NYI:   [[TMP5:%.*]] = load %struct.int8x8x3_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.int8x8x3_t [[TMP5]]
+// int8x8x3_t test_vld3_s8(int8_t const *a) {
+//   return vld3_s8(a);
+// }
+
+// NYI-LABEL: @test_vld3_s16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
+// NYI:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %a)
+// NYI:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.int16x4x3_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.int16x4x3_t [[TMP6]]
+// int16x4x3_t test_vld3_s16(int16_t const *a) {
+//   return vld3_s16(a);
+// }
+
+// NYI-LABEL: @test_vld3_s32(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
+// NYI:   [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr %a)
+// NYI:   store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.int32x2x3_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.int32x2x3_t [[TMP6]]
+// int32x2x3_t test_vld3_s32(int32_t const *a) {
+//   return vld3_s32(a);
+// }
+
+// NYI-LABEL: @test_vld3_s64(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
+// NYI:   [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %a)
+// NYI:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.int64x1x3_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.int64x1x3_t [[TMP6]]
+// int64x1x3_t test_vld3_s64(int64_t const *a) {
+//   return vld3_s64(a);
+// }
+
+// NYI-LABEL: @test_vld3_f16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
+// NYI:   [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0(ptr %a)
+// NYI:   store { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float16x4x3_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.float16x4x3_t [[TMP6]]
+// float16x4x3_t test_vld3_f16(float16_t const *a) {
+//   return vld3_f16(a);
+// }
+
+// NYI-LABEL: @test_vld3_f32(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
+// NYI:   [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0(ptr %a)
+// NYI:   store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float32x2x3_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.float32x2x3_t [[TMP6]]
+// float32x2x3_t test_vld3_f32(float32_t const *a) {
+//   return vld3_f32(a);
+// }
+
+// NYI-LABEL: @test_vld3_f64(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
+// NYI:   [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0(ptr %a)
+// NYI:   store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float64x1x3_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.float64x1x3_t [[TMP6]]
+// float64x1x3_t test_vld3_f64(float64_t const *a) {
+//   return vld3_f64(a);
+// }
+
+// NYI-LABEL: @test_vld3_p8(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
+// NYI:   [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %a)
+// NYI:   store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
+// NYI:   [[TMP5:%.*]] = load %struct.poly8x8x3_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.poly8x8x3_t [[TMP5]]
+// poly8x8x3_t test_vld3_p8(poly8_t const *a) {
+//   return vld3_p8(a);
+// }
+
+// NYI-LABEL: @test_vld3_p16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
+// NYI:   [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %a)
+// NYI:   store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.poly16x4x3_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.poly16x4x3_t [[TMP6]]
+// poly16x4x3_t test_vld3_p16(poly16_t const *a) {
+//   return vld3_p16(a);
+// }
+
+// NYI-LABEL: @test_vld4q_u8(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
+// NYI:   [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %a)
+// NYI:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
+// NYI:   [[TMP5:%.*]] = load %struct.uint8x16x4_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.uint8x16x4_t [[TMP5]]
+// uint8x16x4_t test_vld4q_u8(uint8_t const *a) {
+//   return vld4q_u8(a);
+// }
+
+// NYI-LABEL: @test_vld4q_u16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// NYI:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %a)
+// NYI:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.uint16x8x4_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.uint16x8x4_t [[TMP6]]
+// uint16x8x4_t test_vld4q_u16(uint16_t const *a) {
+//   return vld4q_u16(a);
+// }
+
+// NYI-LABEL: @test_vld4q_u32(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// NYI:   [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %a)
+// NYI:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.uint32x4x4_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.uint32x4x4_t [[TMP6]]
+// uint32x4x4_t test_vld4q_u32(uint32_t const *a) {
+//   return vld4q_u32(a);
+// }
+
+// NYI-LABEL: @test_vld4q_u64(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
+// NYI:   [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %a)
+// NYI:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.uint64x2x4_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.uint64x2x4_t [[TMP6]]
+// uint64x2x4_t test_vld4q_u64(uint64_t const *a) {
+//   return vld4q_u64(a);
+// }
+
+// NYI-LABEL: @test_vld4q_s8(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
+// NYI:   [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %a)
+// NYI:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
+// NYI:   [[TMP5:%.*]] = load %struct.int8x16x4_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.int8x16x4_t [[TMP5]]
+// int8x16x4_t test_vld4q_s8(int8_t const *a) {
+//   return vld4q_s8(a);
+// }
+
+// NYI-LABEL: @test_vld4q_s16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
+// NYI:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %a)
+// NYI:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.int16x8x4_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.int16x8x4_t [[TMP6]]
+// int16x8x4_t test_vld4q_s16(int16_t const *a) {
+//   return vld4q_s16(a);
+// }
+
+// NYI-LABEL: @test_vld4q_s32(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
+// NYI:   [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %a)
+// NYI:   store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.int32x4x4_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.int32x4x4_t [[TMP6]]
+// int32x4x4_t test_vld4q_s32(int32_t const *a) {
+//   return vld4q_s32(a);
+// }
+
+// NYI-LABEL: @test_vld4q_s64(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
+// NYI:   [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %a)
+// NYI:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.int64x2x4_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.int64x2x4_t [[TMP6]]
+// int64x2x4_t test_vld4q_s64(int64_t const *a) {
+//   return vld4q_s64(a);
+// }
+
+// NYI-LABEL: @test_vld4q_f16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
+// NYI:   [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4.v8f16.p0(ptr %a)
+// NYI:   store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float16x8x4_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.float16x8x4_t [[TMP6]]
+// float16x8x4_t test_vld4q_f16(float16_t const *a) {
+//   return vld4q_f16(a);
+// }
+
+// NYI-LABEL: @test_vld4q_f32(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
+// NYI:   [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0(ptr %a)
+// NYI:   store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float32x4x4_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.float32x4x4_t [[TMP6]]
+// float32x4x4_t test_vld4q_f32(float32_t const *a) {
+//   return vld4q_f32(a);
+// }
+
+// NYI-LABEL: @test_vld4q_f64(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
+// NYI:   [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0(ptr %a)
+// NYI:   store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float64x2x4_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.float64x2x4_t [[TMP6]]
+// float64x2x4_t test_vld4q_f64(float64_t const *a) {
+//   return vld4q_f64(a);
+// }
+
+// NYI-LABEL: @test_vld4q_p8(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
+// NYI:   [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %a)
+// NYI:   store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
+// NYI:   [[TMP5:%.*]] = load %struct.poly8x16x4_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.poly8x16x4_t [[TMP5]]
+// poly8x16x4_t test_vld4q_p8(poly8_t const *a) {
+//   return vld4q_p8(a);
+// }
+
+// NYI-LABEL: @test_vld4q_p16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// NYI:   [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %a)
+// NYI:   store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.poly16x8x4_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.poly16x8x4_t [[TMP6]]
+// poly16x8x4_t test_vld4q_p16(poly16_t const *a) {
+//   return vld4q_p16(a);
+// }
+
+// NYI-LABEL: @test_vld4_u8(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
+// NYI:   [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %a)
+// NYI:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP5:%.*]] = load %struct.uint8x8x4_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.uint8x8x4_t [[TMP5]]
+// uint8x8x4_t test_vld4_u8(uint8_t const *a) {
+//   return vld4_u8(a);
+// }
+
+// NYI-LABEL: @test_vld4_u16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
+// NYI:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %a)
+// NYI:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.uint16x4x4_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.uint16x4x4_t [[TMP6]]
+// uint16x4x4_t test_vld4_u16(uint16_t const *a) {
+//   return vld4_u16(a);
+// }
+
+// NYI-LABEL: @test_vld4_u32(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
+// NYI:   [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr %a)
+// NYI:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.uint32x2x4_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.uint32x2x4_t [[TMP6]]
+// uint32x2x4_t test_vld4_u32(uint32_t const *a) {
+//   return vld4_u32(a);
+// }
+
+// NYI-LABEL: @test_vld4_u64(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
+// NYI:   [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %a)
+// NYI:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.uint64x1x4_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.uint64x1x4_t [[TMP6]]
+// uint64x1x4_t test_vld4_u64(uint64_t const *a) {
+//   return vld4_u64(a);
+// }
+
+// NYI-LABEL: @test_vld4_s8(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
+// NYI:   [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %a)
+// NYI:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP5:%.*]] = load %struct.int8x8x4_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.int8x8x4_t [[TMP5]]
+// int8x8x4_t test_vld4_s8(int8_t const *a) {
+//   return vld4_s8(a);
+// }
+
+// NYI-LABEL: @test_vld4_s16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
+// NYI:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %a)
+// NYI:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.int16x4x4_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.int16x4x4_t [[TMP6]]
+// int16x4x4_t test_vld4_s16(int16_t const *a) {
+//   return vld4_s16(a);
+// }
+
+// NYI-LABEL: @test_vld4_s32(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
+// NYI:   [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr %a)
+// NYI:   store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.int32x2x4_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.int32x2x4_t [[TMP6]]
+// int32x2x4_t test_vld4_s32(int32_t const *a) {
+//   return vld4_s32(a);
+// }
+
+// NYI-LABEL: @test_vld4_s64(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
+// NYI:   [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %a)
+// NYI:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.int64x1x4_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.int64x1x4_t [[TMP6]]
+// int64x1x4_t test_vld4_s64(int64_t const *a) {
+//   return vld4_s64(a);
+// }
+
+// NYI-LABEL: @test_vld4_f16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
+// NYI:   [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4.v4f16.p0(ptr %a)
+// NYI:   store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float16x4x4_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.float16x4x4_t [[TMP6]]
+// float16x4x4_t test_vld4_f16(float16_t const *a) {
+//   return vld4_f16(a);
+// }
+
+// NYI-LABEL: @test_vld4_f32(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
+// NYI:   [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0(ptr %a)
+// NYI:   store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float32x2x4_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.float32x2x4_t [[TMP6]]
+// float32x2x4_t test_vld4_f32(float32_t const *a) {
+//   return vld4_f32(a);
+// }
+
+// NYI-LABEL: @test_vld4_f64(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
+// NYI:   [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0(ptr %a)
+// NYI:   store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float64x1x4_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.float64x1x4_t [[TMP6]]
+// float64x1x4_t test_vld4_f64(float64_t const *a) {
+//   return vld4_f64(a);
+// }
+
+// NYI-LABEL: @test_vld4_p8(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
+// NYI:   [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %a)
+// NYI:   store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP5:%.*]] = load %struct.poly8x8x4_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.poly8x8x4_t [[TMP5]]
+// poly8x8x4_t test_vld4_p8(poly8_t const *a) {
+//   return vld4_p8(a);
+// }
+
+// NYI-LABEL: @test_vld4_p16(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
+// NYI:   [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %a)
+// NYI:   store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.poly16x4x4_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.poly16x4x4_t [[TMP6]]
+// poly16x4x4_t test_vld4_p16(poly16_t const *a) {
+//   return vld4_p16(a);
+// }
+
+void test_vst1q_u8(uint8_t *a, uint8x16_t b) {
+  vst1q_u8(a, b);
+  // CIR-LABEL: @test_vst1q_u8
+  // CIR: %[[CAST:.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u8i x 16>>
+  // CIR: cir.store align(1) %{{.*}}, %[[CAST]] : !cir.vector<!u8i x 16>, !cir.ptr<!cir.vector<!u8i x 16>>
+
+  // LLVM-LABEL: @test_vst1q_u8
+  // LLVM:   store <16 x i8> %{{.*}}, ptr %0, align 1
+}
+
+void test_vst1q_u16(uint16_t *a, uint16x8_t b) {
+  vst1q_u16(a, b);
+  // CIR-LABEL: @test_vst1q_u16
+  // CIR: %[[CAST:.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u16i x 8>>
+  // CIR: cir.store align(2) %{{.*}}, %[[CAST]] : !cir.vector<!u16i x 8>, !cir.ptr<!cir.vector<!u16i x 8>>
+
+  // LLVM-LABEL: @test_vst1q_u16
+  // LLVM:   store <8 x i16> %{{.*}}, ptr %0, align 2
+}
+
+void test_vst1q_u32(uint32_t *a, uint32x4_t b) {
+  vst1q_u32(a, b);
+  // CIR-LABEL: @test_vst1q_u32
+  // CIR: %[[CAST:.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u32i x 4>>
+  // CIR: cir.store align(4) %{{.*}}, %[[CAST]] : !cir.vector<!u32i x 4>, !cir.ptr<!cir.vector<!u32i x 4>>
+
+  // LLVM-LABEL: @test_vst1q_u32
+  // LLVM:   store <4 x i32> %{{.*}}, ptr %0, align 4
+}
+
+void test_vst1q_u64(uint64_t *a, uint64x2_t b) {
+  vst1q_u64(a, b);
+  // CIR-LABEL: @test_vst1q_u64
+  // CIR: %[[CAST:.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!u64i x 2>>
+  // CIR: cir.store align(8) %{{.*}}, %[[CAST]] : !cir.vector<!u64i x 2>, !cir.ptr<!cir.vector<!u64i x 2>>
+
+  // LLVM-LABEL: @test_vst1q_u64
+  // LLVM:   store <2 x i64> %{{.*}}, ptr %0, align 8
+}
+
+void test_vst1q_s8(int8_t *a, int8x16_t b) {
+  vst1q_s8(a, b);
+  // CIR-LABEL: @test_vst1q_s8
+  // CIR: %[[CAST:.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!s8i x 16>>
+  // CIR: cir.store align(1) %{{.*}}, %[[CAST]] : !cir.vector<!s8i x 16>, !cir.ptr<!cir.vector<!s8i x 16>>
+
+  // LLVM-LABEL: @test_vst1q_s8
+  // LLVM:   store <16 x i8> %{{.*}}, ptr %0, align 1
+}
+
+void test_vst1q_s16(int16_t *a, int16x8_t b) {
+  vst1q_s16(a, b);
+  // CIR-LABEL: @test_vst1q_s16
+  // CIR: %[[CAST:.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!s16i x 8>>
+  // CIR: cir.store align(2) %{{.*}}, %[[CAST]] : !cir.vector<!s16i x 8>, !cir.ptr<!cir.vector<!s16i x 8>>
+
+  // LLVM-LABEL: @test_vst1q_s16
+  // LLVM:   store <8 x i16> %{{.*}}, ptr %0, align 2
+}
+
+void test_vst1q_s32(int32_t *a, int32x4_t b) {
+  vst1q_s32(a, b);
+  // CIR-LABEL: @test_vst1q_s32
+  // CIR: %[[CAST:.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!s32i x 4>>
+  // CIR: cir.store align(4) %{{.*}}, %[[CAST]] : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+
+  // LLVM-LABEL: @test_vst1q_s32
+  // LLVM:   store <4 x i32> %{{.*}}, ptr %0, align 4
+}
+
+void test_vst1q_s64(int64_t *a, int64x2_t b) {
+  vst1q_s64(a, b);
+  // CIR-LABEL: @test_vst1q_s64
+  // CIR: %[[CAST:.*]] = cir.cast bitcast {{.*}} : !cir.ptr<!void> -> !cir.ptr<!cir.vector<!s64i x 2>>
+  // CIR: cir.store align(8) %{{.*}}, %[[CAST]] : !cir.vector<!s64i x 2>, !cir.ptr<!cir.vector<!s64i x 2>>
+
+  // LLVM-LABEL: @test_vst1q_s64
+  // LLVM:   store <2 x i64> %{{.*}}, ptr %0, align 8
+}
+
+// NYI-LABEL: @test_vst1q_f16(
+// NYI:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
+// NYI:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// NYI:   store <8 x half> [[TMP3]], ptr %a
+// NYI:   ret void
+// void test_vst1q_f16(float16_t *a, float16x8_t b) {
+//   vst1q_f16(a, b);
+// }
+
+// NYI-LABEL: @test_vst1q_f32(
+// NYI:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
+// NYI:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
+// NYI:   store <4 x float> [[TMP3]], ptr %a
+// NYI:   ret void
+// void test_vst1q_f32(float32_t *a, float32x4_t b) {
+//   vst1q_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vst1q_f64(
+// NYI:   [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8>
+// NYI:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
+// NYI:   store <2 x double> [[TMP3]], ptr %a
+// NYI:   ret void
+// void test_vst1q_f64(float64_t *a, float64x2_t b) {
+//   vst1q_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vst1q_p8(
+// NYI:   store <16 x i8> %b, ptr %a
+// NYI:   ret void
+// void test_vst1q_p8(poly8_t *a, poly8x16_t b) {
+//   vst1q_p8(a, b);
+// }
+
+// NYI-LABEL: @test_vst1q_p16(
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// NYI:   store <8 x i16> [[TMP3]], ptr %a
+// NYI:   ret void
+// void test_vst1q_p16(poly16_t *a, poly16x8_t b) {
+//   vst1q_p16(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_u8(
+// NYI:   store <8 x i8> %b, ptr %a
+// NYI:   ret void
+// void test_vst1_u8(uint8_t *a, uint8x8_t b) {
+//   vst1_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_u16(
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// NYI:   store <4 x i16> [[TMP3]], ptr %a
+// NYI:   ret void
+// void test_vst1_u16(uint16_t *a, uint16x4_t b) {
+//   vst1_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_u32(
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// NYI:   store <2 x i32> [[TMP3]], ptr %a
+// NYI:   ret void
+// void test_vst1_u32(uint32_t *a, uint32x2_t b) {
+//   vst1_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_u64(
+// NYI:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// NYI:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// NYI:   store <1 x i64> [[TMP3]], ptr %a
+// NYI:   ret void
+// void test_vst1_u64(uint64_t *a, uint64x1_t b) {
+//   vst1_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_s8(
+// NYI:   store <8 x i8> %b, ptr %a
+// NYI:   ret void
+// void test_vst1_s8(int8_t *a, int8x8_t b) {
+//   vst1_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_s16(
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// NYI:   store <4 x i16> [[TMP3]], ptr %a
+// NYI:   ret void
+// void test_vst1_s16(int16_t *a, int16x4_t b) {
+//   vst1_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_s32(
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// NYI:   store <2 x i32> [[TMP3]], ptr %a
+// NYI:   ret void
+// void test_vst1_s32(int32_t *a, int32x2_t b) {
+//   vst1_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_s64(
+// NYI:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// NYI:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// NYI:   store <1 x i64> [[TMP3]], ptr %a
+// NYI:   ret void
+// void test_vst1_s64(int64_t *a, int64x1_t b) {
+//   vst1_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_f16(
+// NYI:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
+// NYI:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// NYI:   store <4 x half> [[TMP3]], ptr %a
+// NYI:   ret void
+// void test_vst1_f16(float16_t *a, float16x4_t b) {
+//   vst1_f16(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_f32(
+// NYI:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
+// NYI:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
+// NYI:   store <2 x float> [[TMP3]], ptr %a
+// NYI:   ret void
+// void test_vst1_f32(float32_t *a, float32x2_t b) {
+//   vst1_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_f64(
+// NYI:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// NYI:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
+// NYI:   store <1 x double> [[TMP3]], ptr %a
+// NYI:   ret void
+// void test_vst1_f64(float64_t *a, float64x1_t b) {
+//   vst1_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_p8(
+// NYI:   store <8 x i8> %b, ptr %a
+// NYI:   ret void
+// void test_vst1_p8(poly8_t *a, poly8x8_t b) {
+//   vst1_p8(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_p16(
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// NYI:   store <4 x i16> [[TMP3]], ptr %a
+// NYI:   ret void
+// void test_vst1_p16(poly16_t *a, poly16x4_t b) {
+//   vst1_p16(a, b);
+// }
+
+// NYI-LABEL: @test_vst2q_u8(
+// NYI:   [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <16 x i8>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
+// NYI:   call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a)
+// NYI:   ret void
+// void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) {
+//   vst2q_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vst2q_u16(
+// NYI:   [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <8 x i16>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// NYI:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// NYI:   call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) {
+//   vst2q_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vst2q_u32(
+// NYI:   [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <4 x i32>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// NYI:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
+// NYI:   call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) {
+//   vst2q_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vst2q_u64(
+// NYI:   [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <2 x i64>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
+// NYI:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
+// NYI:   call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) {
+//   vst2q_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vst2q_s8(
+// NYI:   [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <16 x i8>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
+// NYI:   call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a)
+// NYI:   ret void
+// void test_vst2q_s8(int8_t *a, int8x16x2_t b) {
+//   vst2q_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vst2q_s16(
+// NYI:   [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <8 x i16>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// NYI:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// NYI:   call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2q_s16(int16_t *a, int16x8x2_t b) {
+//   vst2q_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vst2q_s32(
+// NYI:   [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <4 x i32>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// NYI:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
+// NYI:   call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2q_s32(int32_t *a, int32x4x2_t b) {
+//   vst2q_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vst2q_s64(
+// NYI:   [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <2 x i64>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
+// NYI:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
+// NYI:   call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2q_s64(int64_t *a, int64x2x2_t b) {
+//   vst2q_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vst2q_f16(
+// NYI:   [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <8 x half>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// NYI:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// NYI:   call void @llvm.aarch64.neon.st2.v8f16.p0(<8 x half> [[TMP7]], <8 x half> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2q_f16(float16_t *a, float16x8x2_t b) {
+//   vst2q_f16(a, b);
+// }
+
+// NYI-LABEL: @test_vst2q_f32(
+// NYI:   [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <4 x float>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// NYI:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// NYI:   call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> [[TMP7]], <4 x float> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2q_f32(float32_t *a, float32x4x2_t b) {
+//   vst2q_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vst2q_f64(
+// NYI:   [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <2 x double>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// NYI:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
+// NYI:   call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> [[TMP7]], <2 x double> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2q_f64(float64_t *a, float64x2x2_t b) {
+//   vst2q_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vst2q_p8(
+// NYI:   [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <16 x i8>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
+// NYI:   call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a)
+// NYI:   ret void
+// void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) {
+//   vst2q_p8(a, b);
+// }
+
+// NYI-LABEL: @test_vst2q_p16(
+// NYI:   [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <8 x i16>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// NYI:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// NYI:   call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) {
+//   vst2q_p16(a, b);
+// }
+
+// NYI-LABEL: @test_vst2_u8(
+// NYI:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <8 x i8>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 16, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
+// NYI:   call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a)
+// NYI:   ret void
+// void test_vst2_u8(uint8_t *a, uint8x8x2_t b) {
+//   vst2_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vst2_u16(
+// NYI:   [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <4 x i16>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 16, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// NYI:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// NYI:   call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2_u16(uint16_t *a, uint16x4x2_t b) {
+//   vst2_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vst2_u32(
+// NYI:   [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <2 x i32>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 16, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// NYI:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// NYI:   call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2_u32(uint32_t *a, uint32x2x2_t b) {
+//   vst2_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vst2_u64(
+// NYI:   [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <1 x i64>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 16, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// NYI:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// NYI:   call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2_u64(uint64_t *a, uint64x1x2_t b) {
+//   vst2_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vst2_s8(
+// NYI:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <8 x i8>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 16, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
+// NYI:   call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a)
+// NYI:   ret void
+// void test_vst2_s8(int8_t *a, int8x8x2_t b) {
+//   vst2_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vst2_s16(
+// NYI:   [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <4 x i16>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 16, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// NYI:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// NYI:   call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2_s16(int16_t *a, int16x4x2_t b) {
+//   vst2_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vst2_s32(
+// NYI:   [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <2 x i32>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 16, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// NYI:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// NYI:   call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2_s32(int32_t *a, int32x2x2_t b) {
+//   vst2_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vst2_s64(
+// NYI:   [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <1 x i64>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 16, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// NYI:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// NYI:   call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2_s64(int64_t *a, int64x1x2_t b) {
+//   vst2_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vst2_f16(
+// NYI:   [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <4 x half>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 16, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// NYI:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// NYI:   call void @llvm.aarch64.neon.st2.v4f16.p0(<4 x half> [[TMP7]], <4 x half> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2_f16(float16_t *a, float16x4x2_t b) {
+//   vst2_f16(a, b);
+// }
+
+// NYI-LABEL: @test_vst2_f32(
+// NYI:   [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <2 x float>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 16, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// NYI:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// NYI:   call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> [[TMP7]], <2 x float> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2_f32(float32_t *a, float32x2x2_t b) {
+//   vst2_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vst2_f64(
+// NYI:   [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <1 x double>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 16, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// NYI:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
+// NYI:   call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> [[TMP7]], <1 x double> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2_f64(float64_t *a, float64x1x2_t b) {
+//   vst2_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vst2_p8(
+// NYI:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <8 x i8>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 16, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
+// NYI:   call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a)
+// NYI:   ret void
+// void test_vst2_p8(poly8_t *a, poly8x8x2_t b) {
+//   vst2_p8(a, b);
+// }
+
+// NYI-LABEL: @test_vst2_p16(
+// NYI:   [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <4 x i16>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 16, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// NYI:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// NYI:   call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst2_p16(poly16_t *a, poly16x4x2_t b) {
+//   vst2_p16(a, b);
+// }
+
+// NYI-LABEL: @test_vst3q_u8(
+// NYI:   [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <16 x i8>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 48, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
+// NYI:   call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a)
+// NYI:   ret void
+// void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) {
+//   vst3q_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vst3q_u16(
+// NYI:   [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <8 x i16>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 48, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// NYI:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
+// NYI:   call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) {
+//   vst3q_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vst3q_u32(
+// NYI:   [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <4 x i32>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 48, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// NYI:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
+// NYI:   call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) {
+//   vst3q_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vst3q_u64(
+// NYI:   [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <2 x i64>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 48, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
+// NYI:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
+// NYI:   call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) {
+//   vst3q_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vst3q_s8(
+// NYI:   [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <16 x i8>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 48, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
+// NYI:   call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a)
+// NYI:   ret void
+// void test_vst3q_s8(int8_t *a, int8x16x3_t b) {
+//   vst3q_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vst3q_s16(
+// NYI:   [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <8 x i16>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 48, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// NYI:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
+// NYI:   call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3q_s16(int16_t *a, int16x8x3_t b) {
+//   vst3q_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vst3q_s32(
+// NYI:   [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <4 x i32>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 48, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// NYI:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
+// NYI:   call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3q_s32(int32_t *a, int32x4x3_t b) {
+//   vst3q_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vst3q_s64(
+// NYI:   [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <2 x i64>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 48, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
+// NYI:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
+// NYI:   call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3q_s64(int64_t *a, int64x2x3_t b) {
+//   vst3q_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vst3q_f16(
+// NYI:   [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <8 x half>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 48, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// NYI:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
+// NYI:   call void @llvm.aarch64.neon.st3.v8f16.p0(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3q_f16(float16_t *a, float16x8x3_t b) {
+//   vst3q_f16(a, b);
+// }
+
+// NYI-LABEL: @test_vst3q_f32(
+// NYI:   [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <4 x float>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 48, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// NYI:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
+// NYI:   call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3q_f32(float32_t *a, float32x4x3_t b) {
+//   vst3q_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vst3q_f64(
+// NYI:   [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <2 x double>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 48, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// NYI:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
+// NYI:   call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3q_f64(float64_t *a, float64x2x3_t b) {
+//   vst3q_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vst3q_p8(
+// NYI:   [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <16 x i8>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 48, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
+// NYI:   call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a)
+// NYI:   ret void
+// void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) {
+//   vst3q_p8(a, b);
+// }
+
+// NYI-LABEL: @test_vst3q_p16(
+// NYI:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <8 x i16>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 48, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// NYI:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
+// NYI:   call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) {
+//   vst3q_p16(a, b);
+// }
+
+// NYI-LABEL: @test_vst3_u8(
+// NYI:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <8 x i8>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 24, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
+// NYI:   call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a)
+// NYI:   ret void
+// void test_vst3_u8(uint8_t *a, uint8x8x3_t b) {
+//   vst3_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vst3_u16(
+// NYI:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <4 x i16>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 24, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// NYI:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
+// NYI:   call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3_u16(uint16_t *a, uint16x4x3_t b) {
+//   vst3_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vst3_u32(
+// NYI:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <2 x i32>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 24, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// NYI:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
+// NYI:   call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3_u32(uint32_t *a, uint32x2x3_t b) {
+//   vst3_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vst3_u64(
+// NYI:   [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <1 x i64>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 24, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// NYI:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
+// NYI:   call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3_u64(uint64_t *a, uint64x1x3_t b) {
+//   vst3_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vst3_s8(
+// NYI:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <8 x i8>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 24, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
+// NYI:   call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a)
+// NYI:   ret void
+// void test_vst3_s8(int8_t *a, int8x8x3_t b) {
+//   vst3_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vst3_s16(
+// NYI:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <4 x i16>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 24, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// NYI:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
+// NYI:   call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3_s16(int16_t *a, int16x4x3_t b) {
+//   vst3_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vst3_s32(
+// NYI:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <2 x i32>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 24, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// NYI:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
+// NYI:   call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3_s32(int32_t *a, int32x2x3_t b) {
+//   vst3_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vst3_s64(
+// NYI:   [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <1 x i64>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 24, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// NYI:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
+// NYI:   call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3_s64(int64_t *a, int64x1x3_t b) {
+//   vst3_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vst3_f16(
+// NYI:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <4 x half>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 24, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// NYI:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
+// NYI:   call void @llvm.aarch64.neon.st3.v4f16.p0(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3_f16(float16_t *a, float16x4x3_t b) {
+//   vst3_f16(a, b);
+// }
+
+// NYI-LABEL: @test_vst3_f32(
+// NYI:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <2 x float>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 24, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// NYI:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
+// NYI:   call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3_f32(float32_t *a, float32x2x3_t b) {
+//   vst3_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vst3_f64(
+// NYI:   [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <1 x double>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 24, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// NYI:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
+// NYI:   call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3_f64(float64_t *a, float64x1x3_t b) {
+//   vst3_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vst3_p8(
+// NYI:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <8 x i8>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 24, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
+// NYI:   call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a)
+// NYI:   ret void
+// void test_vst3_p8(poly8_t *a, poly8x8x3_t b) {
+//   vst3_p8(a, b);
+// }
+
+// NYI-LABEL: @test_vst3_p16(
+// NYI:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <4 x i16>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 24, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// NYI:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
+// NYI:   call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst3_p16(poly16_t *a, poly16x4x3_t b) {
+//   vst3_p16(a, b);
+// }
+
+// NYI-LABEL: @test_vst4q_u8(
+// NYI:   [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <16 x i8>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 64, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
+// NYI:   call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a)
+// NYI:   ret void
+// void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) {
+//   vst4q_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vst4q_u16(
+// NYI:   [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <8 x i16>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 64, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
+// NYI:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// NYI:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// NYI:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
+// NYI:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
+// NYI:   call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) {
+//   vst4q_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vst4q_u32(
+// NYI:   [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <4 x i32>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 64, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
+// NYI:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// NYI:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
+// NYI:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
+// NYI:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
+// NYI:   call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) {
+//   vst4q_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vst4q_u64(
+// NYI:   [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <2 x i64>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 64, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
+// NYI:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
+// NYI:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
+// NYI:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
+// NYI:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
+// NYI:   call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) {
+//   vst4q_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vst4q_s8(
+// NYI:   [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <16 x i8>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 64, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
+// NYI:   call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a)
+// NYI:   ret void
+// void test_vst4q_s8(int8_t *a, int8x16x4_t b) {
+//   vst4q_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vst4q_s16(
+// NYI:   [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <8 x i16>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 64, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
+// NYI:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// NYI:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// NYI:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
+// NYI:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
+// NYI:   call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4q_s16(int16_t *a, int16x8x4_t b) {
+//   vst4q_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vst4q_s32(
+// NYI:   [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <4 x i32>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 64, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
+// NYI:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
+// NYI:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
+// NYI:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
+// NYI:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
+// NYI:   call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4q_s32(int32_t *a, int32x4x4_t b) {
+//   vst4q_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vst4q_s64(
+// NYI:   [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <2 x i64>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 64, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
+// NYI:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
+// NYI:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
+// NYI:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
+// NYI:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
+// NYI:   call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
+//   vst4q_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vst4q_f16(
+// NYI:   [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <8 x half>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 64, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <8 x half>, ptr [[ARRAYIDX6]], align 16
+// NYI:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
+// NYI:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
+// NYI:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
+// NYI:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
+// NYI:   call void @llvm.aarch64.neon.st4.v8f16.p0(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
+//   vst4q_f16(a, b);
+// }
+
+// NYI-LABEL: @test_vst4q_f32(
+// NYI:   [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <4 x float>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 64, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align 16
+// NYI:   [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
+// NYI:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
+// NYI:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
+// NYI:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
+// NYI:   call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4q_f32(float32_t *a, float32x4x4_t b) {
+//   vst4q_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vst4q_f64(
+// NYI:   [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <2 x double>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 64, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 16
+// NYI:   [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// NYI:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
+// NYI:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
+// NYI:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
+// NYI:   call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4q_f64(float64_t *a, float64x2x4_t b) {
+//   vst4q_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vst4q_p8(
+// NYI:   [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <16 x i8>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 64, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
+// NYI:   call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a)
+// NYI:   ret void
+// void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) {
+//   vst4q_p8(a, b);
+// }
+
+// NYI-LABEL: @test_vst4q_p16(
+// NYI:   [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <8 x i16>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 64, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
+// NYI:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
+// NYI:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
+// NYI:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
+// NYI:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
+// NYI:   call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) {
+//   vst4q_p16(a, b);
+// }
+
+// NYI-LABEL: @test_vst4_u8(
+// NYI:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <8 x i8>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
+// NYI:   call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a)
+// NYI:   ret void
+// void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
+//   vst4_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vst4_u16(
+// NYI:   [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <4 x i16>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
+// NYI:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// NYI:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// NYI:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
+// NYI:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
+// NYI:   call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4_u16(uint16_t *a, uint16x4x4_t b) {
+//   vst4_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vst4_u32(
+// NYI:   [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <2 x i32>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
+// NYI:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// NYI:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// NYI:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
+// NYI:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
+// NYI:   call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4_u32(uint32_t *a, uint32x2x4_t b) {
+//   vst4_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vst4_u64(
+// NYI:   [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <1 x i64>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
+// NYI:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// NYI:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// NYI:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
+// NYI:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
+// NYI:   call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4_u64(uint64_t *a, uint64x1x4_t b) {
+//   vst4_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vst4_s8(
+// NYI:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <8 x i8>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
+// NYI:   call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a)
+// NYI:   ret void
+// void test_vst4_s8(int8_t *a, int8x8x4_t b) {
+//   vst4_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vst4_s16(
+// NYI:   [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <4 x i16>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
+// NYI:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// NYI:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// NYI:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
+// NYI:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
+// NYI:   call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4_s16(int16_t *a, int16x4x4_t b) {
+//   vst4_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vst4_s32(
+// NYI:   [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <2 x i32>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
+// NYI:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
+// NYI:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
+// NYI:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
+// NYI:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
+// NYI:   call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4_s32(int32_t *a, int32x2x4_t b) {
+//   vst4_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vst4_s64(
+// NYI:   [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <1 x i64>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
+// NYI:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// NYI:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// NYI:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
+// NYI:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
+// NYI:   call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4_s64(int64_t *a, int64x1x4_t b) {
+//   vst4_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vst4_f16(
+// NYI:   [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <4 x half>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <4 x half>, ptr [[ARRAYIDX6]], align 8
+// NYI:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
+// NYI:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
+// NYI:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
+// NYI:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
+// NYI:   call void @llvm.aarch64.neon.st4.v4f16.p0(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4_f16(float16_t *a, float16x4x4_t b) {
+//   vst4_f16(a, b);
+// }
+
+// NYI-LABEL: @test_vst4_f32(
+// NYI:   [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <2 x float>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 8
+// NYI:   [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
+// NYI:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
+// NYI:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
+// NYI:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
+// NYI:   call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4_f32(float32_t *a, float32x2x4_t b) {
+//   vst4_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vst4_f64(
+// NYI:   [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <1 x double>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <1 x double>, ptr [[ARRAYIDX6]], align 8
+// NYI:   [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// NYI:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
+// NYI:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
+// NYI:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
+// NYI:   call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4_f64(float64_t *a, float64x1x4_t b) {
+//   vst4_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vst4_p8(
+// NYI:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <8 x i8>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
+// NYI:   call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a)
+// NYI:   ret void
+// void test_vst4_p8(poly8_t *a, poly8x8x4_t b) {
+//   vst4_p8(a, b);
+// }
+
+// NYI-LABEL: @test_vst4_p16(
+// NYI:   [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <4 x i16>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
+// NYI:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
+// NYI:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
+// NYI:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
+// NYI:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
+// NYI:   call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst4_p16(poly16_t *a, poly16x4x4_t b) {
+//   vst4_p16(a, b);
+// }
+
+// NYI-LABEL: @test_vld1q_f64_x2(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
+// NYI:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr %a)
+// NYI:   store { <2 x double>, <2 x double> } [[VLD1XN]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float64x2x2_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.float64x2x2_t [[TMP6]]
+// float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
+//   return vld1q_f64_x2(a);
+// }
+
+// NYI-LABEL: @test_vld1q_p64_x2(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
+// NYI:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr %a)
+// NYI:   store { <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.poly64x2x2_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.poly64x2x2_t [[TMP6]]
+// poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
+//   return vld1q_p64_x2(a);
+// }
+
+// NYI-LABEL: @test_vld1_f64_x2(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
+// NYI:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr %a)
+// NYI:   store { <1 x double>, <1 x double> } [[VLD1XN]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float64x1x2_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.float64x1x2_t [[TMP6]]
+// float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
+//   return vld1_f64_x2(a);
+// }
+
+// NYI-LABEL: @test_vld1_p64_x2(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
+// NYI:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr %a)
+// NYI:   store { <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.poly64x1x2_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.poly64x1x2_t [[TMP6]]
+// poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
+//   return vld1_p64_x2(a);
+// }
+
+// NYI-LABEL: @test_vld1q_f64_x3(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
+// NYI:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr %a)
+// NYI:   store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float64x2x3_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.float64x2x3_t [[TMP6]]
+// float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
+//   return vld1q_f64_x3(a);
+// }
+
+// NYI-LABEL: @test_vld1q_p64_x3(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
+// NYI:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr %a)
+// NYI:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.poly64x2x3_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.poly64x2x3_t [[TMP6]]
+// poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) {
+//   return vld1q_p64_x3(a);
+// }
+
+// NYI-LABEL: @test_vld1_f64_x3(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
+// NYI:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr %a)
+// NYI:   store { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float64x1x3_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.float64x1x3_t [[TMP6]]
+// float64x1x3_t test_vld1_f64_x3(float64_t const *a) {
+//   return vld1_f64_x3(a);
+// }
+
+// NYI-LABEL: @test_vld1_p64_x3(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
+// NYI:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr %a)
+// NYI:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.poly64x1x3_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.poly64x1x3_t [[TMP6]]
+// poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) {
+//   return vld1_p64_x3(a);
+// }
+
+// NYI-LABEL: @test_vld1q_f64_x4(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
+// NYI:   [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr %a)
+// NYI:   store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float64x2x4_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.float64x2x4_t [[TMP6]]
+// float64x2x4_t test_vld1q_f64_x4(float64_t const *a) {
+//   return vld1q_f64_x4(a);
+// }
+
+// NYI-LABEL: @test_vld1q_p64_x4(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
+// NYI:   [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
+// NYI:   [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr %a)
+// NYI:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.poly64x2x4_t, ptr [[RETVAL]], align 16
+// NYI:   ret %struct.poly64x2x4_t [[TMP6]]
+// poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) {
+//   return vld1q_p64_x4(a);
+// }
+
+// NYI-LABEL: @test_vld1_f64_x4(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
+// NYI:   [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr %a)
+// NYI:   store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.float64x1x4_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.float64x1x4_t [[TMP6]]
+// float64x1x4_t test_vld1_f64_x4(float64_t const *a) {
+//   return vld1_f64_x4(a);
+// }
+
+// NYI-LABEL: @test_vld1_p64_x4(
+// NYI:   [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
+// NYI:   [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
+// NYI:   [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr %a)
+// NYI:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
+// NYI:   [[TMP6:%.*]] = load %struct.poly64x1x4_t, ptr [[RETVAL]], align 8
+// NYI:   ret %struct.poly64x1x4_t [[TMP6]]
+// poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) {
+//   return vld1_p64_x4(a);
+// }
+
+// NYI-LABEL: @test_vst1q_f64_x2(
+// NYI:   [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <2 x double>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// NYI:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
+// NYI:   call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> [[TMP7]], <2 x double> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) {
+//   vst1q_f64_x2(a, b);
+// }
+
+// NYI-LABEL: @test_vst1q_p64_x2(
+// NYI:   [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <2 x i64>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
+// NYI:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
+// NYI:   call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) {
+//   vst1q_p64_x2(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_f64_x2(
+// NYI:   [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <1 x double>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 16, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// NYI:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
+// NYI:   call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> [[TMP7]], <1 x double> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) {
+//   vst1_f64_x2(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_p64_x2(
+// NYI:   [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [2 x <1 x i64>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 16, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
+// NYI:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// NYI:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// NYI:   call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a)
+// NYI:   ret void
+// void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) {
+//   vst1_p64_x2(a, b);
+// }
+
+// NYI-LABEL: @test_vst1q_f64_x3(
+// NYI:   [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <2 x double>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 48, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// NYI:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
+// NYI:   call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) {
+//   vst1q_f64_x3(a, b);
+// }
+
+// NYI-LABEL: @test_vst1q_p64_x3(
+// NYI:   [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <2 x i64>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 48, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
+// NYI:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
+// NYI:   call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) {
+//   vst1q_p64_x3(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_f64_x3(
+// NYI:   [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <1 x double>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 24, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// NYI:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
+// NYI:   call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) {
+//   vst1_f64_x3(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_p64_x3(
+// NYI:   [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [3 x <1 x i64>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 24, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
+// NYI:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// NYI:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
+// NYI:   call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a)
+// NYI:   ret void
+// void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) {
+//   vst1_p64_x3(a, b);
+// }
+
+// NYI-LABEL: @test_vst1q_f64_x4(
+// NYI:   [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <2 x double>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 64, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 16
+// NYI:   [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
+// NYI:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
+// NYI:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
+// NYI:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
+// NYI:   call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) {
+//   vst1q_f64_x4(a, b);
+// }
+
+// NYI-LABEL: @test_vst1q_p64_x4(
+// NYI:   [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
+// NYI:   [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <2 x i64>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 16
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 {{.*}}, i64 64, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
+// NYI:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
+// NYI:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
+// NYI:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
+// NYI:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
+// NYI:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
+// NYI:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
+// NYI:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
+// NYI:   call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) {
+//   vst1q_p64_x4(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_f64_x4(
+// NYI:   [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <1 x double>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <1 x double>, ptr [[ARRAYIDX6]], align 8
+// NYI:   [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
+// NYI:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
+// NYI:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
+// NYI:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
+// NYI:   call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) {
+//   vst1_f64_x4(a, b);
+// }
+
+// NYI-LABEL: @test_vst1_p64_x4(
+// NYI:   [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
+// NYI:   [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
+// NYI:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, ptr {{.*}}, i32 0, i32 0
+// NYI:   store [4 x <1 x i64>] {{.*}}.coerce, ptr [[COERCE_DIVE]], align 8
+// NYI:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 {{.*}}, i64 32, i1 false)
+// NYI:   [[VAL:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
+// NYI:   [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
+// NYI:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
+// NYI:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
+// NYI:   [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
+// NYI:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
+// NYI:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
+// NYI:   [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
+// NYI:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
+// NYI:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
+// NYI:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
+// NYI:   [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
+// NYI:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
+// NYI:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
+// NYI:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
+// NYI:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
+// NYI:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
+// NYI:   call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a)
+// NYI:   ret void
+// void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) {
+//   vst1_p64_x4(a, b);
+// }
+
+// NYI-LABEL: @test_vceqd_s64(
+// NYI:   [[TMP0:%.*]] = icmp eq i64 %a, %b
+// NYI:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCEQD_I]]
+// uint64_t test_vceqd_s64(int64_t a, int64_t b) {
+//   return (uint64_t)vceqd_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vceqd_u64(
+// NYI:   [[TMP0:%.*]] = icmp eq i64 %a, %b
+// NYI:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCEQD_I]]
+// uint64_t test_vceqd_u64(uint64_t a, uint64_t b) {
+//   return (int64_t)vceqd_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vceqzd_s64(
+// NYI:   [[TMP0:%.*]] = icmp eq i64 %a, 0
+// NYI:   [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCEQZ_I]]
+// uint64_t test_vceqzd_s64(int64_t a) {
+//   return (uint64_t)vceqzd_s64(a);
+// }
+
+// NYI-LABEL: @test_vceqzd_u64(
+// NYI:   [[TMP0:%.*]] = icmp eq i64 %a, 0
+// NYI:   [[VCEQZD_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCEQZD_I]]
+// int64_t test_vceqzd_u64(int64_t a) {
+//   return (int64_t)vceqzd_u64(a);
+// }
+
+// NYI-LABEL: @test_vcged_s64(
+// NYI:   [[TMP0:%.*]] = icmp sge i64 %a, %b
+// NYI:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCEQD_I]]
+// uint64_t test_vcged_s64(int64_t a, int64_t b) {
+//   return (uint64_t)vcged_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vcged_u64(
+// NYI:   [[TMP0:%.*]] = icmp uge i64 %a, %b
+// NYI:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCEQD_I]]
+// uint64_t test_vcged_u64(uint64_t a, uint64_t b) {
+//   return (uint64_t)vcged_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vcgezd_s64(
+// NYI:   [[TMP0:%.*]] = icmp sge i64 %a, 0
+// NYI:   [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCGEZ_I]]
+// uint64_t test_vcgezd_s64(int64_t a) {
+//   return (uint64_t)vcgezd_s64(a);
+// }
+
+// NYI-LABEL: @test_vcgtd_s64(
+// NYI:   [[TMP0:%.*]] = icmp sgt i64 %a, %b
+// NYI:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCEQD_I]]
+// uint64_t test_vcgtd_s64(int64_t a, int64_t b) {
+//   return (uint64_t)vcgtd_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vcgtd_u64(
+// NYI:   [[TMP0:%.*]] = icmp ugt i64 %a, %b
+// NYI:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCEQD_I]]
+// uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) {
+//   return (uint64_t)vcgtd_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vcgtzd_s64(
+// NYI:   [[TMP0:%.*]] = icmp sgt i64 %a, 0
+// NYI:   [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCGTZ_I]]
+// uint64_t test_vcgtzd_s64(int64_t a) {
+//   return (uint64_t)vcgtzd_s64(a);
+// }
+
+// NYI-LABEL: @test_vcled_s64(
+// NYI:   [[TMP0:%.*]] = icmp sle i64 %a, %b
+// NYI:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCEQD_I]]
+// uint64_t test_vcled_s64(int64_t a, int64_t b) {
+//   return (uint64_t)vcled_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vcled_u64(
+// NYI:   [[TMP0:%.*]] = icmp ule i64 %a, %b
+// NYI:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCEQD_I]]
+// uint64_t test_vcled_u64(uint64_t a, uint64_t b) {
+//   return (uint64_t)vcled_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vclezd_s64(
+// NYI:   [[TMP0:%.*]] = icmp sle i64 %a, 0
+// NYI:   [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCLEZ_I]]
+// uint64_t test_vclezd_s64(int64_t a) {
+//   return (uint64_t)vclezd_s64(a);
+// }
+
+// NYI-LABEL: @test_vcltd_s64(
+// NYI:   [[TMP0:%.*]] = icmp slt i64 %a, %b
+// NYI:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCEQD_I]]
+// uint64_t test_vcltd_s64(int64_t a, int64_t b) {
+//   return (uint64_t)vcltd_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vcltd_u64(
+// NYI:   [[TMP0:%.*]] = icmp ult i64 %a, %b
+// NYI:   [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCEQD_I]]
+// uint64_t test_vcltd_u64(uint64_t a, uint64_t b) {
+//   return (uint64_t)vcltd_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vcltzd_s64(
+// NYI:   [[TMP0:%.*]] = icmp slt i64 %a, 0
+// NYI:   [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCLTZ_I]]
+// uint64_t test_vcltzd_s64(int64_t a) {
+//   return (uint64_t)vcltzd_s64(a);
+// }
+
+// NYI-LABEL: @test_vtstd_s64(
+// NYI:   [[TMP0:%.*]] = and i64 %a, %b
+// NYI:   [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
+// NYI:   [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
+// NYI:   ret i64 [[VTSTD_I]]
+// uint64_t test_vtstd_s64(int64_t a, int64_t b) {
+//   return (uint64_t)vtstd_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vtstd_u64(
+// NYI:   [[TMP0:%.*]] = and i64 %a, %b
+// NYI:   [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0
+// NYI:   [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64
+// NYI:   ret i64 [[VTSTD_I]]
+// uint64_t test_vtstd_u64(uint64_t a, uint64_t b) {
+//   return (uint64_t)vtstd_u64(a, b);
+// }
+
+// Scalar saturating-free absolute value on i64. Verifies that vabsd_s64
+// lowers to the scalar form of the aarch64.neon.abs intrinsic in both the
+// CIR pipeline (cir.llvm.intrinsic) and the final LLVM IR.
+int64_t test_vabsd_s64(int64_t a) {
+  return (int64_t)vabsd_s64(a);
+
+  // CIR-LABEL: vabsd_s64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.abs" {{%.*}} : (!s64i) -> !s64i
+
+  // LLVM-LABEL: @test_vabsd_s64
+  // LLVM-SAME: (i64 [[a:%.*]])
+  // LLVM:   [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 {{.*}})
+  // LLVM:   ret i64 [[VABSD_S64_I]]
+}
+
+// NYI-LABEL: @test_vqabsb_s8(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
+// NYI:   [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]])
+// NYI:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0
+// NYI:   ret i8 [[TMP1]]
+// int8_t test_vqabsb_s8(int8_t a) {
+//   return (int8_t)vqabsb_s8(a);
+// }
+
+// NYI-LABEL: @test_vqabsh_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
+// NYI:   [[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]])
+// NYI:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0
+// NYI:   ret i16 [[TMP1]]
+// int16_t test_vqabsh_s16(int16_t a) {
+//   return (int16_t)vqabsh_s16(a);
+// }
+
+// NYI-LABEL: @test_vqabss_s32(
+// NYI:   [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
+// NYI:   ret i32 [[VQABSS_S32_I]]
+// int32_t test_vqabss_s32(int32_t a) {
+//   return (int32_t)vqabss_s32(a);
+// }
+
+// NYI-LABEL: @test_vqabsd_s64(
+// NYI:   [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a)
+// NYI:   ret i64 [[VQABSD_S64_I]]
+// int64_t test_vqabsd_s64(int64_t a) {
+//   return (int64_t)vqabsd_s64(a);
+// }
+
+// NYI-LABEL: @test_vnegd_s64(
+// NYI:   [[VNEGD_I:%.*]] = sub i64 0, %a
+// NYI:   ret i64 [[VNEGD_I]]
+// int64_t test_vnegd_s64(int64_t a) {
+//   return (int64_t)vnegd_s64(a);
+// }
+
+// NYI-LABEL: @test_vqnegb_s8(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
+// NYI:   [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]])
+// NYI:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0
+// NYI:   ret i8 [[TMP1]]
+// int8_t test_vqnegb_s8(int8_t a) {
+//   return (int8_t)vqnegb_s8(a);
+// }
+
+// NYI-LABEL: @test_vqnegh_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
+// NYI:   [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]])
+// NYI:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0
+// NYI:   ret i16 [[TMP1]]
+// int16_t test_vqnegh_s16(int16_t a) {
+//   return (int16_t)vqnegh_s16(a);
+// }
+
+// NYI-LABEL: @test_vqnegs_s32(
+// NYI:   [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a)
+// NYI:   ret i32 [[VQNEGS_S32_I]]
+// int32_t test_vqnegs_s32(int32_t a) {
+//   return (int32_t)vqnegs_s32(a);
+// }
+
+// NYI-LABEL: @test_vqnegd_s64(
+// NYI:   [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a)
+// NYI:   ret i64 [[VQNEGD_S64_I]]
+// int64_t test_vqnegd_s64(int64_t a) {
+//   return (int64_t)vqnegd_s64(a);
+// }
+
+// NYI-LABEL: @test_vuqaddb_s8(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
+// NYI:   [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <8 x i8> [[VUQADDB_S8_I]], i64 0
+// NYI:   ret i8 [[TMP2]]
+// int8_t test_vuqaddb_s8(int8_t a, uint8_t b) {
+//   return (int8_t)vuqaddb_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vuqaddh_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
+// NYI:   [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <4 x i16> [[VUQADDH_S16_I]], i64 0
+// NYI:   ret i16 [[TMP2]]
+// int16_t test_vuqaddh_s16(int16_t a, uint16_t b) {
+//   return (int16_t)vuqaddh_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vuqadds_s32(
+// NYI:   [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b)
+// NYI:   ret i32 [[VUQADDS_S32_I]]
+// int32_t test_vuqadds_s32(int32_t a, uint32_t b) {
+//   return (int32_t)vuqadds_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vuqaddd_s64(
+// NYI:   [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b)
+// NYI:   ret i64 [[VUQADDD_S64_I]]
+// int64_t test_vuqaddd_s64(int64_t a, uint64_t b) {
+//   return (int64_t)vuqaddd_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vsqaddb_u8(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0
+// NYI:   [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0
+// NYI:   ret i8 [[TMP2]]
+// uint8_t test_vsqaddb_u8(uint8_t a, int8_t b) {
+//   return (uint8_t)vsqaddb_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vsqaddh_u16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
+// NYI:   [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0
+// NYI:   ret i16 [[TMP2]]
+// uint16_t test_vsqaddh_u16(uint16_t a, int16_t b) {
+//   return (uint16_t)vsqaddh_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vsqadds_u32(
+// NYI:   [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b)
+// NYI:   ret i32 [[VSQADDS_U32_I]]
+// uint32_t test_vsqadds_u32(uint32_t a, int32_t b) {
+//   return (uint32_t)vsqadds_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vsqaddd_u64(
+// NYI:   [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b)
+// NYI:   ret i64 [[VSQADDD_U64_I]]
+// uint64_t test_vsqaddd_u64(uint64_t a, int64_t b) {
+//   return (uint64_t)vsqaddd_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vqdmlalh_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %c, i64 0
+// NYI:   [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// NYI:   [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
+// NYI:   [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]])
+// NYI:   ret i32 [[VQDMLXL1_I]]
+// int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) {
+//   return (int32_t)vqdmlalh_s16(a, b, c);
+// }
+
+// NYI-LABEL: @test_vqdmlals_s32(
+// NYI:   [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c)
+// NYI:   [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]])
+// NYI:   ret i64 [[VQDMLXL1_I]]
+// int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) {
+//   return (int64_t)vqdmlals_s32(a, b, c);
+// }
+
+// NYI-LABEL: @test_vqdmlslh_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %c, i64 0
+// NYI:   [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// NYI:   [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0
+// NYI:   [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]])
+// NYI:   ret i32 [[VQDMLXL1_I]]
+// int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) {
+//   return (int32_t)vqdmlslh_s16(a, b, c);
+// }
+
+// NYI-LABEL: @test_vqdmlsls_s32(
+// NYI:   [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c)
+// NYI:   [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]])
+// NYI:   ret i64 [[VQDMLXL1_I]]
+// int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) {
+//   return (int64_t)vqdmlsls_s32(a, b, c);
+// }
+
+// NYI-LABEL: @test_vqdmullh_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
+// NYI:   [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0
+// NYI:   [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// NYI:   [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0
+// NYI:   ret i32 [[TMP2]]
+// int32_t test_vqdmullh_s16(int16_t a, int16_t b) {
+//   return (int32_t)vqdmullh_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vqdmulls_s32(
+// NYI:   [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b)
+// NYI:   ret i64 [[VQDMULLS_S32_I]]
+// int64_t test_vqdmulls_s32(int32_t a, int32_t b) {
+//   return (int64_t)vqdmulls_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vqmovunh_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
+// NYI:   [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]])
+// NYI:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0
+// NYI:   ret i8 [[TMP1]]
+// uint8_t test_vqmovunh_s16(int16_t a) {
+//   return (uint8_t)vqmovunh_s16(a);
+// }
+
+// NYI-LABEL: @test_vqmovuns_s32(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
+// NYI:   [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]])
+// NYI:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0
+// NYI:   ret i16 [[TMP1]]
+// uint16_t test_vqmovuns_s32(int32_t a) {
+//   return (uint16_t)vqmovuns_s32(a);
+// }
+
+// NYI-LABEL: @test_vqmovund_s64(
+// NYI:   [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a)
+// NYI:   ret i32 [[VQMOVUND_S64_I]]
+// uint32_t test_vqmovund_s64(int64_t a) {
+//   return (uint32_t)vqmovund_s64(a);
+// }
+
+// NYI-LABEL: @test_vqmovnh_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
+// NYI:   [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]])
+// NYI:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_S16_I]], i64 0
+// NYI:   ret i8 [[TMP1]]
+// int8_t test_vqmovnh_s16(int16_t a) {
+//   return (int8_t)vqmovnh_s16(a);
+// }
+
+// Scalar saturating narrow (s32 -> s16). In CIR the scalar operand is
+// splatted into a poison <4 x s32> vector at lane 0, run through the
+// aarch64.neon.sqxtn vector intrinsic, and lane 0 of the <4 x s16> result
+// is extracted back out; the LLVM checks confirm the same
+// insertelement / intrinsic / extractelement shape after lowering.
+int16_t test_vqmovns_s32(int32_t a) {
+  return (int16_t)vqmovns_s32(a);
+
+  // CIR-LABEL: vqmovns_s32
+  // CIR: [[A:%.*]] = cir.load{{.*}} : !cir.ptr<!s32i>, !s32i
+  // CIR: [[VQMOVNS_S32_ZERO1:%.*]] = cir.const #cir.int<0> : !u64i
+  // CIR: [[POISON:%.*]] = cir.const #cir.poison : !s32i
+  // CIR: [[POISON_VEC:%.*]] = cir.vec.splat [[POISON]] : !s32i, !cir.vector<!s32i x 4>
+  // CIR: [[TMP0:%.*]] = cir.vec.insert {{.*}}, [[POISON_VEC]][[[VQMOVNS_S32_ZERO1]] : !u64i] : !cir.vector<!s32i x 4>
+  // CIR: [[VQMOVNS_S32_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.sqxtn" [[TMP0]] : (!cir.vector<!s32i x 4>) -> !cir.vector<!s16i x 4>
+  // CIR: [[VQMOVNS_S32_ZERO2:%.*]] = cir.const #cir.int<0> : !u64i
+  // CIR: [[TMP1:%.*]] = cir.vec.extract [[VQMOVNS_S32_I]][[[VQMOVNS_S32_ZERO2]] : !u64i] : !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}@test_vqmovns_s32(i32{{.*}}[[a:%.*]])
+  // LLVM:   [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 {{.*}}, i64 0
+  // LLVM:   [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]])
+  // LLVM:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0
+  // LLVM:   ret i16 [[TMP1]]
+}
+
+// NYI-LABEL: @test_vqmovnd_s64(
+// NYI:   [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a)
+// NYI:   ret i32 [[VQMOVND_S64_I]]
+// int32_t test_vqmovnd_s64(int64_t a) {
+//   return (int32_t)vqmovnd_s64(a);
+// }
+
+// NYI-LABEL: @test_vqmovnh_u16(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
+// NYI:   [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]])
+// NYI:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0
+// NYI:   ret i8 [[TMP1]]
+// int8_t test_vqmovnh_u16(int16_t a) {
+//   return (int8_t)vqmovnh_u16(a);
+// }
+
+// NYI-LABEL: @test_vqmovns_u32(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
+// NYI:   [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]])
+// NYI:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0
+// NYI:   ret i16 [[TMP1]]
+// int16_t test_vqmovns_u32(int32_t a) {
+//   return (int16_t)vqmovns_u32(a);
+// }
+
+// NYI-LABEL: @test_vqmovnd_u64(
+// NYI:   [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a)
+// NYI:   ret i32 [[VQMOVND_U64_I]]
+// int32_t test_vqmovnd_u64(int64_t a) {
+//   return (int32_t)vqmovnd_u64(a);
+// }
+
+// NYI-LABEL: @test_vceqs_f32(
+// NYI:   [[TMP0:%.*]] = fcmp oeq float %a, %b
+// NYI:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// NYI:   ret i32 [[VCMPD_I]]
+// uint32_t test_vceqs_f32(float32_t a, float32_t b) {
+//   return (uint32_t)vceqs_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vceqd_f64(
+// NYI:   [[TMP0:%.*]] = fcmp oeq double %a, %b
+// NYI:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCMPD_I]]
+// uint64_t test_vceqd_f64(float64_t a, float64_t b) {
+//   return (uint64_t)vceqd_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vceqzs_f32(
+// NYI:   [[TMP0:%.*]] = fcmp oeq float %a, 0.000000e+00
+// NYI:   [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// NYI:   ret i32 [[VCEQZ_I]]
+// uint32_t test_vceqzs_f32(float32_t a) {
+//   return (uint32_t)vceqzs_f32(a);
+// }
+
+// NYI-LABEL: @test_vceqzd_f64(
+// NYI:   [[TMP0:%.*]] = fcmp oeq double %a, 0.000000e+00
+// NYI:   [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCEQZ_I]]
+// uint64_t test_vceqzd_f64(float64_t a) {
+//   return (uint64_t)vceqzd_f64(a);
+// }
+
+// NYI-LABEL: @test_vcges_f32(
+// NYI:   [[TMP0:%.*]] = fcmp oge float %a, %b
+// NYI:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// NYI:   ret i32 [[VCMPD_I]]
+// uint32_t test_vcges_f32(float32_t a, float32_t b) {
+//   return (uint32_t)vcges_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vcged_f64(
+// NYI:   [[TMP0:%.*]] = fcmp oge double %a, %b
+// NYI:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCMPD_I]]
+// uint64_t test_vcged_f64(float64_t a, float64_t b) {
+//   return (uint64_t)vcged_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vcgezs_f32(
+// NYI:   [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00
+// NYI:   [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// NYI:   ret i32 [[VCGEZ_I]]
+// uint32_t test_vcgezs_f32(float32_t a) {
+//   return (uint32_t)vcgezs_f32(a);
+// }
+
+// NYI-LABEL: @test_vcgezd_f64(
+// NYI:   [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00
+// NYI:   [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCGEZ_I]]
+// uint64_t test_vcgezd_f64(float64_t a) {
+//   return (uint64_t)vcgezd_f64(a);
+// }
+
+// NYI-LABEL: @test_vcgts_f32(
+// NYI:   [[TMP0:%.*]] = fcmp ogt float %a, %b
+// NYI:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// NYI:   ret i32 [[VCMPD_I]]
+// uint32_t test_vcgts_f32(float32_t a, float32_t b) {
+//   return (uint32_t)vcgts_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vcgtd_f64(
+// NYI:   [[TMP0:%.*]] = fcmp ogt double %a, %b
+// NYI:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCMPD_I]]
+// uint64_t test_vcgtd_f64(float64_t a, float64_t b) {
+//   return (uint64_t)vcgtd_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vcgtzs_f32(
+// NYI:   [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00
+// NYI:   [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// NYI:   ret i32 [[VCGTZ_I]]
+// uint32_t test_vcgtzs_f32(float32_t a) {
+//   return (uint32_t)vcgtzs_f32(a);
+// }
+
+// NYI-LABEL: @test_vcgtzd_f64(
+// NYI:   [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00
+// NYI:   [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCGTZ_I]]
+// uint64_t test_vcgtzd_f64(float64_t a) {
+//   return (uint64_t)vcgtzd_f64(a);
+// }
+
+// NYI-LABEL: @test_vcles_f32(
+// NYI:   [[TMP0:%.*]] = fcmp ole float %a, %b
+// NYI:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// NYI:   ret i32 [[VCMPD_I]]
+// uint32_t test_vcles_f32(float32_t a, float32_t b) {
+//   return (uint32_t)vcles_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vcled_f64(
+// NYI:   [[TMP0:%.*]] = fcmp ole double %a, %b
+// NYI:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCMPD_I]]
+// uint64_t test_vcled_f64(float64_t a, float64_t b) {
+//   return (uint64_t)vcled_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vclezs_f32(
+// NYI:   [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00
+// NYI:   [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// NYI:   ret i32 [[VCLEZ_I]]
+// uint32_t test_vclezs_f32(float32_t a) {
+//   return (uint32_t)vclezs_f32(a);
+// }
+
+// NYI-LABEL: @test_vclezd_f64(
+// NYI:   [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00
+// NYI:   [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCLEZ_I]]
+// uint64_t test_vclezd_f64(float64_t a) {
+//   return (uint64_t)vclezd_f64(a);
+// }
+
+// NYI-LABEL: @test_vclts_f32(
+// NYI:   [[TMP0:%.*]] = fcmp olt float %a, %b
+// NYI:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32
+// NYI:   ret i32 [[VCMPD_I]]
+// uint32_t test_vclts_f32(float32_t a, float32_t b) {
+//   return (uint32_t)vclts_f32(a, b);
+// }
+
+// NYI-LABEL: @test_vcltd_f64(
+// NYI:   [[TMP0:%.*]] = fcmp olt double %a, %b
+// NYI:   [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCMPD_I]]
+// uint64_t test_vcltd_f64(float64_t a, float64_t b) {
+//   return (uint64_t)vcltd_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vcltzs_f32(
+// NYI:   [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00
+// NYI:   [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32
+// NYI:   ret i32 [[VCLTZ_I]]
+// uint32_t test_vcltzs_f32(float32_t a) {
+//   return (uint32_t)vcltzs_f32(a);
+// }
+
+// NYI-LABEL: @test_vcltzd_f64(
+// NYI:   [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00
+// NYI:   [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64
+// NYI:   ret i64 [[VCLTZ_I]]
+// uint64_t test_vcltzd_f64(float64_t a) {
+//   return (uint64_t)vcltzd_f64(a);
+// }
+
+uint32_t test_vcages_f32(float32_t a, float32_t b) {
+  return (uint32_t)vcages_f32(a, b);
+
+  // CIR-LABEL: vcages_f32
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.facge" {{.*}}, {{.*}} : (!cir.float, !cir.float) -> !u32i
+
+  // LLVM-LABEL: @test_vcages_f32(
+  // LLVM:   [[VCAGED_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %0, float %1)
+  // LLVM:   ret i32 [[VCAGED_F32_I]]
+}
+
+uint64_t test_vcaged_f64(float64_t a, float64_t b) {
+  return (uint64_t)vcaged_f64(a, b);
+
+  // CIR-LABEL: vcaged_f64
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.facge" {{.*}}, {{.*}} : (!cir.double, !cir.double) -> !u64i
+
+  // LLVM-LABEL: @test_vcaged_f64(
+  // LLVM:   [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %0, double %1)
+  // LLVM:   ret i64 [[VCAGED_F64_I]]
+}
+
+uint32_t test_vcagts_f32(float32_t a, float32_t b) {
+  return (uint32_t)vcagts_f32(a, b);
+
+  // CIR-LABEL: vcagts_f32
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.facgt" {{.*}}, {{.*}} : (!cir.float, !cir.float) -> !u32i
+
+  // LLVM-LABEL: @test_vcagts_f32(
+  // LLVM:   [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %0, float %1)
+  // LLVM:   ret i32 [[VCAGTS_F32_I]]
+}
+
+uint64_t test_vcagtd_f64(float64_t a, float64_t b) {
+  return (uint64_t)vcagtd_f64(a, b);
+
+  // CIR-LABEL: vcagtd_f64
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.facgt" {{.*}}, {{.*}} : (!cir.double, !cir.double) -> !u64i
+
+  // LLVM-LABEL: @test_vcagtd_f64(
+  // LLVM:   [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %0, double %1)
+  // LLVM:   ret i64 [[VCAGTD_F64_I]]
+}
+
+uint32_t test_vcales_f32(float32_t a, float32_t b) {
+  return (uint32_t)vcales_f32(a, b);
+
+  // CIR-LABEL: vcales_f32
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.facge" {{.*}}, {{.*}} : (!cir.float, !cir.float) -> !u32i
+
+  // LLVM-LABEL: @test_vcales_f32(
+  // LLVM:   [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %0, float %1)
+  // LLVM:   ret i32 [[VCALES_F32_I]]
+}
+
+uint64_t test_vcaled_f64(float64_t a, float64_t b) {
+  return (uint64_t)vcaled_f64(a, b);
+
+  // CIR-LABEL: vcaled_f64
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.facge" {{.*}}, {{.*}} : (!cir.double, !cir.double) -> !u64i
+
+  // LLVM-LABEL: @test_vcaled_f64(
+  // LLVM:   [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %0, double %1)
+  // LLVM:   ret i64 [[VCALED_F64_I]]
+}
+
+// NYI-LABEL: @test_vcalts_f32(
+// NYI:   [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a)
+// NYI:   ret i32 [[VCALTS_F32_I]]
+uint32_t test_vcalts_f32(float32_t a, float32_t b) {
+  return (uint32_t)vcalts_f32(a, b);
+
+  // CIR-LABEL: vcalts_f32
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.facgt" {{.*}}, {{.*}} : (!cir.float, !cir.float) -> !u32i
+
+  // LLVM-LABEL: @test_vcalts_f32(
+  // LLVM:   [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %0, float %1)
+  // LLVM:   ret i32 [[VCALTS_F32_I]]
+}
+
+uint64_t test_vcaltd_f64(float64_t a, float64_t b) {
+  return (uint64_t)vcaltd_f64(a, b);
+
+  // CIR-LABEL: vcaltd_f64
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.facgt" {{.*}}, {{.*}} : (!cir.double, !cir.double) -> !u64i
+
+  // LLVM-LABEL: @test_vcaltd_f64(
+  // LLVM:   [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %0, double %1)
+  // LLVM:   ret i64 [[VCALTD_F64_I]]
+}
+
+int64_t test_vshrd_n_s64(int64_t a) {
+  return (int64_t)vshrd_n_s64(a, 1);
+
+  // CIR-LABEL: vshrd_n_s64
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !s64i, {{%.*}} : !s64i) -> !s64i
+
+  // LLVM-LABEL: @test_vshrd_n_s64(
+  // LLVM:   [[SHRD_N:%.*]] = ashr i64 %0, 1
+  // LLVM:   ret i64 [[SHRD_N]]
+}
+
+uint64_t test_vshrd_n_u64(uint64_t a) {
+   return (uint64_t)vshrd_n_u64(a, 64);
+
+  // CIR-LABEL: vshrd_n_u64
+  // CIR: {{.*}} = cir.const #cir.int<0> : !u64i
+  // CIR: cir.return {{.*}} : !u64i
+
+  // LLVM-LABEL: @test_vshrd_n_u64(
+  // LLVM:   ret i64 0
+}
+
+uint64_t test_vshrd_n_u64_2() {
+  uint64_t a = UINT64_C(0xf000000000000000);
+  return vshrd_n_u64(a, 64);
+
+  // CIR-LABEL: vshrd_n_u64_2
+  // CIR: {{.*}} = cir.const #cir.int<0> : !u64i
+  // CIR: cir.return {{.*}} : !u64i
+
+  // LLVM-LABEL: @test_vshrd_n_u64_2(
+  // LLVM:   ret i64 0
+
+}
+
+uint64_t test_vshrd_n_u64_3(uint64_t a) {
+  return vshrd_n_u64(a, 1);
+
+  // CIR-LABEL: vshrd_n_u64_3
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !u64i, {{%.*}} : !u64i) -> !u64i
+
+  // LLVM-LABEL: @test_vshrd_n_u64_3(
+  // LLVM:   [[SHRD_N:%.*]] = lshr i64 %0, 1
+  // LLVM:   ret i64 [[SHRD_N]]
+}
+
+int64_t test_vrshrd_n_s64(int64_t a) {
+  return (int64_t)vrshrd_n_s64(a, 63);
+
+  // CIR-LABEL: vrshrd_n_s64
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" {{.*}}, {{.*}} : (!s64i, !s64i) -> !s64i
+
+  // LLVM-LABEL: @test_vrshrd_n_s64(
+  // LLVM:  [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %0, i64 -63)
+  // LLVM:  ret i64 {{.*}}
+}
+
+// NYI-LABEL: @test_vrshr_n_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// NYI:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> {{.*}}, <1 x i64> <i64 -1>)
+// NYI:   ret <1 x i64> [[VRSHR_N1]]
+// int64x1_t test_vrshr_n_s64(int64x1_t a) {
+//   return vrshr_n_s64(a, 1);
+// }
+
+uint64_t test_vrshrd_n_u64(uint64_t a) {
+  return (uint64_t)vrshrd_n_u64(a, 63);
+
+  // CIR-LABEL: vrshrd_n_u64
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" {{.*}}, {{.*}} : (!u64i, !s64i) -> !u64i
+
+  // LLVM-LABEL: @test_vrshrd_n_u64(
+  // LLVM:   [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %0, i64 -63)
+  // LLVM:   ret i64 {{.*}}
+}
+
+// NYI-LABEL: @test_vrshr_n_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// NYI:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> {{.*}}, <1 x i64> <i64 -1>)
+// NYI:   ret <1 x i64> [[VRSHR_N1]]
+// uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
+//   return vrshr_n_u64(a, 1);
+// }
+
+
+int64_t test_vsrad_n_s64(int64_t a, int64_t b) {
+  return (int64_t)vsrad_n_s64(a, b, 63);
+
+  // CIR-LABEL: vsrad_n_s64
+  // CIR: [[ASHR:%.*]] = cir.shift(right, {{%.*}} : !s64i, {{%.*}} : !s64i) -> !s64i
+  // CIR: {{.*}} = cir.binop(add, {{.*}}, [[ASHR]]) : !s64i
+
+  // LLVM-LABEL: test_vsrad_n_s64(
+  // LLVM: [[SHRD_N:%.*]] = ashr i64 %1, 63
+  // LLVM: [[TMP0:%.*]] = add i64 %0, [[SHRD_N]]
+  // LLVM: ret i64 [[TMP0]]
+}
+
+int64_t test_vsrad_n_s64_2(int64_t a, int64_t b) {
+  return (int64_t)vsrad_n_s64(a, b, 64);
+
+  // CIR-LABEL: vsrad_n_s64_2
+  // CIR: [[ASHR:%.*]] = cir.shift(right, {{%.*}} : !s64i, {{%.*}} : !s64i) -> !s64i
+  // CIR: {{.*}} = cir.binop(add, {{.*}}, [[ASHR]]) : !s64i
+
+  // LLVM-LABEL: test_vsrad_n_s64_2(
+  // LLVM: [[SHRD_N:%.*]] = ashr i64 %1, 63
+  // LLVM: [[TMP0:%.*]] = add i64 %0, [[SHRD_N]]
+  // LLVM: ret i64 [[TMP0]]
+}
+
+int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
+  return vsra_n_s64(a, b, 1);
+
+  // CIR-LABEL: vsra_n_s64
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VSRA_N:%.*]] = cir.shift(right, {{%.*}}, [[splat]] : !cir.vector<!s64i x 1>) -> !cir.vector<!s64i x 1>
+  // CIR: cir.binop(add, {{%.*}}, [[VSRA_N]]) : !cir.vector<!s64i x 1>
+
+  // LLVM-LABEL: test_vsra_n_s64
+  // LLVM: [[VSRA_N:%.*]] = ashr <1 x i64> {{.*}}, splat (i64 1)
+  // LLVM: [[TMP4:%.*]] = add <1 x i64> {{.*}}, [[VSRA_N]]
+}
+
+uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) {
+  return (uint64_t)vsrad_n_u64(a, b, 63);
+
+  // CIR-LABEL: test_vsrad_n_u64
+  // CIR: [[SHL:%.*]] = cir.shift(left, {{%.*}} : !u64i, {{%.*}} : !u64i) -> !u64i
+  // CIR: {{.*}} = cir.binop(add, {{.*}}, [[SHL]]) : !u64i
+
+  // LLVM-LABEL: test_vsrad_n_u64(
+  // LLVM: [[SHRD_N:%.*]] = shl i64 %1, 63
+  // LLVM: [[TMP0:%.*]] = add i64 %0, [[SHRD_N]]
+  // LLVM: ret i64 [[TMP0]]
+}
+
+uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) {
+  return (uint64_t)vsrad_n_u64(a, b, 64);
+
+  // CIR-LABEL: test_vsrad_n_u64_2
+  // CIR: cir.return {{.*}} : !u64i
+
+  // LLVM-LABEL: test_vsrad_n_u64_2(
+  // LLVM: ret i64 %0
+}
+
+uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
+  return vsra_n_u64(a, b, 1);
+
+  // CIR-LABEL: vsra_n_u64
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VSRA_N:%.*]] = cir.shift(right, {{%.*}}, [[splat]] : !cir.vector<!u64i x 1>) -> !cir.vector<!u64i x 1>
+  // CIR: cir.binop(add, {{%.*}}, [[VSRA_N]]) : !cir.vector<!u64i x 1>
+
+  // LLVM-LABEL: test_vsra_n_u64
+  // LLVM:   [[VSRA_N:%.*]] = lshr <1 x i64> {{.*}}, splat (i64 1)
+  // LLVM:   [[TMP4:%.*]] = add <1 x i64> {{.*}}, [[VSRA_N]]
+}
+
+int64_t test_vrsrad_n_s64(int64_t a, int64_t b) {
+  return (int64_t)vrsrad_n_s64(a, b, 63);
+
+  // CIR-LABEL: vrsrad_n_s64
+  // CIR: [[TMP0:%.*]] = cir.const #cir.int<63> : !s32i
+  // CIR: [[TMP1:%.*]] = cir.unary(minus, [[TMP0]]) : !s32i, !s32i
+  // CIR: [[TMP2:%.*]] = cir.cast integral [[TMP1]] : !s32i -> !s64i
+  // CIR: [[TMP3:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" {{.*}}, [[TMP2]] : (!s64i, !s64i) -> !s64i
+  // CIR: [[TMP4:%.*]] = cir.binop(add, {{.*}}, [[TMP3]]) : !s64i
+
+  // LLVM-LABEL: @test_vrsrad_n_s64(
+  // LLVM: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %1, i64 -63)
+  // LLVM: [[TMP1:%.*]] = add i64 %0, [[TMP0]]
+  // LLVM: ret i64 [[TMP1]]
+}
+
+int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
+  return vrsra_n_s64(a, b, 1);
+
+  // CIR-LABEL: vrsra_n_s64
+  // CIR: [[VRSHR_N:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s64i x 1>
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.srshl" {{%.*}}, [[splat]] : (!cir.vector<!s64i x 1>, !cir.vector<!s64i x 1>) -> !cir.vector<!s64i x 1>
+  // CIR: [[TMP2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!s64i x 1>
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector<!s64i x 1>
+
+  // LLVM-LABEL: test_vrsra_n_s64
+  // LLVM:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> {{.*}}, <1 x i64> splat (i64 -1))
+  // LLVM:   [[TMP2:%.*]] = bitcast <8 x i8> {{.*}} to <1 x i64>
+  // LLVM:   [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
+}
+
+uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) {
+  return (uint64_t)vrsrad_n_u64(a, b, 63);
+
+  // CIR-LABEL: vrsrad_n_u64
+  // CIR: [[TMP0:%.*]] = cir.const #cir.int<63> : !s32i
+  // CIR: [[TMP1:%.*]] = cir.unary(minus, [[TMP0]]) : !s32i, !s32i
+  // CIR: [[TMP2:%.*]] = cir.cast integral [[TMP1]] : !s32i -> !u64i
+  // CIR: [[TMP3:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" {{.*}}, [[TMP2]] : (!u64i, !u64i) -> !u64i
+  // CIR: [[TMP4:%.*]] = cir.binop(add, {{.*}}, [[TMP3]]) : !u64i
+
+  // LLVM-LABEL: @test_vrsrad_n_u64(
+  // LLVM: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %1, i64 -63)
+  // LLVM: [[TMP1:%.*]] = add i64 %0, [[TMP0]]
+  // LLVM: ret i64 [[TMP1]]
+}
+
+uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
+  return vrsra_n_u64(a, b, 1);
+
+  // CIR-LABEL: vrsra_n_u64
+  // CIR: [[VRSHR_N:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u64i x 1>
+  // CIR: [[splat:%.*]] = cir.const #cir.const_vector
+  // CIR: [[VRSHR_N1:%.*]] = cir.llvm.intrinsic "aarch64.neon.urshl" {{.*}}, [[splat]] : (!cir.vector<!u64i x 1>, !cir.vector<!s64i x 1>) -> !cir.vector<!u64i x 1>
+  // CIR: [[TMP2:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 8> -> !cir.vector<!u64i x 1>
+  // CIR: cir.binop(add, [[TMP2]], [[VRSHR_N1]]) : !cir.vector<!u64i x 1>
+
+  // LLVM-LABEL: test_vrsra_n_u64
+  // LLVM:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> {{.*}}, <1 x i64> splat (i64 -1))
+  // LLVM:   [[TMP2:%.*]] = bitcast <8 x i8> {{.*}} to <1 x i64>
+  // LLVM:   [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]]
+}
+
+int64_t test_vshld_n_s64(int64_t a) {
+  return (int64_t)vshld_n_s64(a, 1);
+
+  // CIR-LABEL: vshld_n_s64
+  // CIR: {{%.*}} = cir.shift(left, {{%.*}} : !s64i, {{%.*}} : !s64i) -> !s64i
+
+  // LLVM-LABEL: @test_vshld_n_s64(
+  // LLVM:   [[SHLD_N:%.*]] = shl i64 %0, 1
+  // LLVM:   ret i64 [[SHLD_N]]
+}
+
+// NYI-LABEL: @test_vshl_n_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// NYI:   [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
+// NYI:   ret <1 x i64> [[VSHL_N]]
+// int64x1_t test_vshl_n_s64(int64x1_t a) {
+//   return vshl_n_s64(a, 1);
+// }
+
+uint64_t test_vshld_n_u64(uint64_t a) {
+  return (uint64_t)vshld_n_u64(a, 63);
+
+  // CIR-LABEL: vshld_n_u64
+  // CIR: {{%.*}} = cir.shift(left, {{%.*}} : !u64i, {{%.*}} : !u64i) -> !u64i
+
+  // LLVM-LABEL: @test_vshld_n_u64(
+  // LLVM:   [[SHLD_N:%.*]] = shl i64 %0, 63
+  // LLVM:   ret i64 [[SHLD_N]]
+}
+
+// NYI-LABEL: @test_vshl_n_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// NYI:   [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
+// NYI:   ret <1 x i64> [[VSHL_N]]
+// uint64x1_t test_vshl_n_u64(uint64x1_t a) {
+//   return vshl_n_u64(a, 1);
+// }
+
+// NYI-LABEL: @test_vqshlb_n_s8(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
+// NYI:   [[VQSHLB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>)
+// NYI:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_S8]], i64 0
+// NYI:   ret i8 [[TMP1]]
+// int8_t test_vqshlb_n_s8(int8_t a) {
+//   return (int8_t)vqshlb_n_s8(a, 7);
+// }
+
+// NYI-LABEL: @test_vqshlh_n_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
+// NYI:   [[VQSHLH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 poison, i16 poison, i16 poison>)
+// NYI:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_S16]], i64 0
+// NYI:   ret i16 [[TMP1]]
+// int16_t test_vqshlh_n_s16(int16_t a) {
+//   return (int16_t)vqshlh_n_s16(a, 15);
+// }
+
+// NYI-LABEL: @test_vqshls_n_s32(
+// NYI:   [[VQSHLS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 31)
+// NYI:   ret i32 [[VQSHLS_N_S32]]
+// int32_t test_vqshls_n_s32(int32_t a) {
+//   return (int32_t)vqshls_n_s32(a, 31);
+// }
+
+int64_t test_vqshld_n_s64(int64_t a) {
+ return (int64_t)vqshld_n_s64(a, 63);
+
+ // CIR-LABEL: vqshld_n_s64
+ // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.sqshl" {{.*}}, {{.*}} : (!s64i, !s64i) -> !s64i
+
+ // LLVM-LABEL: @test_vqshld_n_s64(
+ // LLVM: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %0, i64 63)
+ // LLVM: ret i64 [[VQSHL_N]]
+}
+
+// NYI-LABEL: @test_vqshl_n_s8(
+// NYI:   [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
+// NYI:   ret <8 x i8> [[VQSHL_N]]
+// int8x8_t test_vqshl_n_s8(int8x8_t a) {
+//   return vqshl_n_s8(a, 0);
+// }
+
+// NYI-LABEL: @test_vqshlq_n_s8(
+// NYI:   [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
+// NYI:   ret <16 x i8> [[VQSHL_N]]
+// int8x16_t test_vqshlq_n_s8(int8x16_t a) {
+//   return vqshlq_n_s8(a, 0);
+// }
+
+// NYI-LABEL: @test_vqshl_n_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// NYI:   [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
+// NYI:   ret <4 x i16> [[VQSHL_N1]]
+// int16x4_t test_vqshl_n_s16(int16x4_t a) {
+//   return vqshl_n_s16(a, 0);
+// }
+
+// NYI-LABEL: @test_vqshlq_n_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
+// NYI:   ret <8 x i16> [[VQSHL_N1]]
+// int16x8_t test_vqshlq_n_s16(int16x8_t a) {
+//   return vqshlq_n_s16(a, 0);
+// }
+
+// NYI-LABEL: @test_vqshl_n_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// NYI:   [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
+// NYI:   ret <2 x i32> [[VQSHL_N1]]
+// int32x2_t test_vqshl_n_s32(int32x2_t a) {
+//   return vqshl_n_s32(a, 0);
+// }
+
+// NYI-LABEL: @test_vqshlq_n_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
+// NYI:   ret <4 x i32> [[VQSHL_N1]]
+// int32x4_t test_vqshlq_n_s32(int32x4_t a) {
+//   return vqshlq_n_s32(a, 0);
+// }
+
+// NYI-LABEL: @test_vqshlq_n_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
+// NYI:   ret <2 x i64> [[VQSHL_N1]]
+// int64x2_t test_vqshlq_n_s64(int64x2_t a) {
+//   return vqshlq_n_s64(a, 0);
+// }
+
+// NYI-LABEL: @test_vqshl_n_u8(
+// NYI:   [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer)
+// NYI:   ret <8 x i8> [[VQSHL_N]]
+// uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
+//   return vqshl_n_u8(a, 0);
+// }
+
+// NYI-LABEL: @test_vqshlq_n_u8(
+// NYI:   [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer)
+// NYI:   ret <16 x i8> [[VQSHL_N]]
+// uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
+//   return vqshlq_n_u8(a, 0);
+// }
+
+// NYI-LABEL: @test_vqshl_n_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// NYI:   [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer)
+// NYI:   ret <4 x i16> [[VQSHL_N1]]
+// uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
+//   return vqshl_n_u16(a, 0);
+// }
+
+// NYI-LABEL: @test_vqshlq_n_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// NYI:   [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer)
+// NYI:   ret <8 x i16> [[VQSHL_N1]]
+// uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
+//   return vqshlq_n_u16(a, 0);
+// }
+
+// NYI-LABEL: @test_vqshl_n_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// NYI:   [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer)
+// NYI:   ret <2 x i32> [[VQSHL_N1]]
+// uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
+//   return vqshl_n_u32(a, 0);
+// }
+
+// NYI-LABEL: @test_vqshlq_n_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// NYI:   [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer)
+// NYI:   ret <4 x i32> [[VQSHL_N1]]
+// uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
+//   return vqshlq_n_u32(a, 0);
+// }
+
+// NYI-LABEL: @test_vqshlq_n_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
+// NYI:   [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer)
+// NYI:   ret <2 x i64> [[VQSHL_N1]]
+// uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
+//   return vqshlq_n_u64(a, 0);
+// }
+
+// NYI-LABEL: @test_vqshl_n_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// NYI:   [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
+// NYI:   ret <1 x i64> [[VQSHL_N1]]
+// int64x1_t test_vqshl_n_s64(int64x1_t a) {
+//   return vqshl_n_s64(a, 1);
+// }
+
+// NYI-LABEL: @test_vqshlb_n_u8(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
+// NYI:   [[VQSHLB_N_U8:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>)
+// NYI:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_U8]], i64 0
+// NYI:   ret i8 [[TMP1]]
+// uint8_t test_vqshlb_n_u8(uint8_t a) {
+//   return (uint8_t)vqshlb_n_u8(a, 7);
+// }
+
+// NYI-LABEL: @test_vqshlh_n_u16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
+// NYI:   [[VQSHLH_N_U16:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 poison, i16 poison, i16 poison>)
+// NYI:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_U16]], i64 0
+// NYI:   ret i16 [[TMP1]]
+// uint16_t test_vqshlh_n_u16(uint16_t a) {
+//   return (uint16_t)vqshlh_n_u16(a, 15);
+// }
+
+// NYI-LABEL: @test_vqshls_n_u32(
+// NYI:   [[VQSHLS_N_U32:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 31)
+// NYI:   ret i32 [[VQSHLS_N_U32]]
+// uint32_t test_vqshls_n_u32(uint32_t a) {
+//   return (uint32_t)vqshls_n_u32(a, 31);
+// }
+
+uint64_t test_vqshld_n_u64(uint64_t a) {
+ return (uint64_t)vqshld_n_u64(a, 63);
+
+ // CIR-LABEL: vqshld_n_u64
+ // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.uqshl" {{.*}}, {{.*}} : (!u64i, !u64i) -> !u64i
+
+ // LLVM-LABEL: @test_vqshld_n_u64(
+ // LLVM: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %0, i64 63)
+ // LLVM: ret i64 [[VQSHL_N]]
+}
+
+// NYI-LABEL: @test_vqshl_n_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// NYI:   [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
+// NYI:   ret <1 x i64> [[VQSHL_N1]]
+// uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
+//   return vqshl_n_u64(a, 1);
+// }
+
+// NYI-LABEL: @test_vqshlub_n_s8(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0
+// NYI:   [[VQSHLUB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>)
+// NYI:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLUB_N_S8]], i64 0
+// NYI:   ret i8 [[TMP1]]
+// int8_t test_vqshlub_n_s8(int8_t a) {
+//   return (int8_t)vqshlub_n_s8(a, 7);
+// }
+
+// NYI-LABEL: @test_vqshluh_n_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0
+// NYI:   [[VQSHLUH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 poison, i16 poison, i16 poison>)
+// NYI:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLUH_N_S16]], i64 0
+// NYI:   ret i16 [[TMP1]]
+// int16_t test_vqshluh_n_s16(int16_t a) {
+//   return (int16_t)vqshluh_n_s16(a, 15);
+// }
+
+// NYI-LABEL: @test_vqshlus_n_s32(
+// NYI:   [[VQSHLUS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %a, i32 31)
+// NYI:   ret i32 [[VQSHLUS_N_S32]]
+// int32_t test_vqshlus_n_s32(int32_t a) {
+//   return (int32_t)vqshlus_n_s32(a, 31);
+// }
+
+int64_t test_vqshlud_n_s64(int64_t a) {
+  return (int64_t)vqshlud_n_s64(a, 63);
+
+  // CIR-LABEL: vqshlud_n_s64
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.sqshlu" {{.*}}, {{.*}} : (!s64i, !s64i) -> !s64i
+
+  // LLVM-LABEL: @test_vqshlud_n_s64(
+  // LLVM: [[VQSHLU_N:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %0, i64 63)
+  // LLVM: ret i64 [[VQSHLU_N]]
+}
+
+// NYI-LABEL: @test_vqshlu_n_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// NYI:   [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
+// NYI:   ret <1 x i64> [[VQSHLU_N1]]
+// uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
+//   return vqshlu_n_s64(a, 1);
+// }
+
+// NYI-LABEL: @test_vsrid_n_s64(
+// NYI:   [[VSRID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
+// NYI:   [[VSRID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
+// NYI:   [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63)
+// NYI:   [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S642]] to i64
+// NYI:   ret i64 [[VSRID_N_S643]]
+// int64_t test_vsrid_n_s64(int64_t a, int64_t b) {
+//   return (int64_t)vsrid_n_s64(a, b, 63);
+// }
+
+// NYI-LABEL: @test_vsri_n_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// NYI:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// NYI:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// NYI:   [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
+// NYI:   ret <1 x i64> [[VSRI_N2]]
+// int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
+//   return vsri_n_s64(a, b, 1);
+// }
+
+// NYI-LABEL: @test_vsrid_n_u64(
+// NYI:   [[VSRID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
+// NYI:   [[VSRID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
+// NYI:   [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63)
+// NYI:   [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U642]] to i64
+// NYI:   ret i64 [[VSRID_N_U643]]
+// uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) {
+//   return (uint64_t)vsrid_n_u64(a, b, 63);
+// }
+
+// NYI-LABEL: @test_vsri_n_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// NYI:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// NYI:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// NYI:   [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1)
+// NYI:   ret <1 x i64> [[VSRI_N2]]
+// uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
+//   return vsri_n_u64(a, b, 1);
+// }
+
+// NYI-LABEL: @test_vslid_n_s64(
+// NYI:   [[VSLID_N_S64:%.*]] = bitcast i64 %a to <1 x i64>
+// NYI:   [[VSLID_N_S641:%.*]] = bitcast i64 %b to <1 x i64>
+// NYI:   [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63)
+// NYI:   [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S642]] to i64
+// NYI:   ret i64 [[VSLID_N_S643]]
+// int64_t test_vslid_n_s64(int64_t a, int64_t b) {
+//   return (int64_t)vslid_n_s64(a, b, 63);
+// }
+
+// NYI-LABEL: @test_vsli_n_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// NYI:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// NYI:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// NYI:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
+// NYI:   ret <1 x i64> [[VSLI_N2]]
+// int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
+//   return vsli_n_s64(a, b, 1);
+// }
+
+// NYI-LABEL: @test_vslid_n_u64(
+// NYI:   [[VSLID_N_U64:%.*]] = bitcast i64 %a to <1 x i64>
+// NYI:   [[VSLID_N_U641:%.*]] = bitcast i64 %b to <1 x i64>
+// NYI:   [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63)
+// NYI:   [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U642]] to i64
+// NYI:   ret i64 [[VSLID_N_U643]]
+// uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) {
+//   return (uint64_t)vslid_n_u64(a, b, 63);
+// }
+
+// NYI-LABEL: @test_vsli_n_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// NYI:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// NYI:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+// NYI:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1)
+// NYI:   ret <1 x i64> [[VSLI_N2]]
+// uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
+//   return vsli_n_u64(a, b, 1);
+// }
+
+// NYI-LABEL: @test_vqshrnh_n_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
+// NYI:   [[VQSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
+// NYI:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_S16]], i64 0
+// NYI:   ret i8 [[TMP1]]
+// int8_t test_vqshrnh_n_s16(int16_t a) {
+//   return (int8_t)vqshrnh_n_s16(a, 8);
+// }
+
+// NYI-LABEL: @test_vqshrns_n_s32(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
+// NYI:   [[VQSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
+// NYI:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_S32]], i64 0
+// NYI:   ret i16 [[TMP1]]
+// int16_t test_vqshrns_n_s32(int32_t a) {
+//   return (int16_t)vqshrns_n_s32(a, 16);
+// }
+
+// NYI-LABEL: @test_vqshrnd_n_s64(
+// NYI:   [[VQSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %a, i32 32)
+// NYI:   ret i32 [[VQSHRND_N_S64]]
+// int32_t test_vqshrnd_n_s64(int64_t a) {
+//   return (int32_t)vqshrnd_n_s64(a, 32);
+// }
+
+// NYI-LABEL: @test_vqshrnh_n_u16(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
+// NYI:   [[VQSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
+// NYI:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_U16]], i64 0
+// NYI:   ret i8 [[TMP1]]
+// uint8_t test_vqshrnh_n_u16(uint16_t a) {
+//   return (uint8_t)vqshrnh_n_u16(a, 8);
+// }
+
+// NYI-LABEL: @test_vqshrns_n_u32(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
+// NYI:   [[VQSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
+// NYI:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_U32]], i64 0
+// NYI:   ret i16 [[TMP1]]
+// uint16_t test_vqshrns_n_u32(uint32_t a) {
+//   return (uint16_t)vqshrns_n_u32(a, 16);
+// }
+
+// NYI-LABEL: @test_vqshrnd_n_u64(
+// NYI:   [[VQSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %a, i32 32)
+// NYI:   ret i32 [[VQSHRND_N_U64]]
+// uint32_t test_vqshrnd_n_u64(uint64_t a) {
+//   return (uint32_t)vqshrnd_n_u64(a, 32);
+// }
+
+// NYI-LABEL: @test_vqrshrnh_n_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
+// NYI:   [[VQRSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
+// NYI:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_S16]], i64 0
+// NYI:   ret i8 [[TMP1]]
+// int8_t test_vqrshrnh_n_s16(int16_t a) {
+//   return (int8_t)vqrshrnh_n_s16(a, 8);
+// }
+
+// NYI-LABEL: @test_vqrshrns_n_s32(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
+// NYI:   [[VQRSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
+// NYI:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_S32]], i64 0
+// NYI:   ret i16 [[TMP1]]
+// int16_t test_vqrshrns_n_s32(int32_t a) {
+//   return (int16_t)vqrshrns_n_s32(a, 16);
+// }
+
+// NYI-LABEL: @test_vqrshrnd_n_s64(
+// NYI:   [[VQRSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %a, i32 32)
+// NYI:   ret i32 [[VQRSHRND_N_S64]]
+// int32_t test_vqrshrnd_n_s64(int64_t a) {
+//   return (int32_t)vqrshrnd_n_s64(a, 32);
+// }
+
+// NYI-LABEL: @test_vqrshrnh_n_u16(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
+// NYI:   [[VQRSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8)
+// NYI:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_U16]], i64 0
+// NYI:   ret i8 [[TMP1]]
+// uint8_t test_vqrshrnh_n_u16(uint16_t a) {
+//   return (uint8_t)vqrshrnh_n_u16(a, 8);
+// }
+
+// NYI-LABEL: @test_vqrshrns_n_u32(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
+// NYI:   [[VQRSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16)
+// NYI:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_U32]], i64 0
+// NYI:   ret i16 [[TMP1]]
+// uint16_t test_vqrshrns_n_u32(uint32_t a) {
+//   return (uint16_t)vqrshrns_n_u32(a, 16);
+// }
+
+// NYI-LABEL: @test_vqrshrnd_n_u64(
+// NYI:   [[VQRSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %a, i32 32)
+// NYI:   ret i32 [[VQRSHRND_N_U64]]
+// uint32_t test_vqrshrnd_n_u64(uint64_t a) {
+//   return (uint32_t)vqrshrnd_n_u64(a, 32);
+// }
+
+// NYI-LABEL: @test_vqshrunh_n_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
+// NYI:   [[VQSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
+// NYI:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRUNH_N_S16]], i64 0
+// NYI:   ret i8 [[TMP1]]
+// int8_t test_vqshrunh_n_s16(int16_t a) {
+//   return (int8_t)vqshrunh_n_s16(a, 8);
+// }
+
+// NYI-LABEL: @test_vqshruns_n_s32(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
+// NYI:   [[VQSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
+// NYI:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRUNS_N_S32]], i64 0
+// NYI:   ret i16 [[TMP1]]
+// int16_t test_vqshruns_n_s32(int32_t a) {
+//   return (int16_t)vqshruns_n_s32(a, 16);
+// }
+
+// NYI-LABEL: @test_vqshrund_n_s64(
+// NYI:   [[VQSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %a, i32 32)
+// NYI:   ret i32 [[VQSHRUND_N_S64]]
+// int32_t test_vqshrund_n_s64(int64_t a) {
+//   return (int32_t)vqshrund_n_s64(a, 32);
+// }
+
+// NYI-LABEL: @test_vqrshrunh_n_s16(
+// NYI:   [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0
+// NYI:   [[VQRSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP0]], i32 8)
+// NYI:   [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRUNH_N_S16]], i64 0
+// NYI:   ret i8 [[TMP1]]
+// uint8_t test_vqrshrunh_n_s16(int16_t a) {
+//   return (uint8_t)vqrshrunh_n_s16(a, 8);
+// }
+
+// NYI-LABEL: @test_vqrshruns_n_s32(
+// NYI:   [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0
+// NYI:   [[VQRSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP0]], i32 16)
+// NYI:   [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRUNS_N_S32]], i64 0
+// NYI:   ret i16 [[TMP1]]
+// uint16_t test_vqrshruns_n_s32(int32_t a) {
+//   return (uint16_t)vqrshruns_n_s32(a, 16);
+// }
+
+// NYI-LABEL: @test_vqrshrund_n_s64(
+// NYI:   [[VQRSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %a, i32 32)
+// NYI:   ret i32 [[VQRSHRUND_N_S64]]
+// uint32_t test_vqrshrund_n_s64(int64_t a) {
+//   return (uint32_t)vqrshrund_n_s64(a, 32);
+// }
+
+// NYI-LABEL: @test_vcvts_n_f32_s32(
+// NYI:   [[VCVTS_N_F32_S32:%.*]] = call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 %a, i32 1)
+// NYI:   ret float [[VCVTS_N_F32_S32]]
+// float32_t test_vcvts_n_f32_s32(int32_t a) {
+//   return vcvts_n_f32_s32(a, 1);
+// }
+
+// NYI-LABEL: @test_vcvtd_n_f64_s64(
+// NYI:   [[VCVTD_N_F64_S64:%.*]] = call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %a, i32 1)
+// NYI:   ret double [[VCVTD_N_F64_S64]]
+// float64_t test_vcvtd_n_f64_s64(int64_t a) {
+//   return vcvtd_n_f64_s64(a, 1);
+// }
+
+// NYI-LABEL: @test_vcvts_n_f32_u32(
+// NYI:   [[VCVTS_N_F32_U32:%.*]] = call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %a, i32 32)
+// NYI:   ret float [[VCVTS_N_F32_U32]]
+// float32_t test_vcvts_n_f32_u32(uint32_t a) {
+//   return vcvts_n_f32_u32(a, 32);
+// }
+
+// NYI-LABEL: @test_vcvtd_n_f64_u64(
+// NYI:   [[VCVTD_N_F64_U64:%.*]] = call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 %a, i32 64)
+// NYI:   ret double [[VCVTD_N_F64_U64]]
+// float64_t test_vcvtd_n_f64_u64(uint64_t a) {
+//   return vcvtd_n_f64_u64(a, 64);
+// }
+
+// NYI-LABEL: @test_vcvts_n_s32_f32(
+// NYI:   [[VCVTS_N_S32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float %a, i32 1)
+// NYI:   ret i32 [[VCVTS_N_S32_F32]]
+// int32_t test_vcvts_n_s32_f32(float32_t a) {
+//   return (int32_t)vcvts_n_s32_f32(a, 1);
+// }
+
+// NYI-LABEL: @test_vcvtd_n_s64_f64(
+// NYI:   [[VCVTD_N_S64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double %a, i32 1)
+// NYI:   ret i64 [[VCVTD_N_S64_F64]]
+// int64_t test_vcvtd_n_s64_f64(float64_t a) {
+//   return (int64_t)vcvtd_n_s64_f64(a, 1);
+// }
+
+// NYI-LABEL: @test_vcvts_n_u32_f32(
+// NYI:   [[VCVTS_N_U32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float %a, i32 32)
+// NYI:   ret i32 [[VCVTS_N_U32_F32]]
+// uint32_t test_vcvts_n_u32_f32(float32_t a) {
+//   return (uint32_t)vcvts_n_u32_f32(a, 32);
+// }
+
+// NYI-LABEL: @test_vcvtd_n_u64_f64(
+// NYI:   [[VCVTD_N_U64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double %a, i32 64)
+// NYI:   ret i64 [[VCVTD_N_U64_F64]]
+// uint64_t test_vcvtd_n_u64_f64(float64_t a) {
+//   return (uint64_t)vcvtd_n_u64_f64(a, 64);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s8_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
+//   return vreinterpret_s8_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s8_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
+//   return vreinterpret_s8_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s8_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
+//   return vreinterpret_s8_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s8_u8(
+// NYI:   ret <8 x i8> %a
+// int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
+//   return vreinterpret_s8_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s8_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
+//   return vreinterpret_s8_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s8_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
+//   return vreinterpret_s8_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s8_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
+//   return vreinterpret_s8_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s8_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
+//   return vreinterpret_s8_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s8_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
+//   return vreinterpret_s8_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s8_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// int8x8_t test_vreinterpret_s8_f64(float64x1_t a) {
+//   return vreinterpret_s8_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s8_p8(
+// NYI:   ret <8 x i8> %a
+// int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
+//   return vreinterpret_s8_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s8_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
+//   return vreinterpret_s8_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s8_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// int8x8_t test_vreinterpret_s8_p64(poly64x1_t a) {
+//   return vreinterpret_s8_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s16_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
+//   return vreinterpret_s16_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s16_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
+//   return vreinterpret_s16_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s16_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
+//   return vreinterpret_s16_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s16_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
+//   return vreinterpret_s16_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s16_u16(
+// NYI:   ret <4 x i16> %a
+// int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
+//   return vreinterpret_s16_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s16_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
+//   return vreinterpret_s16_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s16_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
+//   return vreinterpret_s16_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s16_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
+//   return vreinterpret_s16_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s16_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
+//   return vreinterpret_s16_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s16_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// int16x4_t test_vreinterpret_s16_f64(float64x1_t a) {
+//   return vreinterpret_s16_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s16_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
+//   return vreinterpret_s16_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s16_p16(
+// NYI:   ret <4 x i16> %a
+// int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
+//   return vreinterpret_s16_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s16_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// int16x4_t test_vreinterpret_s16_p64(poly64x1_t a) {
+//   return vreinterpret_s16_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s32_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
+//   return vreinterpret_s32_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s32_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
+//   return vreinterpret_s32_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s32_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
+//   return vreinterpret_s32_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s32_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
+//   return vreinterpret_s32_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s32_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
+//   return vreinterpret_s32_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s32_u32(
+// NYI:   ret <2 x i32> %a
+// int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
+//   return vreinterpret_s32_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s32_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
+//   return vreinterpret_s32_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s32_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
+//   return vreinterpret_s32_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s32_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
+//   return vreinterpret_s32_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s32_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// int32x2_t test_vreinterpret_s32_f64(float64x1_t a) {
+//   return vreinterpret_s32_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s32_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
+//   return vreinterpret_s32_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s32_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
+//   return vreinterpret_s32_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s32_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// int32x2_t test_vreinterpret_s32_p64(poly64x1_t a) {
+//   return vreinterpret_s32_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s64_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
+//   return vreinterpret_s64_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s64_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
+//   return vreinterpret_s64_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s64_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
+//   return vreinterpret_s64_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s64_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
+//   return vreinterpret_s64_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s64_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
+//   return vreinterpret_s64_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s64_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
+//   return vreinterpret_s64_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s64_u64(
+// NYI:   ret <1 x i64> %a
+// int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
+//   return vreinterpret_s64_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s64_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
+//   return vreinterpret_s64_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s64_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
+//   return vreinterpret_s64_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// int64x1_t test_vreinterpret_s64_f64(float64x1_t a) {
+//   return vreinterpret_s64_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s64_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
+//   return vreinterpret_s64_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s64_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
+//   return vreinterpret_s64_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_s64_p64(
+// NYI:   ret <1 x i64> %a
+// int64x1_t test_vreinterpret_s64_p64(poly64x1_t a) {
+//   return vreinterpret_s64_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u8_s8(
+// NYI:   ret <8 x i8> %a
+// uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
+//   return vreinterpret_u8_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u8_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
+//   return vreinterpret_u8_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u8_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
+//   return vreinterpret_u8_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u8_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
+//   return vreinterpret_u8_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u8_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
+//   return vreinterpret_u8_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u8_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
+//   return vreinterpret_u8_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u8_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
+//   return vreinterpret_u8_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u8_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
+//   return vreinterpret_u8_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u8_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
+//   return vreinterpret_u8_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u8_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// uint8x8_t test_vreinterpret_u8_f64(float64x1_t a) {
+//   return vreinterpret_u8_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u8_p8(
+// NYI:   ret <8 x i8> %a
+// uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
+//   return vreinterpret_u8_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u8_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
+//   return vreinterpret_u8_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u8_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// uint8x8_t test_vreinterpret_u8_p64(poly64x1_t a) {
+//   return vreinterpret_u8_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u16_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
+//   return vreinterpret_u16_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u16_s16(
+// NYI:   ret <4 x i16> %a
+// uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
+//   return vreinterpret_u16_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u16_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
+//   return vreinterpret_u16_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u16_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
+//   return vreinterpret_u16_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u16_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
+//   return vreinterpret_u16_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u16_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
+//   return vreinterpret_u16_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u16_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
+//   return vreinterpret_u16_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u16_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
+//   return vreinterpret_u16_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u16_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
+//   return vreinterpret_u16_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u16_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// uint16x4_t test_vreinterpret_u16_f64(float64x1_t a) {
+//   return vreinterpret_u16_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u16_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
+//   return vreinterpret_u16_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u16_p16(
+// NYI:   ret <4 x i16> %a
+// uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
+//   return vreinterpret_u16_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u16_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// uint16x4_t test_vreinterpret_u16_p64(poly64x1_t a) {
+//   return vreinterpret_u16_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u32_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
+//   return vreinterpret_u32_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u32_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
+//   return vreinterpret_u32_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u32_s32(
+// NYI:   ret <2 x i32> %a
+// uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
+//   return vreinterpret_u32_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u32_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
+//   return vreinterpret_u32_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u32_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
+//   return vreinterpret_u32_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u32_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
+//   return vreinterpret_u32_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u32_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
+//   return vreinterpret_u32_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u32_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
+//   return vreinterpret_u32_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u32_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
+//   return vreinterpret_u32_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u32_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// uint32x2_t test_vreinterpret_u32_f64(float64x1_t a) {
+//   return vreinterpret_u32_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u32_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
+//   return vreinterpret_u32_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u32_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
+//   return vreinterpret_u32_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u32_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
+// NYI:   ret <2 x i32> [[TMP0]]
+// uint32x2_t test_vreinterpret_u32_p64(poly64x1_t a) {
+//   return vreinterpret_u32_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u64_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
+//   return vreinterpret_u64_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u64_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
+//   return vreinterpret_u64_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u64_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
+//   return vreinterpret_u64_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u64_s64(
+// NYI:   ret <1 x i64> %a
+// uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
+//   return vreinterpret_u64_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u64_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
+//   return vreinterpret_u64_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u64_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
+//   return vreinterpret_u64_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u64_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
+//   return vreinterpret_u64_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u64_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
+//   return vreinterpret_u64_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u64_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
+//   return vreinterpret_u64_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// uint64x1_t test_vreinterpret_u64_f64(float64x1_t a) {
+//   return vreinterpret_u64_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u64_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
+//   return vreinterpret_u64_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u64_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
+//   return vreinterpret_u64_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_u64_p64(
+// NYI:   ret <1 x i64> %a
+// uint64x1_t test_vreinterpret_u64_p64(poly64x1_t a) {
+//   return vreinterpret_u64_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f16_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
+// NYI:   ret <4 x half> [[TMP0]]
+// float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
+//   return vreinterpret_f16_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f16_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
+// NYI:   ret <4 x half> [[TMP0]]
+// float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
+//   return vreinterpret_f16_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f16_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
+// NYI:   ret <4 x half> [[TMP0]]
+// float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
+//   return vreinterpret_f16_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f16_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
+// NYI:   ret <4 x half> [[TMP0]]
+// float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
+//   return vreinterpret_f16_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f16_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
+// NYI:   ret <4 x half> [[TMP0]]
+// float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
+//   return vreinterpret_f16_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f16_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
+// NYI:   ret <4 x half> [[TMP0]]
+// float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
+//   return vreinterpret_f16_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f16_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
+// NYI:   ret <4 x half> [[TMP0]]
+// float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
+//   return vreinterpret_f16_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f16_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
+// NYI:   ret <4 x half> [[TMP0]]
+// float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
+//   return vreinterpret_f16_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f16_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
+// NYI:   ret <4 x half> [[TMP0]]
+// float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
+//   return vreinterpret_f16_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f16_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x half>
+// NYI:   ret <4 x half> [[TMP0]]
+// float16x4_t test_vreinterpret_f16_f64(float64x1_t a) {
+//   return vreinterpret_f16_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f16_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
+// NYI:   ret <4 x half> [[TMP0]]
+// float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
+//   return vreinterpret_f16_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f16_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
+// NYI:   ret <4 x half> [[TMP0]]
+// float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
+//   return vreinterpret_f16_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f16_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
+// NYI:   ret <4 x half> [[TMP0]]
+// float16x4_t test_vreinterpret_f16_p64(poly64x1_t a) {
+//   return vreinterpret_f16_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f32_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
+// NYI:   ret <2 x float> [[TMP0]]
+// float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
+//   return vreinterpret_f32_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f32_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
+// NYI:   ret <2 x float> [[TMP0]]
+// float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
+//   return vreinterpret_f32_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f32_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
+// NYI:   ret <2 x float> [[TMP0]]
+// float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
+//   return vreinterpret_f32_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f32_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
+// NYI:   ret <2 x float> [[TMP0]]
+// float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
+//   return vreinterpret_f32_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f32_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
+// NYI:   ret <2 x float> [[TMP0]]
+// float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
+//   return vreinterpret_f32_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f32_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
+// NYI:   ret <2 x float> [[TMP0]]
+// float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
+//   return vreinterpret_f32_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f32_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
+// NYI:   ret <2 x float> [[TMP0]]
+// float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
+//   return vreinterpret_f32_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f32_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
+// NYI:   ret <2 x float> [[TMP0]]
+// float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
+//   return vreinterpret_f32_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f32_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
+// NYI:   ret <2 x float> [[TMP0]]
+// float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
+//   return vreinterpret_f32_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f32_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x float>
+// NYI:   ret <2 x float> [[TMP0]]
+// float32x2_t test_vreinterpret_f32_f64(float64x1_t a) {
+//   return vreinterpret_f32_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f32_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
+// NYI:   ret <2 x float> [[TMP0]]
+// float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
+//   return vreinterpret_f32_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f32_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
+// NYI:   ret <2 x float> [[TMP0]]
+// float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
+//   return vreinterpret_f32_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f32_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
+// NYI:   ret <2 x float> [[TMP0]]
+// float32x2_t test_vreinterpret_f32_p64(poly64x1_t a) {
+//   return vreinterpret_f32_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f64_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
+// NYI:   ret <1 x double> [[TMP0]]
+// float64x1_t test_vreinterpret_f64_s8(int8x8_t a) {
+//   return vreinterpret_f64_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f64_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
+// NYI:   ret <1 x double> [[TMP0]]
+// float64x1_t test_vreinterpret_f64_s16(int16x4_t a) {
+//   return vreinterpret_f64_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f64_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
+// NYI:   ret <1 x double> [[TMP0]]
+// float64x1_t test_vreinterpret_f64_s32(int32x2_t a) {
+//   return vreinterpret_f64_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f64_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
+// NYI:   ret <1 x double> [[TMP0]]
+// float64x1_t test_vreinterpret_f64_s64(int64x1_t a) {
+//   return vreinterpret_f64_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f64_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
+// NYI:   ret <1 x double> [[TMP0]]
+// float64x1_t test_vreinterpret_f64_u8(uint8x8_t a) {
+//   return vreinterpret_f64_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f64_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
+// NYI:   ret <1 x double> [[TMP0]]
+// float64x1_t test_vreinterpret_f64_u16(uint16x4_t a) {
+//   return vreinterpret_f64_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f64_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double>
+// NYI:   ret <1 x double> [[TMP0]]
+// float64x1_t test_vreinterpret_f64_u32(uint32x2_t a) {
+//   return vreinterpret_f64_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f64_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
+// NYI:   ret <1 x double> [[TMP0]]
+// float64x1_t test_vreinterpret_f64_u64(uint64x1_t a) {
+//   return vreinterpret_f64_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f64_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x double>
+// NYI:   ret <1 x double> [[TMP0]]
+// float64x1_t test_vreinterpret_f64_f16(float16x4_t a) {
+//   return vreinterpret_f64_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f64_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x double>
+// NYI:   ret <1 x double> [[TMP0]]
+// float64x1_t test_vreinterpret_f64_f32(float32x2_t a) {
+//   return vreinterpret_f64_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f64_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double>
+// NYI:   ret <1 x double> [[TMP0]]
+// float64x1_t test_vreinterpret_f64_p8(poly8x8_t a) {
+//   return vreinterpret_f64_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f64_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double>
+// NYI:   ret <1 x double> [[TMP0]]
+// float64x1_t test_vreinterpret_f64_p16(poly16x4_t a) {
+//   return vreinterpret_f64_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_f64_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double>
+// NYI:   ret <1 x double> [[TMP0]]
+// float64x1_t test_vreinterpret_f64_p64(poly64x1_t a) {
+//   return vreinterpret_f64_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p8_s8(
+// NYI:   ret <8 x i8> %a
+// poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
+//   return vreinterpret_p8_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p8_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
+//   return vreinterpret_p8_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p8_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
+//   return vreinterpret_p8_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p8_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
+//   return vreinterpret_p8_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p8_u8(
+// NYI:   ret <8 x i8> %a
+// poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
+//   return vreinterpret_p8_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p8_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
+//   return vreinterpret_p8_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p8_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
+//   return vreinterpret_p8_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p8_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
+//   return vreinterpret_p8_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p8_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
+//   return vreinterpret_p8_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p8_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
+//   return vreinterpret_p8_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p8_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// poly8x8_t test_vreinterpret_p8_f64(float64x1_t a) {
+//   return vreinterpret_p8_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p8_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
+//   return vreinterpret_p8_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p8_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   ret <8 x i8> [[TMP0]]
+// poly8x8_t test_vreinterpret_p8_p64(poly64x1_t a) {
+//   return vreinterpret_p8_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p16_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
+//   return vreinterpret_p16_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p16_s16(
+// NYI:   ret <4 x i16> %a
+// poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
+//   return vreinterpret_p16_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p16_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
+//   return vreinterpret_p16_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p16_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
+//   return vreinterpret_p16_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p16_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
+//   return vreinterpret_p16_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p16_u16(
+// NYI:   ret <4 x i16> %a
+// poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
+//   return vreinterpret_p16_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p16_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
+//   return vreinterpret_p16_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p16_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
+//   return vreinterpret_p16_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p16_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
+//   return vreinterpret_p16_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p16_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
+//   return vreinterpret_p16_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p16_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// poly16x4_t test_vreinterpret_p16_f64(float64x1_t a) {
+//   return vreinterpret_p16_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p16_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
+//   return vreinterpret_p16_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p16_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
+// NYI:   ret <4 x i16> [[TMP0]]
+// poly16x4_t test_vreinterpret_p16_p64(poly64x1_t a) {
+//   return vreinterpret_p16_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p64_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// poly64x1_t test_vreinterpret_p64_s8(int8x8_t a) {
+//   return vreinterpret_p64_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p64_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// poly64x1_t test_vreinterpret_p64_s16(int16x4_t a) {
+//   return vreinterpret_p64_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p64_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// poly64x1_t test_vreinterpret_p64_s32(int32x2_t a) {
+//   return vreinterpret_p64_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p64_s64(
+// NYI:   ret <1 x i64> %a
+// poly64x1_t test_vreinterpret_p64_s64(int64x1_t a) {
+//   return vreinterpret_p64_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p64_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// poly64x1_t test_vreinterpret_p64_u8(uint8x8_t a) {
+//   return vreinterpret_p64_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p64_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// poly64x1_t test_vreinterpret_p64_u16(uint16x4_t a) {
+//   return vreinterpret_p64_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p64_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// poly64x1_t test_vreinterpret_p64_u32(uint32x2_t a) {
+//   return vreinterpret_p64_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p64_u64(
+// NYI:   ret <1 x i64> %a
+// poly64x1_t test_vreinterpret_p64_u64(uint64x1_t a) {
+//   return vreinterpret_p64_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p64_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// poly64x1_t test_vreinterpret_p64_f16(float16x4_t a) {
+//   return vreinterpret_p64_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p64_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// poly64x1_t test_vreinterpret_p64_f32(float32x2_t a) {
+//   return vreinterpret_p64_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// poly64x1_t test_vreinterpret_p64_f64(float64x1_t a) {
+//   return vreinterpret_p64_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p64_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// poly64x1_t test_vreinterpret_p64_p8(poly8x8_t a) {
+//   return vreinterpret_p64_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpret_p64_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
+// NYI:   ret <1 x i64> [[TMP0]]
+// poly64x1_t test_vreinterpret_p64_p16(poly16x4_t a) {
+//   return vreinterpret_p64_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s8_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
+//   return vreinterpretq_s8_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s8_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
+//   return vreinterpretq_s8_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s8_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
+//   return vreinterpretq_s8_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s8_u8(
+// NYI:   ret <16 x i8> %a
+// int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
+//   return vreinterpretq_s8_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s8_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
+//   return vreinterpretq_s8_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s8_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
+//   return vreinterpretq_s8_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s8_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
+//   return vreinterpretq_s8_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s8_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
+//   return vreinterpretq_s8_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s8_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
+//   return vreinterpretq_s8_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s8_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// int8x16_t test_vreinterpretq_s8_f64(float64x2_t a) {
+//   return vreinterpretq_s8_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s8_p8(
+// NYI:   ret <16 x i8> %a
+// int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
+//   return vreinterpretq_s8_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s8_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
+//   return vreinterpretq_s8_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s8_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// int8x16_t test_vreinterpretq_s8_p64(poly64x2_t a) {
+//   return vreinterpretq_s8_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s16_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
+//   return vreinterpretq_s16_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s16_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
+//   return vreinterpretq_s16_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s16_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
+//   return vreinterpretq_s16_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s16_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
+//   return vreinterpretq_s16_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s16_u16(
+// NYI:   ret <8 x i16> %a
+// int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
+//   return vreinterpretq_s16_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s16_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
+//   return vreinterpretq_s16_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s16_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
+//   return vreinterpretq_s16_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s16_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
+//   return vreinterpretq_s16_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s16_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
+//   return vreinterpretq_s16_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s16_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// int16x8_t test_vreinterpretq_s16_f64(float64x2_t a) {
+//   return vreinterpretq_s16_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s16_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
+//   return vreinterpretq_s16_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s16_p16(
+// NYI:   ret <8 x i16> %a
+// int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
+//   return vreinterpretq_s16_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s16_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// int16x8_t test_vreinterpretq_s16_p64(poly64x2_t a) {
+//   return vreinterpretq_s16_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s32_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
+//   return vreinterpretq_s32_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s32_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
+//   return vreinterpretq_s32_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s32_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
+//   return vreinterpretq_s32_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s32_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
+//   return vreinterpretq_s32_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s32_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
+//   return vreinterpretq_s32_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s32_u32(
+// NYI:   ret <4 x i32> %a
+// int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
+//   return vreinterpretq_s32_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s32_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
+//   return vreinterpretq_s32_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s32_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
+//   return vreinterpretq_s32_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s32_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
+//   return vreinterpretq_s32_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s32_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// int32x4_t test_vreinterpretq_s32_f64(float64x2_t a) {
+//   return vreinterpretq_s32_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s32_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
+//   return vreinterpretq_s32_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s32_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
+//   return vreinterpretq_s32_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s32_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// int32x4_t test_vreinterpretq_s32_p64(poly64x2_t a) {
+//   return vreinterpretq_s32_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s64_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
+//   return vreinterpretq_s64_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s64_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
+//   return vreinterpretq_s64_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s64_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
+//   return vreinterpretq_s64_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s64_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
+//   return vreinterpretq_s64_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s64_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
+//   return vreinterpretq_s64_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s64_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
+//   return vreinterpretq_s64_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s64_u64(
+// NYI:   ret <2 x i64> %a
+// int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
+//   return vreinterpretq_s64_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s64_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
+//   return vreinterpretq_s64_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s64_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
+//   return vreinterpretq_s64_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// int64x2_t test_vreinterpretq_s64_f64(float64x2_t a) {
+//   return vreinterpretq_s64_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s64_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
+//   return vreinterpretq_s64_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s64_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
+//   return vreinterpretq_s64_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_s64_p64(
+// NYI:   ret <2 x i64> %a
+// int64x2_t test_vreinterpretq_s64_p64(poly64x2_t a) {
+//   return vreinterpretq_s64_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u8_s8(
+// NYI:   ret <16 x i8> %a
+// uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
+//   return vreinterpretq_u8_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u8_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
+//   return vreinterpretq_u8_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u8_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
+//   return vreinterpretq_u8_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u8_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
+//   return vreinterpretq_u8_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u8_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
+//   return vreinterpretq_u8_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u8_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
+//   return vreinterpretq_u8_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u8_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
+//   return vreinterpretq_u8_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u8_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
+//   return vreinterpretq_u8_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u8_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
+//   return vreinterpretq_u8_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u8_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// uint8x16_t test_vreinterpretq_u8_f64(float64x2_t a) {
+//   return vreinterpretq_u8_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u8_p8(
+// NYI:   ret <16 x i8> %a
+// uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
+//   return vreinterpretq_u8_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u8_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
+//   return vreinterpretq_u8_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u8_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// uint8x16_t test_vreinterpretq_u8_p64(poly64x2_t a) {
+//   return vreinterpretq_u8_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u16_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
+//   return vreinterpretq_u16_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u16_s16(
+// NYI:   ret <8 x i16> %a
+// uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
+//   return vreinterpretq_u16_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u16_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
+//   return vreinterpretq_u16_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u16_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
+//   return vreinterpretq_u16_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u16_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
+//   return vreinterpretq_u16_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u16_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
+//   return vreinterpretq_u16_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u16_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
+//   return vreinterpretq_u16_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u16_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
+//   return vreinterpretq_u16_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u16_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
+//   return vreinterpretq_u16_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u16_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// uint16x8_t test_vreinterpretq_u16_f64(float64x2_t a) {
+//   return vreinterpretq_u16_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u16_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
+//   return vreinterpretq_u16_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u16_p16(
+// NYI:   ret <8 x i16> %a
+// uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
+//   return vreinterpretq_u16_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u16_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// uint16x8_t test_vreinterpretq_u16_p64(poly64x2_t a) {
+//   return vreinterpretq_u16_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u32_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
+//   return vreinterpretq_u32_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u32_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
+//   return vreinterpretq_u32_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u32_s32(
+// NYI:   ret <4 x i32> %a
+// uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
+//   return vreinterpretq_u32_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u32_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
+//   return vreinterpretq_u32_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u32_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
+//   return vreinterpretq_u32_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u32_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
+//   return vreinterpretq_u32_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u32_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
+//   return vreinterpretq_u32_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u32_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
+//   return vreinterpretq_u32_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u32_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
+//   return vreinterpretq_u32_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u32_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// uint32x4_t test_vreinterpretq_u32_f64(float64x2_t a) {
+//   return vreinterpretq_u32_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u32_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
+//   return vreinterpretq_u32_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u32_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
+//   return vreinterpretq_u32_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u32_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
+// NYI:   ret <4 x i32> [[TMP0]]
+// uint32x4_t test_vreinterpretq_u32_p64(poly64x2_t a) {
+//   return vreinterpretq_u32_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u64_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
+//   return vreinterpretq_u64_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u64_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
+//   return vreinterpretq_u64_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u64_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
+//   return vreinterpretq_u64_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u64_s64(
+// NYI:   ret <2 x i64> %a
+// uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
+//   return vreinterpretq_u64_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u64_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
+//   return vreinterpretq_u64_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u64_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
+//   return vreinterpretq_u64_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u64_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
+//   return vreinterpretq_u64_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u64_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
+//   return vreinterpretq_u64_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u64_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
+//   return vreinterpretq_u64_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// uint64x2_t test_vreinterpretq_u64_f64(float64x2_t a) {
+//   return vreinterpretq_u64_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u64_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
+//   return vreinterpretq_u64_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u64_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
+//   return vreinterpretq_u64_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_u64_p64(
+// NYI:   ret <2 x i64> %a
+// uint64x2_t test_vreinterpretq_u64_p64(poly64x2_t a) {
+//   return vreinterpretq_u64_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f16_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
+// NYI:   ret <8 x half> [[TMP0]]
+// float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
+//   return vreinterpretq_f16_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f16_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
+// NYI:   ret <8 x half> [[TMP0]]
+// float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
+//   return vreinterpretq_f16_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f16_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
+// NYI:   ret <8 x half> [[TMP0]]
+// float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
+//   return vreinterpretq_f16_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f16_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
+// NYI:   ret <8 x half> [[TMP0]]
+// float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
+//   return vreinterpretq_f16_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f16_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
+// NYI:   ret <8 x half> [[TMP0]]
+// float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
+//   return vreinterpretq_f16_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f16_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
+// NYI:   ret <8 x half> [[TMP0]]
+// float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
+//   return vreinterpretq_f16_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f16_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
+// NYI:   ret <8 x half> [[TMP0]]
+// float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
+//   return vreinterpretq_f16_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f16_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
+// NYI:   ret <8 x half> [[TMP0]]
+// float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
+//   return vreinterpretq_f16_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f16_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
+// NYI:   ret <8 x half> [[TMP0]]
+// float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
+//   return vreinterpretq_f16_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f16_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x half>
+// NYI:   ret <8 x half> [[TMP0]]
+// float16x8_t test_vreinterpretq_f16_f64(float64x2_t a) {
+//   return vreinterpretq_f16_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f16_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
+// NYI:   ret <8 x half> [[TMP0]]
+// float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
+//   return vreinterpretq_f16_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f16_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
+// NYI:   ret <8 x half> [[TMP0]]
+// float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
+//   return vreinterpretq_f16_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f16_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
+// NYI:   ret <8 x half> [[TMP0]]
+// float16x8_t test_vreinterpretq_f16_p64(poly64x2_t a) {
+//   return vreinterpretq_f16_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f32_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
+// NYI:   ret <4 x float> [[TMP0]]
+// float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
+//   return vreinterpretq_f32_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f32_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
+// NYI:   ret <4 x float> [[TMP0]]
+// float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
+//   return vreinterpretq_f32_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f32_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
+// NYI:   ret <4 x float> [[TMP0]]
+// float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
+//   return vreinterpretq_f32_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f32_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
+// NYI:   ret <4 x float> [[TMP0]]
+// float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
+//   return vreinterpretq_f32_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f32_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
+// NYI:   ret <4 x float> [[TMP0]]
+// float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
+//   return vreinterpretq_f32_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f32_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
+// NYI:   ret <4 x float> [[TMP0]]
+// float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
+//   return vreinterpretq_f32_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f32_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
+// NYI:   ret <4 x float> [[TMP0]]
+// float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
+//   return vreinterpretq_f32_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f32_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
+// NYI:   ret <4 x float> [[TMP0]]
+// float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
+//   return vreinterpretq_f32_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f32_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
+// NYI:   ret <4 x float> [[TMP0]]
+// float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
+//   return vreinterpretq_f32_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f32_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x float>
+// NYI:   ret <4 x float> [[TMP0]]
+// float32x4_t test_vreinterpretq_f32_f64(float64x2_t a) {
+//   return vreinterpretq_f32_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f32_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
+// NYI:   ret <4 x float> [[TMP0]]
+// float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
+//   return vreinterpretq_f32_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f32_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
+// NYI:   ret <4 x float> [[TMP0]]
+// float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
+//   return vreinterpretq_f32_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f32_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
+// NYI:   ret <4 x float> [[TMP0]]
+// float32x4_t test_vreinterpretq_f32_p64(poly64x2_t a) {
+//   return vreinterpretq_f32_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f64_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
+// NYI:   ret <2 x double> [[TMP0]]
+// float64x2_t test_vreinterpretq_f64_s8(int8x16_t a) {
+//   return vreinterpretq_f64_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f64_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
+// NYI:   ret <2 x double> [[TMP0]]
+// float64x2_t test_vreinterpretq_f64_s16(int16x8_t a) {
+//   return vreinterpretq_f64_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f64_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
+// NYI:   ret <2 x double> [[TMP0]]
+// float64x2_t test_vreinterpretq_f64_s32(int32x4_t a) {
+//   return vreinterpretq_f64_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f64_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
+// NYI:   ret <2 x double> [[TMP0]]
+// float64x2_t test_vreinterpretq_f64_s64(int64x2_t a) {
+//   return vreinterpretq_f64_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f64_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
+// NYI:   ret <2 x double> [[TMP0]]
+// float64x2_t test_vreinterpretq_f64_u8(uint8x16_t a) {
+//   return vreinterpretq_f64_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f64_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
+// NYI:   ret <2 x double> [[TMP0]]
+// float64x2_t test_vreinterpretq_f64_u16(uint16x8_t a) {
+//   return vreinterpretq_f64_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f64_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
+// NYI:   ret <2 x double> [[TMP0]]
+// float64x2_t test_vreinterpretq_f64_u32(uint32x4_t a) {
+//   return vreinterpretq_f64_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f64_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
+// NYI:   ret <2 x double> [[TMP0]]
+// float64x2_t test_vreinterpretq_f64_u64(uint64x2_t a) {
+//   return vreinterpretq_f64_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f64_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x double>
+// NYI:   ret <2 x double> [[TMP0]]
+// float64x2_t test_vreinterpretq_f64_f16(float16x8_t a) {
+//   return vreinterpretq_f64_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f64_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x double>
+// NYI:   ret <2 x double> [[TMP0]]
+// float64x2_t test_vreinterpretq_f64_f32(float32x4_t a) {
+//   return vreinterpretq_f64_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f64_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
+// NYI:   ret <2 x double> [[TMP0]]
+// float64x2_t test_vreinterpretq_f64_p8(poly8x16_t a) {
+//   return vreinterpretq_f64_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f64_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
+// NYI:   ret <2 x double> [[TMP0]]
+// float64x2_t test_vreinterpretq_f64_p16(poly16x8_t a) {
+//   return vreinterpretq_f64_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_f64_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
+// NYI:   ret <2 x double> [[TMP0]]
+// float64x2_t test_vreinterpretq_f64_p64(poly64x2_t a) {
+//   return vreinterpretq_f64_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p8_s8(
+// NYI:   ret <16 x i8> %a
+// poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
+//   return vreinterpretq_p8_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p8_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
+//   return vreinterpretq_p8_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p8_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
+//   return vreinterpretq_p8_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p8_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
+//   return vreinterpretq_p8_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p8_u8(
+// NYI:   ret <16 x i8> %a
+// poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
+//   return vreinterpretq_p8_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p8_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
+//   return vreinterpretq_p8_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p8_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
+//   return vreinterpretq_p8_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p8_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
+//   return vreinterpretq_p8_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p8_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
+//   return vreinterpretq_p8_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p8_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
+//   return vreinterpretq_p8_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p8_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// poly8x16_t test_vreinterpretq_p8_f64(float64x2_t a) {
+//   return vreinterpretq_p8_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p8_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
+//   return vreinterpretq_p8_p16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p8_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   ret <16 x i8> [[TMP0]]
+// poly8x16_t test_vreinterpretq_p8_p64(poly64x2_t a) {
+//   return vreinterpretq_p8_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p16_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
+//   return vreinterpretq_p16_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p16_s16(
+// NYI:   ret <8 x i16> %a
+// poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
+//   return vreinterpretq_p16_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p16_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
+//   return vreinterpretq_p16_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p16_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
+//   return vreinterpretq_p16_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p16_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
+//   return vreinterpretq_p16_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p16_u16(
+// NYI:   ret <8 x i16> %a
+// poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
+//   return vreinterpretq_p16_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p16_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
+//   return vreinterpretq_p16_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p16_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
+//   return vreinterpretq_p16_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p16_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
+//   return vreinterpretq_p16_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p16_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
+//   return vreinterpretq_p16_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p16_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// poly16x8_t test_vreinterpretq_p16_f64(float64x2_t a) {
+//   return vreinterpretq_p16_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p16_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
+//   return vreinterpretq_p16_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p16_p64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
+// NYI:   ret <8 x i16> [[TMP0]]
+// poly16x8_t test_vreinterpretq_p16_p64(poly64x2_t a) {
+//   return vreinterpretq_p16_p64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p64_s8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// poly64x2_t test_vreinterpretq_p64_s8(int8x16_t a) {
+//   return vreinterpretq_p64_s8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p64_s16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// poly64x2_t test_vreinterpretq_p64_s16(int16x8_t a) {
+//   return vreinterpretq_p64_s16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p64_s32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// poly64x2_t test_vreinterpretq_p64_s32(int32x4_t a) {
+//   return vreinterpretq_p64_s32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p64_s64(
+// NYI:   ret <2 x i64> %a
+// poly64x2_t test_vreinterpretq_p64_s64(int64x2_t a) {
+//   return vreinterpretq_p64_s64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p64_u8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// poly64x2_t test_vreinterpretq_p64_u8(uint8x16_t a) {
+//   return vreinterpretq_p64_u8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p64_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// poly64x2_t test_vreinterpretq_p64_u16(uint16x8_t a) {
+//   return vreinterpretq_p64_u16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p64_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// poly64x2_t test_vreinterpretq_p64_u32(uint32x4_t a) {
+//   return vreinterpretq_p64_u32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p64_u64(
+// NYI:   ret <2 x i64> %a
+// poly64x2_t test_vreinterpretq_p64_u64(uint64x2_t a) {
+//   return vreinterpretq_p64_u64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p64_f16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// poly64x2_t test_vreinterpretq_p64_f16(float16x8_t a) {
+//   return vreinterpretq_p64_f16(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p64_f32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// poly64x2_t test_vreinterpretq_p64_f32(float32x4_t a) {
+//   return vreinterpretq_p64_f32(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// poly64x2_t test_vreinterpretq_p64_f64(float64x2_t a) {
+//   return vreinterpretq_p64_f64(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p64_p8(
+// NYI:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// poly64x2_t test_vreinterpretq_p64_p8(poly8x16_t a) {
+//   return vreinterpretq_p64_p8(a);
+// }
+
+// NYI-LABEL: @test_vreinterpretq_p64_p16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
+// NYI:   ret <2 x i64> [[TMP0]]
+// poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) {
+//   return vreinterpretq_p64_p16(a);
+// }
+
+// Scalar (32-bit float) absolute difference. Checks that ClangIR lowers
+// vabds_f32 to the aarch64.sisd.fabd intrinsic at the CIR level and that
+// the LLVM lowering emits the matching @llvm.aarch64.sisd.fabd.f32 call.
+float32_t test_vabds_f32(float32_t a, float32_t b) {
+  return vabds_f32(a, b);
+
+  // CIR-LABEL: vabds_f32
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.sisd.fabd" {{.*}}, {{.*}} : (!cir.float, !cir.float) -> !cir.float
+
+  // LLVM-LABEL: @test_vabds_f32(
+  // LLVM:   [[VABDS_F32:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %0, float %1)
+  // LLVM:   ret float [[VABDS_F32]]
+}
+
+// Scalar (64-bit double) absolute difference. Checks that ClangIR lowers
+// vabdd_f64 to the aarch64.sisd.fabd intrinsic at the CIR level and that
+// the LLVM lowering emits the matching @llvm.aarch64.sisd.fabd.f64 call.
+float64_t test_vabdd_f64(float64_t a, float64_t b) {
+  return vabdd_f64(a, b);
+
+  // CIR-LABEL: vabdd_f64
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.sisd.fabd" {{.*}}, {{.*}} : (!cir.double, !cir.double) -> !cir.double
+
+  // LLVM-LABEL: @test_vabdd_f64(
+  // LLVM:   [[VABDD_F64:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %0, double %1)
+  // LLVM:   ret double [[VABDD_F64]]
+}
+
+// NYI-LABEL: @test_vuqaddq_s8(
+// NYI: entry:
+// NYI-NEXT:  [[V:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
+// NYI-NEXT:  ret <16 x i8> [[V]]
+// int8x16_t test_vuqaddq_s8(int8x16_t a, uint8x16_t b) {
+//   return vuqaddq_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vuqaddq_s32(
+// NYI: [[V:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
+// NYI-NEXT:  ret <4 x i32> [[V]]
+// int32x4_t test_vuqaddq_s32(int32x4_t a, uint32x4_t b) {
+//   return vuqaddq_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vuqaddq_s64(
+// NYI: [[V:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
+// NYI-NEXT:  ret <2 x i64> [[V]]
+// int64x2_t test_vuqaddq_s64(int64x2_t a, uint64x2_t b) {
+//   return vuqaddq_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vuqaddq_s16(
+// NYI: [[V:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
+// NYI-NEXT:  ret <8 x i16> [[V]]
+// int16x8_t test_vuqaddq_s16(int16x8_t a, uint16x8_t b) {
+//   return vuqaddq_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vuqadd_s8(
+// NYI: entry:
+// NYI-NEXT: [[V:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
+// NYI-NEXT: ret <8 x i8> [[V]]
+// int8x8_t test_vuqadd_s8(int8x8_t a, uint8x8_t b) {
+//   return vuqadd_s8(a, b);
+// }
+
+// NYI-LABEL: @test_vuqadd_s32(
+// NYI: [[V:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
+// NYI-NEXT:  ret <2 x i32> [[V]]
+// int32x2_t test_vuqadd_s32(int32x2_t a, uint32x2_t b) {
+//   return vuqadd_s32(a, b);
+// }
+
+// NYI-LABEL: @test_vuqadd_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// NYI:   [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
+// NYI:   ret <1 x i64> [[VUQADD2_I]]
+// int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) {
+//   return vuqadd_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vuqadd_s16(
+// NYI: [[V:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
+// NYI-NEXT:  ret <4 x i16> [[V]]
+// int16x4_t test_vuqadd_s16(int16x4_t a, uint16x4_t b) {
+//   return vuqadd_s16(a, b);
+// }
+
+// NYI-LABEL: @test_vsqadd_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
+// NYI:   [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
+// NYI:   ret <1 x i64> [[VSQADD2_I]]
+// uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) {
+//   return vsqadd_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vsqadd_u8(
+// NYI:   [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
+// NYI:   ret <8 x i8> [[VSQADD_I]]
+// uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) {
+//   return vsqadd_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vsqaddq_u8(
+// NYI:   [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
+// NYI:   ret <16 x i8> [[VSQADD_I]]
+// uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) {
+//   return vsqaddq_u8(a, b);
+// }
+
+// NYI-LABEL: @test_vsqadd_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
+// NYI:   [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
+// NYI:   ret <4 x i16> [[VSQADD2_I]]
+// uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) {
+//   return vsqadd_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vsqaddq_u16(
+// NYI:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
+// NYI:   [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
+// NYI:   ret <8 x i16> [[VSQADD2_I]]
+// uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) {
+//   return vsqaddq_u16(a, b);
+// }
+
+// NYI-LABEL: @test_vsqadd_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
+// NYI:   [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
+// NYI:   ret <2 x i32> [[VSQADD2_I]]
+// uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) {
+//   return vsqadd_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vsqaddq_u32(
+// NYI:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
+// NYI:   [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
+// NYI:   ret <4 x i32> [[VSQADD2_I]]
+// uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) {
+//   return vsqaddq_u32(a, b);
+// }
+
+// NYI-LABEL: @test_vsqaddq_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
+// NYI:   [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
+// NYI:   ret <2 x i64> [[VSQADD2_I]]
+// uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) {
+//   return vsqaddq_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vabs_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %a)
+// NYI:   ret <1 x i64> [[VABS1_I]]
+// int64x1_t test_vabs_s64(int64x1_t a) {
+//   return vabs_s64(a);
+// }
+
+// NYI-LABEL: @test_vqabs_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> %a)
+// NYI:   [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8>
+// NYI:   ret <1 x i64> [[VQABS_V1_I]]
+// int64x1_t test_vqabs_s64(int64x1_t a) {
+//   return vqabs_s64(a);
+// }
+
+// NYI-LABEL: @test_vqneg_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> %a)
+// NYI:   [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8>
+// NYI:   ret <1 x i64> [[VQNEG_V1_I]]
+// int64x1_t test_vqneg_s64(int64x1_t a) {
+//   return vqneg_s64(a);
+// }
+
+// NYI-LABEL: @test_vneg_s64(
+// NYI:   [[SUB_I:%.*]] = sub <1 x i64> zeroinitializer, %a
+// NYI:   ret <1 x i64> [[SUB_I]]
+// int64x1_t test_vneg_s64(int64x1_t a) {
+//   return vneg_s64(a);
+// }
+
+float32_t test_vaddv_f32(float32x2_t a) {
+  return vaddv_f32(a);
+
+  // CIR-LABEL: vaddv_f32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.faddv" {{%.*}} : (!cir.vector<!cir.float x 2>) -> !cir.float
+
+  // LLVM-LABEL: test_vaddv_f32
+  // LLVM-SAME: (<2 x float> [[a:%.*]])
+  // LLVM: [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> {{.*}})
+  // LLVM: ret float [[VADDV_F32_I]]
+}
+
+float32_t test_vaddvq_f32(float32x4_t a) {
+  return vaddvq_f32(a);
+
+  // CIR-LABEL: vaddvq_f32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.faddv" {{%.*}} : (!cir.vector<!cir.float x 4>) -> !cir.float
+
+  // LLVM-LABEL: test_vaddvq_f32
+  // LLVM-SAME: (<4 x float> [[a:%.*]])
+  // LLVM: [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> {{.*}})
+  // LLVM: ret float [[VADDVQ_F32_I]]
+}
+
+float64_t test_vaddvq_f64(float64x2_t a) {
+  return vaddvq_f64(a);
+
+  // CIR-LABEL: vaddvq_f64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.faddv" {{%.*}} : (!cir.vector<!cir.double x 2>) -> !cir.double
+
+  // LLVM-LABEL: test_vaddvq_f64
+  // LLVM-SAME: (<2 x double> [[a:%.*]])
+  // LLVM: [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> {{.*}})
+  // LLVM: ret double [[VADDVQ_F64_I]]
+}
+
+float32_t test_vmaxv_f32(float32x2_t a) {
+  return vmaxv_f32(a);
+
+  // CIR-LABEL: vmaxv_f32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.fmaxv" {{%.*}} : (!cir.vector<!cir.float x 2>) -> !cir.float
+
+  // LLVM-LABEL: test_vmaxv_f32
+  // LLVM-SAME: (<2 x float> [[a:%.*]])
+  // LLVM:   [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> {{.*}})
+  // LLVM:   ret float [[VMAXV_F32_I]]
+}
+
+float64_t test_vmaxvq_f64(float64x2_t a) {
+  return vmaxvq_f64(a);
+
+  // CIR-LABEL: vmaxvq_f64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.fmaxv" {{%.*}} : (!cir.vector<!cir.double x 2>) -> !cir.double
+
+  // LLVM-LABEL: test_vmaxvq_f64
+  // LLVM-SAME: (<2 x double> [[a:%.*]])
+  // LLVM:  [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> {{.*}})
+  // LLVM:  ret double [[VMAXVQ_F64_I]]
+}
+
+// NYI-LABEL: @test_vminv_f32(
+// NYI:   [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
+// NYI:   ret float [[VMINV_F32_I]]
+// float32_t test_vminv_f32(float32x2_t a) {
+//   return vminv_f32(a);
+// }
+
+float64_t test_vminvq_f64(float64x2_t a) {
+  return vminvq_f64(a);
+
+  // CIR-LABEL: vminvq_f64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.fminv" {{%.*}} : (!cir.vector<!cir.double x 2>) -> !cir.double
+
+  // LLVM-LABEL: @test_vminvq_f64
+  // LLVM-SAME: (<2 x double> [[a:%.*]])
+  // LLVM:   [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> {{.*}})
+  // LLVM:   ret double [[VMINVQ_F64_I]]
+}
+
+
+float32_t test_vmaxnmvq_f32(float32x4_t a) {
+  return vmaxnmvq_f32(a);
+
+  // CIR-LABEL: vmaxnmvq_f32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.fmaxnmv" {{%.*}} : (!cir.vector<!cir.float x 4>) -> !cir.float
+
+  // LLVM-LABEL: @test_vmaxnmvq_f32
+  // LLVM-SAME: (<4 x float> [[a:%.*]])
+  // LLVM:  [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> {{.*}})
+  // LLVM:  ret float [[VMAXNMVQ_F32_I]]
+}
+
+float64_t test_vmaxnmvq_f64(float64x2_t a) {
+  return vmaxnmvq_f64(a);
+
+  // CIR-LABEL: vmaxnmvq_f64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.fmaxnmv" {{%.*}} : (!cir.vector<!cir.double x 2>) -> !cir.double
+
+  // LLVM-LABEL: @test_vmaxnmvq_f64
+  // LLVM-SAME: (<2 x double> [[a:%.*]])
+  // LLVM:  [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> {{.*}})
+  // LLVM:  ret double [[VMAXNMVQ_F64_I]]
+}
+
+float32_t test_vmaxnmv_f32(float32x2_t a) {
+  return vmaxnmv_f32(a);
+
+  // CIR-LABEL: vmaxnmv_f32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.fmaxnmv" {{%.*}} : (!cir.vector<!cir.float x 2>) -> !cir.float
+
+  // LLVM-LABEL: @test_vmaxnmv_f32
+  // LLVM-SAME: (<2 x float> [[a:%.*]])
+  // LLVM:   [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> {{.*}})
+  // LLVM:   ret float [[VMAXNMV_F32_I]]
+}
+
+float64_t test_vminnmvq_f64(float64x2_t a) {
+  return vminnmvq_f64(a);
+
+  // CIR-LABEL: vminnmvq_f64
+  // CIR: cir.llvm.intrinsic "aarch64.neon.fminnmv" {{%.*}} : (!cir.vector<!cir.double x 2>) -> !cir.double
+
+  // LLVM-LABEL: @test_vminnmvq_f64
+  // LLVM-SAME: (<2 x double> [[a:%.*]])
+  // LLVM:   [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> {{.*}})
+  // LLVM:   ret double [[VMINNMVQ_F64_I]]
+}
+
+float32_t test_vminnmvq_f32(float32x4_t a) {
+  return vminnmvq_f32(a);
+
+  // CIR-LABEL: vminnmvq_f32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.fminnmv" {{%.*}} : (!cir.vector<!cir.float x 4>) -> !cir.float
+
+  // LLVM-LABEL: @test_vminnmvq_f32
+  // LLVM-SAME: (<4 x float> [[a:%.*]])
+  // LLVM:   [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> {{.*}})
+  // LLVM:   ret float [[VMINNMVQ_F32_I]]
+}
+
+float32_t test_vminnmv_f32(float32x2_t a) {
+  return vminnmv_f32(a);
+
+  // CIR-LABEL: vminnmv_f32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.fminnmv" {{%.*}} : (!cir.vector<!cir.float x 2>) -> !cir.float
+
+  // LLVM-LABEL: @test_vminnmv_f32
+  // LLVM-SAME: (<2 x float> [[a:%.*]])
+  // LLVM:   [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> {{.*}})
+  // LLVM:   ret float [[VMINNMV_F32_I]]
+}
+
+// NYI-LABEL: @test_vpaddq_s64(
+// NYI:   [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
+// NYI:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
+// NYI:   ret <2 x i64> [[VPADDQ_V2_I]]
+// int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) {
+//   return vpaddq_s64(a, b);
+// }
+
+// NYI-LABEL: @test_vpaddq_u64(
+// NYI:   [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
+// NYI:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
+// NYI:   ret <2 x i64> [[VPADDQ_V2_I]]
+// uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {
+//   return vpaddq_u64(a, b);
+// }
+
+// NYI-LABEL: @test_vpaddd_u64(
+// NYI:   [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
+// NYI:   ret i64 [[VPADDD_U64_I]]
+// uint64_t test_vpaddd_u64(uint64x2_t a) {
+//   return vpaddd_u64(a);
+// }
+
+// NYI-LABEL: @test_vaddvq_s64(
+// NYI:   [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a)
+// NYI:   ret i64 [[VADDVQ_S64_I]]
+// int64_t test_vaddvq_s64(int64x2_t a) {
+//   return vaddvq_s64(a);
+// }
+
+// NYI-LABEL: @test_vaddvq_u64(
+// NYI:   [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
+// NYI:   ret i64 [[VADDVQ_U64_I]]
+// uint64_t test_vaddvq_u64(uint64x2_t a) {
+//   return vaddvq_u64(a);
+// }
+
+// NYI-LABEL: @test_vadd_f64(
+// NYI:   [[ADD_I:%.*]] = fadd <1 x double> %a, %b
+// NYI:   ret <1 x double> [[ADD_I]]
+// float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
+//   return vadd_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vmul_f64(
+// NYI:   [[MUL_I:%.*]] = fmul <1 x double> %a, %b
+// NYI:   ret <1 x double> [[MUL_I]]
+// float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
+//   return vmul_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vdiv_f64(
+// NYI:   [[DIV_I:%.*]] = fdiv <1 x double> %a, %b
+// NYI:   ret <1 x double> [[DIV_I]]
+// float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
+//   return vdiv_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vmla_f64(
+// NYI:   [[MUL_I:%.*]] = fmul <1 x double> %b, %c
+// NYI:   [[ADD_I:%.*]] = fadd <1 x double> %a, [[MUL_I]]
+// NYI:   ret <1 x double> [[ADD_I]]
+// float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
+//   return vmla_f64(a, b, c);
+// }
+
+// NYI-LABEL: @test_vmls_f64(
+// NYI:   [[MUL_I:%.*]] = fmul <1 x double> %b, %c
+// NYI:   [[SUB_I:%.*]] = fsub <1 x double> %a, [[MUL_I]]
+// NYI:   ret <1 x double> [[SUB_I]]
+// float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
+//   return vmls_f64(a, b, c);
+// }
+
+// NYI-LABEL: @test_vfma_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// NYI:   [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
+// NYI:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
+// NYI:   ret <1 x double> [[TMP3]]
+// float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
+//   return vfma_f64(a, b, c);
+// }
+
+// NYI-LABEL: @test_vfms_f64(
+// NYI:   [[SUB_I:%.*]] = fneg <1 x double> %b
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
+// NYI:   [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
+// NYI:   [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> %c, <1 x double> %a)
+// NYI:   ret <1 x double> [[TMP3]]
+// float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
+//   return vfms_f64(a, b, c);
+// }
+
+// NYI-LABEL: @test_vsub_f64(
+// NYI:   [[SUB_I:%.*]] = fsub <1 x double> %a, %b
+// NYI:   ret <1 x double> [[SUB_I]]
+// float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
+//   return vsub_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vabd_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// NYI:   [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b)
+// NYI:   ret <1 x double> [[VABD2_I]]
+// float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) {
+//   return vabd_f64(a, b);
+// }
+
+float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
+  return vmax_f64(a, b);
+
+  // CIR-LABEL: vmax_f64
+  // CIR: cir.fmaximum {{%.*}}, {{%.*}} : !cir.vector<!cir.double x 1>
+
+  // LLVM-LABEL: test_vmax_f64
+  // LLVM-SAME: (<1 x double> [[a:%.*]], <1 x double> [[b:%.*]])
+  // LLVM:   [[VMAX2_I:%.*]] = call <1 x double> @llvm.maximum.v1f64(<1 x double> {{.*}}, <1 x double> {{.*}})
+}
+
+// NYI-LABEL: @test_vmaxnm_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// NYI:   [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
+// NYI:   ret <1 x double> [[VMAXNM2_I]]
+// float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) {
+//   return vmaxnm_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vminnm_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// NYI:   [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b)
+// NYI:   ret <1 x double> [[VMINNM2_I]]
+// float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) {
+//   return vminnm_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vabs_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
+// NYI:   ret <1 x double> [[VABS1_I]]
+// float64x1_t test_vabs_f64(float64x1_t a) {
+//   return vabs_f64(a);
+// }
+
+// NYI-LABEL: @test_vneg_f64(
+// NYI:   [[SUB_I:%.*]] = fneg <1 x double> %a
+// NYI:   ret <1 x double> [[SUB_I]]
+// float64x1_t test_vneg_f64(float64x1_t a) {
+//   return vneg_f64(a);
+// }
+
+// NYI-LABEL: @test_vcvt_s64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %a)
+// NYI:   ret <1 x i64> [[TMP1]]
+// int64x1_t test_vcvt_s64_f64(float64x1_t a) {
+//   return vcvt_s64_f64(a);
+// }
+
+// NYI-LABEL: @test_vcvt_u64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %a)
+// NYI:   ret <1 x i64> [[TMP1]]
+// uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
+//   return vcvt_u64_f64(a);
+// }
+
+// NYI-LABEL: @test_vcvtn_s64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a)
+// NYI:   ret <1 x i64> [[VCVTN1_I]]
+// int64x1_t test_vcvtn_s64_f64(float64x1_t a) {
+//   return vcvtn_s64_f64(a);
+// }
+
+// NYI-LABEL: @test_vcvtn_u64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a)
+// NYI:   ret <1 x i64> [[VCVTN1_I]]
+// uint64x1_t test_vcvtn_u64_f64(float64x1_t a) {
+//   return vcvtn_u64_f64(a);
+// }
+
+// NYI-LABEL: @test_vcvtp_s64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a)
+// NYI:   ret <1 x i64> [[VCVTP1_I]]
+// int64x1_t test_vcvtp_s64_f64(float64x1_t a) {
+//   return vcvtp_s64_f64(a);
+// }
+
+// NYI-LABEL: @test_vcvtp_u64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a)
+// NYI:   ret <1 x i64> [[VCVTP1_I]]
+// uint64x1_t test_vcvtp_u64_f64(float64x1_t a) {
+//   return vcvtp_u64_f64(a);
+// }
+
+// NYI-LABEL: @test_vcvtm_s64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a)
+// NYI:   ret <1 x i64> [[VCVTM1_I]]
+// int64x1_t test_vcvtm_s64_f64(float64x1_t a) {
+//   return vcvtm_s64_f64(a);
+// }
+
+// NYI-LABEL: @test_vcvtm_u64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a)
+// NYI:   ret <1 x i64> [[VCVTM1_I]]
+// uint64x1_t test_vcvtm_u64_f64(float64x1_t a) {
+//   return vcvtm_u64_f64(a);
+// }
+
+// NYI-LABEL: @test_vcvta_s64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a)
+// NYI:   ret <1 x i64> [[VCVTA1_I]]
+// int64x1_t test_vcvta_s64_f64(float64x1_t a) {
+//   return vcvta_s64_f64(a);
+// }
+
+// NYI-LABEL: @test_vcvta_u64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a)
+// NYI:   ret <1 x i64> [[VCVTA1_I]]
+// uint64x1_t test_vcvta_u64_f64(float64x1_t a) {
+//   return vcvta_u64_f64(a);
+// }
+
+// NYI-LABEL: @test_vcvt_f64_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[VCVT_I:%.*]] = sitofp <1 x i64> %a to <1 x double>
+// NYI:   ret <1 x double> [[VCVT_I]]
+// float64x1_t test_vcvt_f64_s64(int64x1_t a) {
+//   return vcvt_f64_s64(a);
+// }
+
+// NYI-LABEL: @test_vcvt_f64_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[VCVT_I:%.*]] = uitofp <1 x i64> %a to <1 x double>
+// NYI:   ret <1 x double> [[VCVT_I]]
+// float64x1_t test_vcvt_f64_u64(uint64x1_t a) {
+//   return vcvt_f64_u64(a);
+// }
+
+// NYI-LABEL: @test_vcvt_n_s64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
+// NYI:   [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
+// NYI:   ret <1 x i64> [[VCVT_N1]]
+// int64x1_t test_vcvt_n_s64_f64(float64x1_t a) {
+//   return vcvt_n_s64_f64(a, 64);
+// }
+
+// NYI-LABEL: @test_vcvt_n_u64_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
+// NYI:   [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
+// NYI:   ret <1 x i64> [[VCVT_N1]]
+// uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) {
+//   return vcvt_n_u64_f64(a, 64);
+// }
+
+// NYI-LABEL: @test_vcvt_n_f64_s64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// NYI:   [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
+// NYI:   ret <1 x double> [[VCVT_N1]]
+// float64x1_t test_vcvt_n_f64_s64(int64x1_t a) {
+//   return vcvt_n_f64_s64(a, 64);
+// }
+
+// NYI-LABEL: @test_vcvt_n_f64_u64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
+// NYI:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+// NYI:   [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
+// NYI:   ret <1 x double> [[VCVT_N1]]
+// float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
+//   return vcvt_n_f64_u64(a, 64);
+// }
+
+// NYI-LABEL: @test_vrndn_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VRNDN1_I:%.*]] = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %a)
+// NYI:   ret <1 x double> [[VRNDN1_I]]
+// float64x1_t test_vrndn_f64(float64x1_t a) {
+//   return vrndn_f64(a);
+// }
+
+// NYI-LABEL: @test_vrnda_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> %a)
+// NYI:   ret <1 x double> [[VRNDA1_I]]
+// float64x1_t test_vrnda_f64(float64x1_t a) {
+//   return vrnda_f64(a);
+// }
+
+// NYI-LABEL: @test_vrndp_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> %a)
+// NYI:   ret <1 x double> [[VRNDP1_I]]
+// float64x1_t test_vrndp_f64(float64x1_t a) {
+//   return vrndp_f64(a);
+// }
+
+// NYI-LABEL: @test_vrndm_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> %a)
+// NYI:   ret <1 x double> [[VRNDM1_I]]
+// float64x1_t test_vrndm_f64(float64x1_t a) {
+//   return vrndm_f64(a);
+// }
+
+// NYI-LABEL: @test_vrndx_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> %a)
+// NYI:   ret <1 x double> [[VRNDX1_I]]
+// float64x1_t test_vrndx_f64(float64x1_t a) {
+//   return vrndx_f64(a);
+// }
+
+// NYI-LABEL: @test_vrnd_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> %a)
+// NYI:   ret <1 x double> [[VRNDZ1_I]]
+// float64x1_t test_vrnd_f64(float64x1_t a) {
+//   return vrnd_f64(a);
+// }
+
+// NYI-LABEL: @test_vrndi_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a)
+// NYI:   ret <1 x double> [[VRNDI1_I]]
+// float64x1_t test_vrndi_f64(float64x1_t a) {
+//   return vrndi_f64(a);
+// }
+
+// NYI-LABEL: @test_vrsqrte_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> %a)
+// NYI:   ret <1 x double> [[VRSQRTE_V1_I]]
+// float64x1_t test_vrsqrte_f64(float64x1_t a) {
+//   return vrsqrte_f64(a);
+// }
+
+// NYI-LABEL: @test_vrecpe_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> %a)
+// NYI:   ret <1 x double> [[VRECPE_V1_I]]
+// float64x1_t test_vrecpe_f64(float64x1_t a) {
+//   return vrecpe_f64(a);
+// }
+
+// NYI-LABEL: @test_vsqrt_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a)
+// NYI:   ret <1 x double> [[VSQRT_I]]
+// float64x1_t test_vsqrt_f64(float64x1_t a) {
+//   return vsqrt_f64(a);
+// }
+
+// NYI-LABEL: @test_vrecps_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// NYI:   [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> %a, <1 x double> %b)
+// NYI:   ret <1 x double> [[VRECPS_V2_I]]
+// float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) {
+//   return vrecps_f64(a, b);
+// }
+
+// NYI-LABEL: @test_vrsqrts_f64(
+// NYI:   [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
+// NYI:   [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
+// NYI:   [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> %a, <1 x double> %b)
+// NYI:   [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8>
+// NYI:   ret <1 x double> [[VRSQRTS_V2_I]]
+// float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {
+//   return vrsqrts_f64(a, b);
+// }
+
+int32_t test_vminv_s32(int32x2_t a) {
+  return vminv_s32(a);
+
+  // CIR-LABEL: vminv_s32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.sminv" {{%.*}} : (!cir.vector<!s32i x 2>) -> !s32i
+
+  // LLVM-LABEL: @test_vminv_s32
+  // LLVM-SAME: (<2 x i32> [[a:%.*]])
+  // LLVM:   [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> {{.*}})
+  // LLVM:   ret i32 [[VMINV_S32_I]]
+}
+
+uint32_t test_vminv_u32(uint32x2_t a) {
+  return vminv_u32(a);
+
+  // CIR-LABEL: vminv_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.uminv" {{%.*}} : (!cir.vector<!u32i x 2>) -> !u32i
+
+  // LLVM-LABEL: @test_vminv_u32
+  // LLVM-SAME: (<2 x i32> [[a:%.*]])
+  // LLVM:   [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> {{.*}})
+  // LLVM:   ret i32 [[VMINV_U32_I]]
+}
+
+float32_t test_vminvq_f32(float32x4_t a) {
+  return vminvq_f32(a);
+
+  // CIR-LABEL: vminvq_f32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.fminv" {{%.*}} : (!cir.vector<!cir.float x 4>) -> !cir.float
+
+  // LLVM-LABEL: @test_vminvq_f32
+  // LLVM-SAME: (<4 x float> [[a:%.*]])
+  // LLVM:  [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> {{.*}})
+  // LLVM:  ret float [[VMINVQ_F32_I]]
+}
+
+int32_t test_vmaxv_s32(int32x2_t a) {
+  return vmaxv_s32(a);
+
+  // CIR-LABEL: vmaxv_s32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.smaxv" {{%.*}} : (!cir.vector<!s32i x 2>) -> !s32i
+
+  // LLVM-LABEL: @test_vmaxv_s32
+  // LLVM-SAME: (<2 x i32> [[a:%.*]])
+  // LLVM:  [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> {{.*}})
+  // LLVM:  ret i32 [[VMAXV_S32_I]]
+}
+
+// NYI-LABEL: @test_vmaxv_u32(
+// NYI:   [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a)
+// NYI:   ret i32 [[VMAXV_U32_I]]
+uint32_t test_vmaxv_u32(uint32x2_t a) {
+  return vmaxv_u32(a);
+
+  // CIR-LABEL: vmaxv_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.umaxv" {{%.*}} : (!cir.vector<!u32i x 2>) -> !u32i
+
+  // LLVM-LABEL: @test_vmaxv_u32
+  // LLVM-SAME: (<2 x i32> [[a:%.*]])
+  // LLVM:  [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> {{.*}})
+  // LLVM:  ret i32 [[VMAXV_U32_I]]
+}
+
+uint16_t test_vmaxvq_u16(uint16x8_t a) {
+  return vmaxvq_u16(a);
+
+  // CIR-LABEL: vmaxvq_u16
+  // CIR: cir.llvm.intrinsic "vector.reduce.umax" {{%.*}} : (!cir.vector<!u16i x 8>) -> !u16i
+
+  // LLVM-LABEL: @test_vmaxvq_u16
+  // LLVM-SAME: (<8 x i16> [[a:%.*]])
+  // LLVM: [[VMAXVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> {{.*}})
+  // LLVM: ret i16 [[VMAXVQ_U16_I]]
+
+  // OGCG-LABEL: @test_vmaxvq_u16
+  // OGCG: {{%.*}} = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> {{%.*}})
+  // OGCG: ret i16
+}
+
+int16_t test_vmaxvq_s16(int16x8_t a) {
+  return vmaxvq_s16(a);
+
+  // CIR-LABEL: vmaxvq_s16
+  // CIR: cir.llvm.intrinsic "vector.reduce.smax" {{%.*}} : (!cir.vector<!s16i x 8>) -> !s16i
+
+  // LLVM-LABEL: @test_vmaxvq_s16
+  // LLVM-SAME: (<8 x i16> [[a:%.*]])
+  // LLVM: [[VMAXVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> {{.*}})
+  // LLVM: ret i16 [[VMAXVQ_S16_I]]
+
+  // OGCG-LABEL: @test_vmaxvq_s16
+  // OGCG: {{%.*}} = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> {{%.*}})
+  // OGCG: ret i16
+}
+
+uint8_t test_vminvq_u8(uint8x16_t a) {
+  return vminvq_u8(a);
+
+  // CIR-LABEL: vminvq_u8
+  // CIR: cir.llvm.intrinsic "vector.reduce.umin" {{%.*}} : (!cir.vector<!u8i x 16>) -> !u8i
+
+  // LLVM-LABEL: @test_vminvq_u8
+  // LLVM-SAME: (<16 x i8> [[a:%.*]])
+  // LLVM: [[VMINVQ_U8_I:%.*]] = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> {{.*}})
+  // LLVM: ret i8 [[VMINVQ_U8_I]]
+
+  // OGCG-LABEL: @test_vminvq_u8
+  // OGCG: {{%.*}} = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> {{%.*}})
+  // OGCG: ret i8
+}
+
+int16_t test_vminvq_s16(int16x8_t a) {
+  return vminvq_s16(a);
+
+  // CIR-LABEL: vminvq_s16
+  // CIR: cir.llvm.intrinsic "vector.reduce.smin" {{%.*}} : (!cir.vector<!s16i x 8>) -> !s16i
+
+  // LLVM-LABEL: @test_vminvq_s16
+  // LLVM-SAME: (<8 x i16> [[a:%.*]])
+  // LLVM: [[VMINVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> {{.*}})
+  // LLVM: ret i16 [[VMINVQ_S16_I]]
+
+  // OGCG-LABEL: @test_vminvq_s16
+  // OGCG: {{%.*}} = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> {{%.*}})
+  // OGCG: ret i16
+}
+
+uint8_t test_vminv_u8(uint8x8_t a) {
+  return vminv_u8(a);
+
+  // CIR-LABEL: vminv_u8
+  // CIR: cir.llvm.intrinsic "vector.reduce.umin" {{%.*}} : (!cir.vector<!u8i x 8>) -> !u8i
+
+  // LLVM-LABEL: @test_vminv_u8
+  // LLVM-SAME: (<8 x i8> [[a:%.*]])
+  // LLVM: [[VMINV_U8_I:%.*]] = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> {{.*}})
+  // LLVM: ret i8 [[VMINV_U8_I]]
+
+  // OGCG-LABEL: @test_vminv_u8
+  // OGCG: {{%.*}} = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> {{%.*}})
+  // OGCG: ret i8
+}
+
+uint16_t test_vminv_u16(uint16x4_t a) {
+  return vminv_u16(a);
+
+  // CIR-LABEL: vminv_u16
+  // CIR: cir.llvm.intrinsic "vector.reduce.umin" {{%.*}} : (!cir.vector<!u16i x 4>) -> !u16i
+
+  // LLVM-LABEL: @test_vminv_u16
+  // LLVM-SAME: (<4 x i16> [[a:%.*]])
+  // LLVM: [[VMINV_U16_I:%.*]] = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> {{.*}})
+  // LLVM: ret i16 [[VMINV_U16_I]]
+
+  // OGCG-LABEL: @test_vminv_u16
+  // OGCG: {{%.*}} = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> {{%.*}})
+  // OGCG: ret i16
+}
+
+uint16_t test_vminvq_u16(uint16x8_t a) {
+  return vminvq_u16(a);
+
+  // CIR-LABEL: vminvq_u16
+  // CIR: cir.llvm.intrinsic "vector.reduce.umin" {{%.*}} : (!cir.vector<!u16i x 8>) -> !u16i
+
+  // LLVM-LABEL: @test_vminvq_u16
+  // LLVM-SAME: (<8 x i16> [[a:%.*]])
+  // LLVM: [[VMINVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> {{.*}})
+  // LLVM: ret i16 [[VMINVQ_U16_I]]
+
+  // OGCG-LABEL: @test_vminvq_u16
+  // OGCG: {{%.*}} = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> {{%.*}})
+  // OGCG: ret i16
+}
+
+int8_t test_vminv_s8(int8x8_t a) {
+  return vminv_s8(a);
+
+  // CIR-LABEL: vminv_s8
+  // CIR: cir.llvm.intrinsic "vector.reduce.smin" {{%.*}} : (!cir.vector<!s8i x 8>) -> !s8i
+
+  // LLVM-LABEL: @test_vminv_s8
+  // LLVM-SAME: (<8 x i8> [[a:%.*]])
+  // LLVM: [[VMINV_S8_I:%.*]] = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> {{.*}})
+  // LLVM: ret i8 [[VMINV_S8_I]]
+
+  // OGCG-LABEL: @test_vminv_s8
+  // OGCG: {{%.*}} = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> {{%.*}})
+  // OGCG: ret i8
+}
+
+int16_t test_vminv_s16(int16x4_t a) {
+  return vminv_s16(a);
+
+  // CIR-LABEL: vminv_s16
+  // CIR: cir.llvm.intrinsic "vector.reduce.smin" {{%.*}} : (!cir.vector<!s16i x 4>) -> !s16i
+
+  // LLVM-LABEL: @test_vminv_s16
+  // LLVM-SAME: (<4 x i16> [[a:%.*]])
+  // LLVM: [[VMINV_S16_I:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> {{.*}})
+  // LLVM: ret i16 [[VMINV_S16_I]]
+
+  // OGCG-LABEL: @test_vminv_s16
+  // OGCG: {{%.*}} = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> {{%.*}})
+  // OGCG: ret i16
+}
+
+int8_t test_vminvq_s8(int8x16_t a) {
+  return vminvq_s8(a);
+
+  // CIR-LABEL: vminvq_s8
+  // CIR: cir.llvm.intrinsic "vector.reduce.smin" {{%.*}} : (!cir.vector<!s8i x 16>) -> !s8i
+
+  // LLVM-LABEL: @test_vminvq_s8
+  // LLVM-SAME: (<16 x i8> [[a:%.*]])
+  // LLVM: [[VMINVQ_S8_I:%.*]] = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> {{.*}})
+  // LLVM: ret i8 [[VMINVQ_S8_I]]
+
+  // OGCG-LABEL: @test_vminvq_s8
+  // OGCG: {{%.*}} = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> {{%.*}})
+  // OGCG: ret i8
+}
+
+int16_t test_vaddvq_s16(int16x8_t a) {
+  return vaddvq_s16(a);
+
+  // CIR-LABEL: vaddvq_s16
+  // CIR: cir.llvm.intrinsic "vector.reduce.add" {{%.*}} : (!cir.vector<!s16i x 8>) -> !s16i
+
+  // LLVM-LABEL: @test_vaddvq_s16
+  // LLVM-SAME: (<8 x i16> [[a:%.*]])
+  // LLVM: [[VADDVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> {{.*}})
+  // LLVM: ret i16 [[VADDVQ_S16_I]]
+
+  // OGCG-LABEL: @test_vaddvq_s16
+  // OGCG: {{%.*}} = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> {{%.*}})
+  // OGCG: ret i16
+}
+
+int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) {
+  return vpminq_s16(a, b);
+
+  // CIR-LABEL: vpminq_s16
+  // CIR: cir.llvm.intrinsic "aarch64.neon.sminp" {{%.*}} : (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM-LABEL: @test_vpminq_s16
+  // LLVM-SAME: (<8 x i16> [[a:%.*]], <8 x i16> [[b:%.*]])
+  // LLVM: {{%.*}} = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> {{.*}}, <8 x i16> {{.*}})
+  // LLVM: ret <8 x i16>
+
+  // OGCG-LABEL: @test_vpminq_s16
+  // OGCG: {{%.*}} = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
+  // OGCG: ret <8 x i16>
+}
+
+uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) {
+  return vpminq_u16(a, b);
+
+  // CIR-LABEL: vpminq_u16
+  // CIR: cir.llvm.intrinsic "aarch64.neon.uminp" {{%.*}} : (!cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>
+
+  // LLVM-LABEL: @test_vpminq_u16
+  // LLVM-SAME: (<8 x i16> [[a:%.*]], <8 x i16> [[b:%.*]])
+  // LLVM: {{%.*}} = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> {{.*}}, <8 x i16> {{.*}})
+  // LLVM: ret <8 x i16>
+
+  // OGCG-LABEL: @test_vpminq_u16
+  // OGCG: {{%.*}} = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
+  // OGCG: ret <8 x i16>
+}
+
+uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) {
+  return vpmaxq_u16(a, b);
+
+  // CIR-LABEL: vpmaxq_u16
+  // CIR: cir.llvm.intrinsic "aarch64.neon.umaxp" {{%.*}} : (!cir.vector<!u16i x 8>, !cir.vector<!u16i x 8>) -> !cir.vector<!u16i x 8>
+
+  // LLVM-LABEL: @test_vpmaxq_u16
+  // LLVM-SAME: (<8 x i16> [[a:%.*]], <8 x i16> [[b:%.*]])
+  // LLVM: {{%.*}} = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> {{.*}}, <8 x i16> {{.*}})
+  // LLVM: ret <8 x i16>
+
+  // OGCG-LABEL: @test_vpmaxq_u16
+  // OGCG: {{%.*}} = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
+  // OGCG: ret <8 x i16>
+}
+
+int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) {
+  return vpmaxq_s16(a, b);
+
+  // CIR-LABEL: vpmaxq_s16
+  // CIR: cir.llvm.intrinsic "aarch64.neon.smaxp" {{%.*}} : (!cir.vector<!s16i x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM-LABEL: @test_vpmaxq_s16
+  // LLVM-SAME: (<8 x i16> [[a:%.*]], <8 x i16> [[b:%.*]])
+  // LLVM: {{%.*}} = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> {{.*}}, <8 x i16> {{.*}})
+  // LLVM: ret <8 x i16>
+
+  // OGCG-LABEL: @test_vpmaxq_s16
+  // OGCG: {{%.*}} = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> {{%.*}}, <8 x i16> {{%.*}})
+  // OGCG: ret <8 x i16>
+}
+
+float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) {
+  return vpminq_f32(a, b);
+
+  // CIR-LABEL: vpminq_f32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.fminp" {{%.*}} : (!cir.vector<!cir.float x 4>, !cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: @test_vpminq_f32
+  // LLVM-SAME: (<4 x float> [[a:%.*]], <4 x float> [[b:%.*]])
+  // LLVM: {{%.*}} = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> {{.*}}, <4 x float> {{.*}})
+  // LLVM: ret <4 x float>
+
+  // OGCG-LABEL: @test_vpminq_f32
+  // OGCG: {{%.*}} = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> {{%.*}}, <4 x float> {{%.*}})
+  // OGCG: ret <4 x float>
+}
+
+float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) {
+  return vpmaxq_f32(a, b);
+
+  // CIR-LABEL: vpmaxq_f32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.fmaxp" {{%.*}} : (!cir.vector<!cir.float x 4>, !cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: @test_vpmaxq_f32
+  // LLVM-SAME: (<4 x float> [[a:%.*]], <4 x float> [[b:%.*]])
+  // LLVM: {{%.*}} = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> {{.*}}, <4 x float> {{.*}})
+  // LLVM: ret <4 x float>
+
+  // OGCG-LABEL: @test_vpmaxq_f32
+  // OGCG: {{%.*}} = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> {{%.*}}, <4 x float> {{%.*}})
+  // OGCG: ret <4 x float>
+}
+
+
+int32_t test_vaddv_s32(int32x2_t a) {
+  return vaddv_s32(a);
+
+  // CIR-LABEL: vaddv_s32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.saddv" {{%.*}} : (!cir.vector<!s32i x 2>) -> !s32i
+
+  // LLVM-LABEL: @test_vaddv_s32
+  // LLVM-SAME: (<2 x i32> [[a:%.*]])
+  // LLVM:   [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> {{.*}})
+  // LLVM:   ret i32 [[VADDV_S32_I]]
+}
+
+uint32_t test_vaddv_u32(uint32x2_t a) {
+  return vaddv_u32(a);
+
+  // CIR-LABEL: vaddv_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.uaddv" {{%.*}} : (!cir.vector<!u32i x 2>) -> !u32i
+
+  // LLVM-LABEL: @test_vaddv_u32
+  // LLVM-SAME: (<2 x i32> [[a:%.*]])
+  // LLVM:   [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> {{.*}})
+  // LLVM:   ret i32 [[VADDV_U32_I]]
+}
+
+int64_t test_vaddlv_s32(int32x2_t a) {
+  return vaddlv_s32(a);
+
+  // CIR-LABEL: vaddlv_s32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.saddlv" {{%.*}} : (!cir.vector<!s32i x 2>) -> !s64i
+
+  // LLVM-LABEL: @test_vaddlv_s32
+  // LLVM-SAME: (<2 x i32> [[a:%.*]])
+  // LLVM:   [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> {{.*}})
+  // LLVM:   ret i64 [[VADDLV_S32_I]]
+}
+
+uint64_t test_vaddlv_u32(uint32x2_t a) {
+  return vaddlv_u32(a);
+
+  // CIR-LABEL: vaddlv_u32
+  // CIR: cir.llvm.intrinsic "aarch64.neon.uaddlv" {{%.*}} : (!cir.vector<!u32i x 2>) -> !u64i
+
+  // LLVM-LABEL: @test_vaddlv_u32
+  // LLVM-SAME: (<2 x i32> [[a:%.*]])
+  // LLVM:   [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> {{.*}})
+  // LLVM:   ret i64 [[VADDLV_U32_I]]
+}
+
+uint8x8_t test_vmovn_u16(uint16x8_t a) {
+  return vmovn_u16(a);
+  // CIR-LABEL: vmovn_u16
+  // CIR: [[ARG:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u16i x 8>
+  // CIR: {{%.*}} = cir.cast integral [[ARG]] : !cir.vector<!u16i x 8> -> !cir.vector<!u8i x 8>
+
+  // LLVM: {{.*}}@test_vmovn_u16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[VMOVN_I:%.*]] = trunc <8 x i16> {{.*}} to <8 x i8>
+  // LLVM: ret <8 x i8> [[VMOVN_I]]
+}
+
+uint16x4_t test_vmovn_u32(uint32x4_t a) {
+  return vmovn_u32(a);
+  // CIR-LABEL: vmovn_u32
+  // CIR: [[ARG:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u32i x 4>
+  // CIR: {{%.*}} = cir.cast integral [[ARG]] : !cir.vector<!u32i x 4> -> !cir.vector<!u16i x 4>
+
+  // LLVM: {{.*}}@test_vmovn_u32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM: [[VMOVN_I:%.*]] = trunc <4 x i32> {{.*}} to <4 x i16>
+}
+
+uint32x2_t test_vmovn_u64(uint64x2_t a) {
+  return vmovn_u64(a);
+  // CIR-LABEL: vmovn_u64
+  // CIR: [[ARG:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!u64i x 2>
+  // CIR: {{%.*}} = cir.cast integral [[ARG]] : !cir.vector<!u64i x 2> -> !cir.vector<!u32i x 2>
+
+  // LLVM: {{.*}}@test_vmovn_u64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM: [[VMOVN_I:%.*]] = trunc <2 x i64> {{.*}} to <2 x i32>
+}
+
+int8x8_t test_vmovn_s16(int16x8_t a) {
+  return vmovn_s16(a);
+  // CIR-LABEL: vmovn_s16
+  // CIR: [[ARG:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s16i x 8>
+  // CIR: {{%.*}} = cir.cast integral [[ARG]] : !cir.vector<!s16i x 8> -> !cir.vector<!s8i x 8>
+
+  // LLVM: {{.*}}@test_vmovn_s16(<8 x i16>{{.*}}[[A:%.*]])
+  // LLVM: [[VMOVN_I:%.*]] = trunc <8 x i16> {{.*}} to <8 x i8>
+}
+
+int16x4_t test_vmovn_s32(int32x4_t a) {
+  return vmovn_s32(a);
+  // CIR-LABEL: vmovn_s32
+  // CIR: [[ARG:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s32i x 4>
+  // CIR: {{%.*}} = cir.cast integral [[ARG]] : !cir.vector<!s32i x 4> -> !cir.vector<!s16i x 4>
+
+  // LLVM: {{.*}}@test_vmovn_s32(<4 x i32>{{.*}}[[A:%.*]])
+  // LLVM: [[VMOVN_I:%.*]] = trunc <4 x i32> {{.*}} to <4 x i16>
+}
+
+int32x2_t test_vmovn_s64(int64x2_t a) {
+  return vmovn_s64(a);
+  // CIR-LABEL: vmovn_s64
+  // CIR: [[ARG:%.*]] = cir.cast bitcast {{%.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s64i x 2>
+  // CIR: {{%.*}} = cir.cast integral [[ARG]] : !cir.vector<!s64i x 2> -> !cir.vector<!s32i x 2>
+
+  // LLVM: {{.*}}@test_vmovn_s64(<2 x i64>{{.*}}[[A:%.*]])
+  // LLVM: [[VMOVN_I:%.*]] = trunc <2 x i64> {{.*}} to <2 x i32>
+}
+
+uint8x8_t test_vld1_dup_u8(uint8_t const * ptr) {
+  return vld1_dup_u8(ptr);
+}
+
+// CIR-LABEL: test_vld1_dup_u8
+// CIR: [[VAL:%.*]] = cir.load{{.*}} {{%.*}} : !cir.ptr<!u8i>, !u8i
+// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !u8i, !cir.vector<!u8i x 8>
+
+// LLVM: {{.*}}test_vld1_dup_u8(ptr{{.*}}[[PTR:%.*]])
+// LLVM: [[VAL:%.*]] = load i8, ptr [[PTR]], align 1
+// LLVM: [[VEC:%.*]] = insertelement <8 x i8> poison, i8 [[VAL]], i64 0
+// LLVM: {{%.*}} = shufflevector <8 x i8> [[VEC]], <8 x i8> poison, <8 x i32> zeroinitializer
+
+int8x8_t test_vld1_dup_s8(int8_t const * ptr) {
+  return vld1_dup_s8(ptr);
+}
+
+// CIR-LABEL: test_vld1_dup_s8
+// CIR: [[VAL:%.*]] = cir.load{{.*}} {{%.*}} : !cir.ptr<!s8i>, !s8i
+// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s8i, !cir.vector<!s8i x 8>
+
+// LLVM: {{.*}}test_vld1_dup_s8(ptr{{.*}}[[PTR:%.*]])
+// LLVM: [[VAL:%.*]] = load i8, ptr [[PTR]], align 1
+// LLVM: [[VEC:%.*]] = insertelement <8 x i8> poison, i8 [[VAL]], i64 0
+// LLVM: {{%.*}} = shufflevector <8 x i8> [[VEC]], <8 x i8> poison, <8 x i32> zeroinitializer
+
+uint16x4_t test_vld1_dup_u16(uint16_t const * ptr) {
+  return vld1_dup_u16(ptr);
+}
+
+// CIR-LABEL: test_vld1_dup_u16
+// CIR: [[VAL:%.*]] = cir.load{{.*}} {{%.*}} : !cir.ptr<!u16i>, !u16i
+// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !u16i, !cir.vector<!u16i x 4>
+
+// LLVM: {{.*}}test_vld1_dup_u16(ptr{{.*}}[[PTR:%.*]])
+// LLVM: [[VAL:%.*]] = load i16, ptr [[PTR]], align 2
+// LLVM: [[VEC:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0
+// LLVM: {{%.*}} = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <4 x i32> zeroinitializer
+
+int16x4_t test_vld1_dup_s16(int16_t const * ptr) {
+  return vld1_dup_s16(ptr);
+}
+
+// CIR-LABEL: test_vld1_dup_s16
+// CIR: [[VAL:%.*]] = cir.load{{.*}} {{%.*}} : !cir.ptr<!s16i>, !s16i
+// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s16i, !cir.vector<!s16i x 4>
+
+// LLVM: {{.*}}test_vld1_dup_s16(ptr{{.*}}[[PTR:%.*]])
+// LLVM: [[VAL:%.*]] = load i16, ptr [[PTR]], align 2
+// LLVM: [[VEC:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0
+// LLVM: {{%.*}} = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <4 x i32> zeroinitializer
+
+int32x2_t test_vld1_dup_s32(int32_t const * ptr) {
+  return vld1_dup_s32(ptr);
+}
+
+// CIR-LABEL: test_vld1_dup_s32
+// CIR: [[VAL:%.*]] = cir.load{{.*}} {{%.*}} : !cir.ptr<!s32i>, !s32i
+// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s32i, !cir.vector<!s32i x 2>
+
+// LLVM: {{.*}}test_vld1_dup_s32(ptr{{.*}}[[PTR:%.*]])
+// LLVM: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
+// LLVM: [[VEC:%.*]] = insertelement <2 x i32> poison, i32 [[VAL]], i64 0
+// LLVM: {{%.*}} = shufflevector <2 x i32> [[VEC]], <2 x i32> poison, <2 x i32> zeroinitializer
+
+int64x1_t test_vld1_dup_s64(int64_t const * ptr) {
+  return vld1_dup_s64(ptr);
+}
+
+// CIR-LABEL: test_vld1_dup_s64
+// CIR: [[VAL:%.*]] = cir.load{{.*}} {{%.*}} : !cir.ptr<!s64i>, !s64i
+// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s64i, !cir.vector<!s64i x 1>
+
+// LLVM: {{.*}}test_vld1_dup_s64(ptr{{.*}}[[PTR:%.*]])
+// LLVM: [[VAL:%.*]] = load i64, ptr [[PTR]], align 8
+// LLVM: [[VEC:%.*]] = insertelement <1 x i64> poison, i64 [[VAL]], i64 0
+// LLVM: {{%.*}} = shufflevector <1 x i64> [[VEC]], <1 x i64> poison, <1 x i32> zeroinitializer
+
+float32x2_t test_vld1_dup_f32(float32_t const * ptr) {
+  return vld1_dup_f32(ptr);
+}
+
+// CIR-LABEL: test_vld1_dup_f32
+// CIR: [[VAL:%.*]] = cir.load{{.*}} {{%.*}} : !cir.ptr<!cir.float>, !cir.float
+// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !cir.float, !cir.vector<!cir.float x 2>
+
+// LLVM: {{.*}}test_vld1_dup_f32(ptr{{.*}}[[PTR:%.*]])
+// LLVM: [[VAL:%.*]] = load float, ptr [[PTR]], align 4
+// LLVM: [[VEC:%.*]] = insertelement <2 x float> poison, float [[VAL]], i64 0
+// LLVM: {{%.*}} = shufflevector <2 x float> [[VEC]], <2 x float> poison, <2 x i32> zeroinitializer
+
+float64x1_t test_vld1_dup_f64(float64_t const * ptr) {
+  return vld1_dup_f64(ptr);
+}
+
+// CIR-LABEL: test_vld1_dup_f64
+// CIR: [[VAL:%.*]] = cir.load{{.*}} {{%.*}} : !cir.ptr<!cir.double>, !cir.double
+// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !cir.double, !cir.vector<!cir.double x 1>
+
+// LLVM: {{.*}}test_vld1_dup_f64(ptr{{.*}}[[PTR:%.*]])
+// LLVM: [[VAL:%.*]] = load double, ptr [[PTR]], align 8
+// LLVM: [[VEC:%.*]] = insertelement <1 x double> poison, double [[VAL]], i64 0
+// LLVM: {{%.*}} = shufflevector <1 x double> [[VEC]], <1 x double> poison, <1 x i32> zeroinitializer
+
+uint8x16_t test_vld1q_dup_u8(uint8_t const * ptr) {
+  return vld1q_dup_u8(ptr);
+}
+
+// CIR-LABEL: test_vld1q_dup_u8
+// CIR: [[VAL:%.*]] = cir.load{{.*}} {{%.*}} : !cir.ptr<!u8i>, !u8i
+// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !u8i, !cir.vector<!u8i x 16>
+
+// LLVM: {{.*}}test_vld1q_dup_u8(ptr{{.*}}[[PTR:%.*]])
+// LLVM: [[VAL:%.*]] = load i8, ptr [[PTR]], align 1
+// LLVM: [[VEC:%.*]] = insertelement <16 x i8> poison, i8 [[VAL]], i64 0
+// LLVM: {{%.*}} = shufflevector <16 x i8> [[VEC]], <16 x i8> poison, <16 x i32> zeroinitializer
+
+int8x16_t test_vld1q_dup_s8(int8_t const * ptr) {
+  return vld1q_dup_s8(ptr);
+}
+
+// CIR-LABEL: test_vld1q_dup_s8
+// CIR: [[VAL:%.*]] = cir.load{{.*}} {{%.*}} : !cir.ptr<!s8i>, !s8i
+// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s8i, !cir.vector<!s8i x 16>
+
+// LLVM: {{.*}}test_vld1q_dup_s8(ptr{{.*}}[[PTR:%.*]])
+// LLVM: [[VAL:%.*]] = load i8, ptr [[PTR]], align 1
+// LLVM: [[VEC:%.*]] = insertelement <16 x i8> poison, i8 [[VAL]], i64 0
+// LLVM: {{%.*}} = shufflevector <16 x i8> [[VEC]], <16 x i8> poison, <16 x i32> zeroinitializer
+
+uint16x8_t test_vld1q_dup_u16(uint16_t const * ptr) {
+  return vld1q_dup_u16(ptr);
+}
+
+// CIR-LABEL: test_vld1q_dup_u16
+// CIR: [[VAL:%.*]] = cir.load{{.*}} {{%.*}} : !cir.ptr<!u16i>, !u16i
+// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !u16i, !cir.vector<!u16i x 8>
+
+// LLVM: {{.*}}test_vld1q_dup_u16(ptr{{.*}}[[PTR:%.*]])
+// LLVM: [[VAL:%.*]] = load i16, ptr [[PTR]], align 2
+// LLVM: [[VEC:%.*]] = insertelement <8 x i16> poison, i16 [[VAL]], i64 0
+// LLVM: {{%.*}} = shufflevector <8 x i16> [[VEC]], <8 x i16> poison, <8 x i32> zeroinitializer
+
+int16x8_t test_vld1q_dup_s16(int16_t const * ptr) {
+  return vld1q_dup_s16(ptr);
+}
+
+// CIR-LABEL: test_vld1q_dup_s16
+// CIR: [[VAL:%.*]] = cir.load{{.*}} {{%.*}} : !cir.ptr<!s16i>, !s16i
+// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s16i, !cir.vector<!s16i x 8>
+
+// LLVM: {{.*}}test_vld1q_dup_s16(ptr{{.*}}[[PTR:%.*]])
+// LLVM: [[VAL:%.*]] = load i16, ptr [[PTR]], align 2
+// LLVM: [[VEC:%.*]] = insertelement <8 x i16> poison, i16 [[VAL]], i64 0
+// LLVM: {{%.*}} = shufflevector <8 x i16> [[VEC]], <8 x i16> poison, <8 x i32> zeroinitializer
+
+int32x4_t test_vld1q_dup_s32(int32_t const * ptr) {
+  return vld1q_dup_s32(ptr);
+}
+
+// CIR-LABEL: test_vld1q_dup_s32
+// CIR: [[VAL:%.*]] = cir.load{{.*}} {{%.*}} : !cir.ptr<!s32i>, !s32i
+// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s32i, !cir.vector<!s32i x 4>
+
+// LLVM: {{.*}}test_vld1q_dup_s32(ptr{{.*}}[[PTR:%.*]])
+// LLVM: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4
+// LLVM: [[VEC:%.*]] = insertelement <4 x i32> poison, i32 [[VAL]], i64 0
+// LLVM: {{%.*}} = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <4 x i32> zeroinitializer
+
+int64x2_t test_vld1q_dup_s64(int64_t const * ptr) {
+  return vld1q_dup_s64(ptr);
+}
+
+// CIR-LABEL: test_vld1q_dup_s64
+// CIR: [[VAL:%.*]] = cir.load{{.*}} {{%.*}} : !cir.ptr<!s64i>, !s64i
+// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !s64i, !cir.vector<!s64i x 2>
+
+// LLVM: {{.*}}test_vld1q_dup_s64(ptr{{.*}}[[PTR:%.*]])
+// LLVM: [[VAL:%.*]] = load i64, ptr [[PTR]], align 8
+// LLVM: [[VEC:%.*]] = insertelement <2 x i64> poison, i64 [[VAL]], i64 0
+// LLVM: {{%.*}} = shufflevector <2 x i64> [[VEC]], <2 x i64> poison, <2 x i32> zeroinitializer
+
+float32x4_t test_vld1q_dup_f32(float32_t const * ptr) {
+  return vld1q_dup_f32(ptr);
+}
+
+// CIR-LABEL: test_vld1q_dup_f32
+// CIR: [[VAL:%.*]] = cir.load{{.*}} {{%.*}} : !cir.ptr<!cir.float>, !cir.float
+// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !cir.float, !cir.vector<!cir.float x 4>
+
+// LLVM: {{.*}}test_vld1q_dup_f32(ptr{{.*}}[[PTR:%.*]])
+// LLVM: [[VAL:%.*]] = load float, ptr [[PTR]], align 4
+// LLVM: [[VEC:%.*]] = insertelement <4 x float> poison, float [[VAL]], i64 0
+// LLVM: {{%.*}} = shufflevector <4 x float> [[VEC]], <4 x float> poison, <4 x i32> zeroinitializer
+
+float64x2_t test_vld1q_dup_f64(float64_t const * ptr) {
+  return vld1q_dup_f64(ptr);
+}
+
+// CIR-LABEL: test_vld1q_dup_f64
+// CIR: [[VAL:%.*]] = cir.load{{.*}} {{%.*}} : !cir.ptr<!cir.double>, !cir.double
+// CIR: {{%.*}} = cir.vec.splat [[VAL]] : !cir.double, !cir.vector<!cir.double x 2>
+
+// LLVM: {{.*}}test_vld1q_dup_f64(ptr{{.*}}[[PTR:%.*]])
+// LLVM: [[VAL:%.*]] = load double, ptr [[PTR]], align 8
+// LLVM: [[VEC:%.*]] = insertelement <2 x double> poison, double [[VAL]], i64 0
+// LLVM: {{%.*}} = shufflevector <2 x double> [[VEC]], <2 x double> poison, <2 x i32> zeroinitializer
+
+uint16_t test_vaddlvq_u8(uint8x16_t a) {
+  return vaddlvq_u8(a);
+
+  // CIR-LABEL: vaddlvq_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.uaddlv" {{%.*}} : (!cir.vector<!u8i x 16>) -> !u32i
+  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !u32i -> !u16i
+
+  // LLVM-LABEL: @test_vaddlvq_u8
+  // LLVM: {{%.*}} = call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> {{%.*}})
+  // LLVM: {{%.*}} = trunc i32 {{%.*}} to i16
+  // LLVM: ret i16
+
+  // OGCG-LABEL: @test_vaddlvq_u8
+  // OGCG: {{%.*}} = call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> {{%.*}})
+  // OGCG: {{%.*}} = trunc i32 {{%.*}} to i16
+  // OGCG: ret i16
+}
+
+int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
+  return vaddhn_s16(a, b);
+
+  // CIR-LABEL: vaddhn_s16
+  // CIR: {{%.*}} = cir.binop(add, {{%.*}}, {{%.*}}) : !cir.vector<!s16i x 8>
+  // CIR: {{%.*}} = cir.cast bitcast {{%.*}} : !cir.vector<!s16i x 8> -> !cir.vector<!u16i x 8>
+  // CIR: {{%.*}} = cir.shift(right, {{%.*}} : !cir.vector<!u16i x 8>, {{%.*}}) -> !cir.vector<!u16i x 8>
+  // CIR: {{%.*}} = cir.cast bitcast {{%.*}} : !cir.vector<!u16i x 8> -> !cir.vector<!s16i x 8>
+  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !cir.vector<!s16i x 8> -> !cir.vector<!s8i x 8>
+
+  // LLVM-LABEL: @test_vaddhn_s16
+  // LLVM: {{%.*}} = add <8 x i16> {{%.*}}, {{%.*}}
+  // LLVM: {{%.*}} = lshr <8 x i16> {{%.*}}, splat (i16 8)
+  // LLVM: {{%.*}} = trunc <8 x i16> {{%.*}} to <8 x i8>
+  // LLVM: ret <8 x i8>
+
+  // OGCG-LABEL: @test_vaddhn_s16
+  // OGCG: {{%.*}} = add <8 x i16> {{%.*}}, {{%.*}}
+  // OGCG: {{%.*}} = lshr <8 x i16> {{%.*}}, splat (i16 8)
+  // OGCG: {{%.*}} = trunc <8 x i16> {{%.*}} to <8 x i8>
+  // OGCG: ret <8 x i8>
+}
+
+// CHECK-LABEL: test_vld2q_u8
+uint8x16x2_t test_vld2q_u8(uint8_t const *a) {
+  return vld2q_u8(a);
+
+  // CIR-LABEL: vld2q_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.ld2" {{%.*}} : (!cir.ptr<!void>) -> !rec_anon_struct
+  // CIR: cir.store align(16) {{%.*}}, {{%.*}} : !rec_anon_struct, !cir.ptr<!rec_anon_struct>
+
+  // LLVM-LABEL: @test_vld2q_u8
+  // LLVM: {{%.*}} = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr {{%.*}})
+  // LLVM: store { <16 x i8>, <16 x i8> } {{%.*}}, ptr {{%.*}}, align 16
+  // LLVM: call void @llvm.memcpy.{{.*}}(ptr {{%.*}}, ptr {{%.*}}, i32 32, i1 false)
+  // LLVM: {{%.*}} = load %struct.uint8x16x2_t, ptr {{%.*}}, align 1
+  // LLVM: ret %struct.uint8x16x2_t
+
+  // OGCG-LABEL: @test_vld2q_u8
+  // OGCG: {{%.*}} = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr {{%.*}})
+  // OGCG: store { <16 x i8>, <16 x i8> } {{%.*}}, ptr {{%.*}}, align 16
+  // OGCG: call void @llvm.memcpy.{{.*}}(ptr{{.*}}, ptr{{.*}}, i64 32, i1 false)
+  // OGCG: {{%.*}} = load %struct.uint8x16x2_t, ptr {{%.*}}, align 16
+  // OGCG: ret %struct.uint8x16x2_t
+}
+
+// CHECK-LABEL: test_vld3q_u8
+uint8x16x3_t test_vld3q_u8(uint8_t const *a) {
+  return vld3q_u8(a);
+
+  // CIR-LABEL: vld3q_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.ld3" {{%.*}} : (!cir.ptr<!void>) -> !rec_anon_struct{{.*}}
+  // CIR: cir.store align(16) {{%.*}}, {{%.*}} : !rec_anon_struct{{.*}}, !cir.ptr<!rec_anon_struct{{.*}}>
+
+  // LLVM-LABEL: @test_vld3q_u8
+  // LLVM: {{%.*}} = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr {{%.*}})
+  // LLVM: store { <16 x i8>, <16 x i8>, <16 x i8> } {{%.*}}, ptr {{%.*}}, align 16
+  // LLVM: call void @llvm.memcpy.{{.*}}(ptr {{%.*}}, ptr {{%.*}}, i32 48, i1 false)
+  // LLVM: {{%.*}} = load %struct.uint8x16x3_t, ptr {{%.*}}, align 1
+  // LLVM: ret %struct.uint8x16x3_t
+
+  // OGCG-LABEL: @test_vld3q_u8
+  // OGCG: {{%.*}} = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr {{%.*}})
+  // OGCG: store { <16 x i8>, <16 x i8>, <16 x i8> } {{%.*}}, ptr {{%.*}}, align 16
+  // OGCG: call void @llvm.memcpy.{{.*}}(ptr{{.*}}, ptr{{.*}}, i64 48, i1 false)
+  // OGCG: {{%.*}} = load %struct.uint8x16x3_t, ptr {{%.*}}, align 16
+  // OGCG: ret %struct.uint8x16x3_t
+}
+
+// CHECK-LABEL: test_vld4q_u8
+uint8x16x4_t test_vld4q_u8(uint8_t const *a) {
+  return vld4q_u8(a);
+
+  // CIR-LABEL: vld4q_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.ld4" {{%.*}} : (!cir.ptr<!void>) -> !rec_anon_struct{{.*}}
+  // CIR: cir.store align(16) {{%.*}}, {{%.*}} : !rec_anon_struct{{.*}}, !cir.ptr<!rec_anon_struct{{.*}}>
+
+  // LLVM-LABEL: @test_vld4q_u8
+  // LLVM: {{%.*}} = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr {{%.*}})
+  // LLVM: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } {{%.*}}, ptr {{%.*}}, align 16
+  // LLVM: call void @llvm.memcpy.{{.*}}(ptr {{%.*}}, ptr {{%.*}}, i32 64, i1 false)
+  // LLVM: {{%.*}} = load %struct.uint8x16x4_t, ptr {{%.*}}, align 1
+  // LLVM: ret %struct.uint8x16x4_t
+
+  // OGCG-LABEL: @test_vld4q_u8
+  // OGCG: {{%.*}} = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr {{%.*}})
+  // OGCG: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } {{%.*}}, ptr {{%.*}}, align 16
+  // OGCG: call void @llvm.memcpy.{{.*}}(ptr{{.*}}, ptr{{.*}}, i64 64, i1 false)
+  // OGCG: {{%.*}} = load %struct.uint8x16x4_t, ptr {{%.*}}, align 16
+  // OGCG: ret %struct.uint8x16x4_t
+}
+
+// CHECK-LABEL: test_vld4q_lane_u8
+uint8x16x4_t test_vld4q_lane_u8(uint8_t *a, uint8x16x4_t b) {
+  return vld4q_lane_u8(a, b, 15);
+
+  // CIR-LABEL: vld4q_lane_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.ld4lane" {{%.*}}, {{%.*}}, {{%.*}}, {{%.*}}, {{%.*}}, {{%.*}} : (!cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>, !s64i, !cir.ptr<!void>) -> !rec_anon_struct{{.*}}
+
+  // LLVM-LABEL: @test_vld4q_lane_u8
+  // LLVM: {{%.*}} = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> {{%.*}}, <16 x i8> {{%.*}}, <16 x i8> {{%.*}}, <16 x i8> {{%.*}}, i64 15, ptr {{%.*}})
+
+  // OGCG-LABEL: @test_vld4q_lane_u8
+  // OGCG: {{%.*}} = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> {{%.*}}, <16 x i8> {{%.*}}, <16 x i8> {{%.*}}, <16 x i8> {{%.*}}, i64 15, ptr {{%.*}})
+}
+
+// CHECK-LABEL: test_vst2q_u8
+void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) {
+  vst2q_u8(a, b);
+
+  // CIR-LABEL: vst2q_u8
+  // CIR: cir.llvm.intrinsic "aarch64.neon.st2" {{%.*}}, {{%.*}}, {{%.*}} : (!cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>, !cir.ptr<!void>)
+
+  // LLVM-LABEL: @test_vst2q_u8
+  // LLVM: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> {{%.*}}, <16 x i8> {{%.*}}, ptr {{%.*}})
+
+  // OGCG-LABEL: @test_vst2q_u8
+  // OGCG: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> {{%.*}}, <16 x i8> {{%.*}}, ptr {{%.*}})
+}
+
+// CHECK-LABEL: test_vst4q_lane_u8
+void test_vst4q_lane_u8(uint8_t *a, uint8x16x4_t b) {
+  vst4q_lane_u8(a, b, 15);
+
+  // CIR-LABEL: vst4q_lane_u8
+  // CIR: cir.llvm.intrinsic "aarch64.neon.st4lane" {{%.*}}, {{%.*}}, {{%.*}}, {{%.*}}, {{%.*}}, {{%.*}} : (!cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>, !s64i, !cir.ptr<!void>)
+
+  // LLVM-LABEL: @test_vst4q_lane_u8
+  // LLVM: call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> {{%.*}}, <16 x i8> {{%.*}}, <16 x i8> {{%.*}}, <16 x i8> {{%.*}}, i64 15, ptr {{%.*}})
+
+  // OGCG-LABEL: @test_vst4q_lane_u8
+  // OGCG: call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> {{%.*}}, <16 x i8> {{%.*}}, <16 x i8> {{%.*}}, <16 x i8> {{%.*}}, i64 15, ptr {{%.*}})
+}
+
+// CHECK-LABEL: test_vsriq_n_u8
+uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
+  return vsriq_n_u8(a, b, 3);
+
+  // CIR-LABEL: vsriq_n_u8
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.vsri" {{%.*}}, {{%.*}}, {{%.*}} : (!cir.vector<!u8i x 16>, !cir.vector<!u8i x 16>, !s32i) -> !cir.vector<!u8i x 16>
+
+  // LLVM-LABEL: @test_vsriq_n_u8
+  // LLVM: {{%.*}} = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> {{%.*}}, <16 x i8> {{%.*}}, i32 3)
+
+  // OGCG-LABEL: @test_vsriq_n_u8
+  // OGCG: {{%.*}} = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> {{%.*}}, <16 x i8> {{%.*}}, i32 3)
+}
+
+// CHECK-LABEL: test_vqshrun_n_s16
+uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
+  return vqshrun_n_s16(a, 3);
+
+  // CIR-LABEL: vqshrun_n_s16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.sqshrun" {{%.*}}, {{%.*}} : (!cir.vector<!s16i x 8>, !s32i) -> !cir.vector<!u8i x 8>
+
+  // LLVM-LABEL: @test_vqshrun_n_s16
+  // LLVM: {{%.*}} = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> {{%.*}}, i32 3)
+
+  // OGCG-LABEL: @test_vqshrun_n_s16
+  // OGCG: {{%.*}} = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> {{%.*}}, i32 3)
+}
+
+// CHECK-LABEL: test_vfmaq_f32
+float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) {
+  return vfmaq_f32(v1, v2, v3);
+
+  // CIR-LABEL: vfmaq_f32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "fma" {{%.*}}, {{%.*}}, {{%.*}} : (!cir.vector<!cir.float x 4>, !cir.vector<!cir.float x 4>, !cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: @test_vfmaq_f32
+  // LLVM: {{%.*}} = call <4 x float> @llvm.fma.v4f32(<4 x float> {{%.*}}, <4 x float> {{%.*}}, <4 x float> {{%.*}})
+
+  // OGCG-LABEL: @test_vfmaq_f32
+  // OGCG: {{%.*}} = call <4 x float> @llvm.fma.v4f32(<4 x float> {{%.*}}, <4 x float> {{%.*}}, <4 x float> {{%.*}})
+}
+
+// CHECK-LABEL: test_vtbl1_s8
+int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
+  return vtbl1_s8(a, b);
+
+  // CIR-LABEL: vtbl1_s8
+  // CIR: {{%.*}} = cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<!s8i x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i] : !cir.vector<!s8i x 16>
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.tbl1" {{%.*}}, {{%.*}} : (!cir.vector<!s8i x 16>, !cir.vector<!s8i x 8>) -> !cir.vector<!s8i x 8>
+
+  // LLVM-LABEL: @test_vtbl1_s8
+  // LLVM: {{%.*}} = shufflevector <8 x i8> {{%.*}}, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // LLVM: {{%.*}} = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> {{%.*}}, <8 x i8> {{%.*}})
+
+  // OGCG-LABEL: @test_vtbl1_s8
+  // OGCG: {{%.*}} = shufflevector <8 x i8> {{%.*}}, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // OGCG: {{%.*}} = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> {{%.*}}, <8 x i8> {{%.*}})
+}
+
+// CHECK-LABEL: test_vcvtq_f64_s64
+float64x2_t test_vcvtq_f64_s64(int64x2_t a) {
+  return vcvtq_f64_s64(a);
+
+  // CIR-LABEL: vcvtq_f64_s64
+  // CIR: {{%.*}} = cir.cast int_to_float {{%.*}} : !cir.vector<!s64i x 2> -> !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: @test_vcvtq_f64_s64
+  // LLVM: {{%.*}} = sitofp <2 x i64> {{%.*}} to <2 x double>
+
+  // OGCG-LABEL: @test_vcvtq_f64_s64
+  // OGCG: {{%.*}} = sitofp <2 x i64> {{%.*}} to <2 x double>
+}
+
+// CHECK-LABEL: test_vcvtq_u32_f32
+uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
+  return vcvtq_u32_f32(a);
+
+  // CIR-LABEL: vcvtq_u32_f32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.fcvtzu" {{%.*}} : (!cir.vector<!cir.float x 4>) -> !cir.vector<!u32i x 4>
+
+  // LLVM-LABEL: @test_vcvtq_u32_f32
+  // LLVM: {{%.*}} = call <4 x i32> @llvm.aarch64.neon.fcvtzu.v4i32.v4f32(<4 x float> {{%.*}})
+
+  // OGCG-LABEL: @test_vcvtq_u32_f32
+  // OGCG: {{%.*}} = call <4 x i32> @llvm.aarch64.neon.fcvtzu.v4i32.v4f32(<4 x float> {{%.*}})
+}
+
+// CHECK-LABEL: test_vcvtq_s32_f32
+int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
+  return vcvtq_s32_f32(a);
+
+  // CIR-LABEL: vcvtq_s32_f32
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.fcvtzs" {{%.*}} : (!cir.vector<!cir.float x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM-LABEL: @test_vcvtq_s32_f32
+  // LLVM: {{%.*}} = call <4 x i32> @llvm.aarch64.neon.fcvtzs.v4i32.v4f32(<4 x float> {{%.*}})
+
+  // OGCG-LABEL: @test_vcvtq_s32_f32
+  // OGCG: {{%.*}} = call <4 x i32> @llvm.aarch64.neon.fcvtzs.v4i32.v4f32(<4 x float> {{%.*}})
+}
+
+// CHECK-LABEL: test_vcvtq_u64_f64
+uint64x2_t test_vcvtq_u64_f64(float64x2_t a) {
+  return vcvtq_u64_f64(a);
+
+  // CIR-LABEL: vcvtq_u64_f64
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.fcvtzu" {{%.*}} : (!cir.vector<!cir.double x 2>) -> !cir.vector<!u64i x 2>
+
+  // LLVM-LABEL: @test_vcvtq_u64_f64
+  // LLVM: {{%.*}} = call <2 x i64> @llvm.aarch64.neon.fcvtzu.v2i64.v2f64(<2 x double> {{%.*}})
+
+  // OGCG-LABEL: @test_vcvtq_u64_f64
+  // OGCG: {{%.*}} = call <2 x i64> @llvm.aarch64.neon.fcvtzu.v2i64.v2f64(<2 x double> {{%.*}})
+}
+
+// CHECK-LABEL: test_vcvtq_s64_f64
+int64x2_t test_vcvtq_s64_f64(float64x2_t a) {
+  return vcvtq_s64_f64(a);
+
+  // CIR-LABEL: vcvtq_s64_f64
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.fcvtzs" {{%.*}} : (!cir.vector<!cir.double x 2>) -> !cir.vector<!s64i x 2>
+
+  // LLVM-LABEL: @test_vcvtq_s64_f64
+  // LLVM: {{%.*}} = call <2 x i64> @llvm.aarch64.neon.fcvtzs.v2i64.v2f64(<2 x double> {{%.*}})
+
+  // OGCG-LABEL: @test_vcvtq_s64_f64
+  // OGCG: {{%.*}} = call <2 x i64> @llvm.aarch64.neon.fcvtzs.v2i64.v2f64(<2 x double> {{%.*}})
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c b/clang/test/CIR/Incubator/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c
new file mode 100644
index 0000000000000..d17dc9fc3a8a4
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c
@@ -0,0 +1,529 @@
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 -target-feature +v8.2a \
+// RUN:    -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 -target-feature +v8.2a \
+// RUN:    -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -emit-llvm -fno-clangir-call-conv-lowering -o - %s \
+// RUN: | opt -S -passes=mem2reg,simplifycfg -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 -target-feature +v8.2a \
+// RUN:  -flax-vector-conversions=none -emit-llvm -o - %s \
+// RUN: | opt -S -passes=instcombine,mem2reg,simplifycfg -o %t-og.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t-og.ll %s
+
+// REQUIRES: aarch64-registered-target || arm-registered-target
+
+// This test mimics clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-generic.c, which eventually
+// CIR shall be able to support fully. Since this is going to take some time to converge,
+// the unsupported/NYI code is commented out, so that we can incrementally improve this.
+// The NYI filecheck used contains the LLVM output from OG codegen that should guide the
+// correct result when implementing this into the CIR pipeline.
+
+#include <arm_neon.h>
+
+// CHECK-LABEL: define {{[^@]+}}@test_vbsl_f16
+// CHECK-SAME: (<4 x i16> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8>
+// CHECK-NEXT:    [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// CHECK-NEXT:    [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT:    [[VBSL3_I:%.*]] = and <4 x i16> [[A]], [[VBSL1_I]]
+// CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i16> [[A]], splat (i16 -1)
+// CHECK-NEXT:    [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT:    [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <4 x half>
+// CHECK-NEXT:    ret <4 x half> [[TMP4]]
+//
+// float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) {
+//   return vbsl_f16(a, b, c);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vbslq_f16
+// CHECK-SAME: (<8 x i16> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8>
+// CHECK-NEXT:    [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// CHECK-NEXT:    [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT:    [[VBSL3_I:%.*]] = and <8 x i16> [[A]], [[VBSL1_I]]
+// CHECK-NEXT:    [[TMP3:%.*]] = xor <8 x i16> [[A]], splat (i16 -1)
+// CHECK-NEXT:    [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
+// CHECK-NEXT:    [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[VBSL5_I]] to <8 x half>
+// CHECK-NEXT:    ret <8 x half> [[TMP4]]
+//
+// float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) {
+//   return vbslq_f16(a, b, c);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vzip_f16
+// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
+// CHECK-NEXT:    [[VZIP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT:    store <4 x half> [[VZIP_I]], ptr [[RETVAL_I]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1
+// CHECK-NEXT:    [[VZIP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT:    store <4 x half> [[VZIP1_I]], ptr [[TMP2]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0
+// CHECK-NEXT:    store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8
+// CHECK-NEXT:    [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8
+// CHECK-NEXT:    ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]]
+//
+// float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) {
+//   return vzip_f16(a, b);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vzipq_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
+// CHECK-NEXT:    [[VZIP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT:    store <8 x half> [[VZIP_I]], ptr [[RETVAL_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1
+// CHECK-NEXT:    [[VZIP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT:    store <8 x half> [[VZIP1_I]], ptr [[TMP2]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0
+// CHECK-NEXT:    store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16
+// CHECK-NEXT:    [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]]
+//
+// float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) {
+//   return vzipq_f16(a, b);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vuzp_f16
+// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
+// CHECK-NEXT:    [[VUZP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT:    store <4 x half> [[VUZP_I]], ptr [[RETVAL_I]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1
+// CHECK-NEXT:    [[VUZP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT:    store <4 x half> [[VUZP1_I]], ptr [[TMP2]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0
+// CHECK-NEXT:    store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8
+// CHECK-NEXT:    [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8
+// CHECK-NEXT:    ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]]
+//
+// float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) {
+//   return vuzp_f16(a, b);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vuzpq_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
+// CHECK-NEXT:    [[VUZP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT:    store <8 x half> [[VUZP_I]], ptr [[RETVAL_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1
+// CHECK-NEXT:    [[VUZP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT:    store <8 x half> [[VUZP1_I]], ptr [[TMP2]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0
+// CHECK-NEXT:    store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16
+// CHECK-NEXT:    [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]]
+//
+// float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) {
+//   return vuzpq_f16(a, b);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vtrn_f16
+// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
+// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT:    store <4 x half> [[VTRN_I]], ptr [[RETVAL_I]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1
+// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT:    store <4 x half> [[VTRN1_I]], ptr [[TMP2]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0
+// CHECK-NEXT:    store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8
+// CHECK-NEXT:    [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8
+// CHECK-NEXT:    ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]]
+//
+// float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) {
+//   return vtrn_f16(a, b);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vtrnq_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
+// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT:    store <8 x half> [[VTRN_I]], ptr [[RETVAL_I]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1
+// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT:    store <8 x half> [[VTRN1_I]], ptr [[TMP2]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0
+// CHECK-NEXT:    store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16
+// CHECK-NEXT:    [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]]
+//
+// float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) {
+//   return vtrnq_f16(a, b);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vmov_n_f16
+// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
+// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
+// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
+// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
+// CHECK-NEXT:    ret <4 x half> [[VECINIT3]]
+//
+// float16x4_t test_vmov_n_f16(float16_t a) {
+//   return vmov_n_f16(a);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vmovq_n_f16
+// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0
+// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
+// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
+// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
+// CHECK-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
+// CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
+// CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
+// CHECK-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
+// CHECK-NEXT:    ret <8 x half> [[VECINIT7]]
+//
+// float16x8_t test_vmovq_n_f16(float16_t a) {
+//   return vmovq_n_f16(a);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vdup_n_f16
+// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0
+// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1
+// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2
+// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3
+// CHECK-NEXT:    ret <4 x half> [[VECINIT3]]
+//
+// float16x4_t test_vdup_n_f16(float16_t a) {
+//   return vdup_n_f16(a);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vdupq_n_f16
+// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0
+// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1
+// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2
+// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3
+// CHECK-NEXT:    [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4
+// CHECK-NEXT:    [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5
+// CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6
+// CHECK-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7
+// CHECK-NEXT:    ret <8 x half> [[VECINIT7]]
+//
+// float16x8_t test_vdupq_n_f16(float16_t a) {
+//   return vdupq_n_f16(a);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vdup_lane_f16
+// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT:    ret <4 x half> [[LANE]]
+//
+// float16x4_t test_vdup_lane_f16(float16x4_t a) {
+//   return vdup_lane_f16(a, 3);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vdupq_lane_f16
+// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+// CHECK-NEXT:    ret <8 x half> [[LANE]]
+//
+// float16x8_t test_vdupq_lane_f16(float16x4_t a) {
+//   return vdupq_lane_f16(a, 3);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vdup_laneq_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK-NEXT:    ret <4 x half> [[LANE]]
+//
+// float16x4_t test_vdup_laneq_f16(float16x8_t a) {
+//   return vdup_laneq_f16(a, 1);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vdupq_laneq_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT:    ret <8 x half> [[LANE]]
+//
+// float16x8_t test_vdupq_laneq_f16(float16x8_t a) {
+//   return vdupq_laneq_f16(a, 7);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vext_f16
+// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
+// CHECK-NEXT:    [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+// CHECK-NEXT:    ret <4 x half> [[VEXT]]
+//
+// float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) {
+//   return vext_f16(a, b, 2);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vextq_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
+// CHECK-NEXT:    [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
+// CHECK-NEXT:    ret <8 x half> [[VEXT]]
+//
+// float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) {
+//   return vextq_f16(a, b, 5);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vrev64_f16
+// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT:    ret <4 x half> [[SHUFFLE_I]]
+//
+// float16x4_t test_vrev64_f16(float16x4_t a) {
+//   return vrev64_f16(a);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vrev64q_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[A]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK-NEXT:    ret <8 x half> [[SHUFFLE_I]]
+//
+// float16x8_t test_vrev64q_f16(float16x8_t a) {
+//   return vrev64q_f16(a);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vzip1_f16
+// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+// CHECK-NEXT:    ret <4 x half> [[SHUFFLE_I]]
+//
+// float16x4_t test_vzip1_f16(float16x4_t a, float16x4_t b) {
+//   return vzip1_f16(a, b);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vzip1q_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+// CHECK-NEXT:    ret <8 x half> [[SHUFFLE_I]]
+//
+// float16x8_t test_vzip1q_f16(float16x8_t a, float16x8_t b) {
+//   return vzip1q_f16(a, b);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vzip2_f16
+// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+// CHECK-NEXT:    ret <4 x half> [[SHUFFLE_I]]
+//
+// float16x4_t test_vzip2_f16(float16x4_t a, float16x4_t b) {
+//   return vzip2_f16(a, b);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vzip2q_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+// CHECK-NEXT:    ret <8 x half> [[SHUFFLE_I]]
+//
+// float16x8_t test_vzip2q_f16(float16x8_t a, float16x8_t b) {
+//   return vzip2q_f16(a, b);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vuzp1_f16
+// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// CHECK-NEXT:    ret <4 x half> [[SHUFFLE_I]]
+//
+// float16x4_t test_vuzp1_f16(float16x4_t a, float16x4_t b) {
+//   return vuzp1_f16(a, b);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vuzp1q_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+// CHECK-NEXT:    ret <8 x half> [[SHUFFLE_I]]
+//
+// float16x8_t test_vuzp1q_f16(float16x8_t a, float16x8_t b) {
+//   return vuzp1q_f16(a, b);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vuzp2_f16
+// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+// CHECK-NEXT:    ret <4 x half> [[SHUFFLE_I]]
+//
+// float16x4_t test_vuzp2_f16(float16x4_t a, float16x4_t b) {
+//   return vuzp2_f16(a, b);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vuzp2q_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+// CHECK-NEXT:    ret <8 x half> [[SHUFFLE_I]]
+//
+// float16x8_t test_vuzp2q_f16(float16x8_t a, float16x8_t b) {
+//   return vuzp2q_f16(a, b);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vtrn1_f16
+// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// CHECK-NEXT:    ret <4 x half> [[SHUFFLE_I]]
+//
+// float16x4_t test_vtrn1_f16(float16x4_t a, float16x4_t b) {
+//   return vtrn1_f16(a, b);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vtrn1q_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// CHECK-NEXT:    ret <8 x half> [[SHUFFLE_I]]
+//
+// float16x8_t test_vtrn1q_f16(float16x8_t a, float16x8_t b) {
+//   return vtrn1q_f16(a, b);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vtrn2_f16
+// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// CHECK-NEXT:    ret <4 x half> [[SHUFFLE_I]]
+//
+// float16x4_t test_vtrn2_f16(float16x4_t a, float16x4_t b) {
+//   return vtrn2_f16(a, b);
+// }
+
+// CHECK-LABEL: define {{[^@]+}}@test_vtrn2q_f16
+// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// CHECK-NEXT:    ret <8 x half> [[SHUFFLE_I]]
+//
+// float16x8_t test_vtrn2q_f16(float16x8_t a, float16x8_t b) {
+//   return vtrn2q_f16(a, b);
+// }
+
+float16_t test_vduph_laneq_f16(float16x8_t vec) {
+  return vduph_laneq_f16(vec, 7);
+
+  // CIR-LABEL: vduph_laneq_f16
+  // CIR: [[TMP0:%.*]] = cir.const #cir.int<7> : !s32i
+  // CIR: [[TMP1:%.*]] = cir.vec.extract {{.*}}[{{.*}} : !s32i] : !cir.vector<!cir.f16 x 8>
+
+  // LLVM-LABEL: test_vduph_laneq_f16
+  // LLVM-SAME: (<8 x half> [[VEC:%.*]])
+  // LLVM: [[VGET_LANE:%.*]] = extractelement <8 x half> [[VEC]], i32 7
+  // LLVM: ret half [[VGET_LANE]]
+}
+
+float16_t test_vduph_lane_f16(float16x4_t vec) {
+  return vduph_lane_f16(vec, 3);
+
+  // CIR-LABEL: vduph_lane_f16
+  // CIR: [[TMP0:%.*]] = cir.const #cir.int<3> : !s32i
+  // CIR: [[TMP1:%.*]] = cir.vec.extract {{.*}}[{{.*}} : !s32i] : !cir.vector<!cir.f16 x 4>
+
+  // LLVM-LABEL: test_vduph_lane_f16
+  // LLVM-SAME: (<4 x half> [[VEC:%.*]])
+  // LLVM: [[VGET_LANE:%.*]] = extractelement <4 x half> [[VEC]], i32 3
+  // LLVM: ret half [[VGET_LANE]]
+}
+
+// LLVM-LABEL: test_vcvtq_u16_f16
+uint16x8_t test_vcvtq_u16_f16(float16x8_t a) {
+  return vcvtq_u16_f16(a);
+
+  // CIR-LABEL: vcvtq_u16_f16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.fcvtzu" {{%.*}} : (!cir.vector<!cir.f16 x 8>) -> !cir.vector<!u16i x 8>
+
+  // LLVM: {{%.*}} = call <8 x i16> @llvm.aarch64.neon.fcvtzu.v8i16.v8f16(<8 x half> {{%.*}})
+
+  // OGCG-LABEL: @test_vcvtq_u16_f16
+  // OGCG: {{%.*}} = call <8 x i16> @llvm.aarch64.neon.fcvtzu.v8i16.v8f16(<8 x half> {{%.*}})
+}
+
+// LLVM-LABEL: test_vcvtq_s16_f16
+int16x8_t test_vcvtq_s16_f16(float16x8_t a) {
+  return vcvtq_s16_f16(a);
+
+  // CIR-LABEL: vcvtq_s16_f16
+  // CIR: {{%.*}} = cir.llvm.intrinsic "aarch64.neon.fcvtzs" {{%.*}} : (!cir.vector<!cir.f16 x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM: {{%.*}} = call <8 x i16> @llvm.aarch64.neon.fcvtzs.v8i16.v8f16(<8 x half> {{%.*}})
+
+  // OGCG-LABEL: @test_vcvtq_s16_f16
+  // OGCG: {{%.*}} = call <8 x i16> @llvm.aarch64.neon.fcvtzs.v8i16.v8f16(<8 x half> {{%.*}})
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.c b/clang/test/CIR/Incubator/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.c
new file mode 100644
index 0000000000000..37705be600d11
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.c
@@ -0,0 +1,206 @@
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 -target-feature +v8.5a \
+// RUN:    -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 -target-feature +v8.5a \
+// RUN:    -fclangir -disable-O0-optnone \
+// RUN:  -flax-vector-conversions=none -emit-llvm -fno-clangir-call-conv-lowering -o - %s \
+// RUN: | opt -S -passes=mem2reg,simplifycfg -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// REQUIRES: aarch64-registered-target || arm-registered-target
+
+// This test mimics clang/test/CodeGen/AArch64/v8.2a-neon-frint3264-intrinsics.c, which eventually
+// CIR shall be able to support fully. Since this is going to take some time to converge,
+// the unsupported/NYI code is commented out, so that we can incrementally improve this.
+// The NYI filecheck used contains the LLVM output from OG codegen that should guide the
+// correct result when implementing this into the CIR pipeline.
+
+#include <arm_neon.h>
+
+float32x2_t test_vrnd32x_f32(float32x2_t a) {
+  return vrnd32x_f32(a);
+
+  // CIR-LABEL: vrnd32x_f32
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.frint32x" {{.*}} : (!cir.vector<!cir.float x 2>) -> !cir.vector<!cir.float x 2>
+
+  // LLVM-LABEL: @test_vrnd32x_f32
+  // LLVM:  [[RND:%.*]] =  call <2 x float> @llvm.aarch64.neon.frint32x.v2f32(<2 x float> {{.*}})
+}
+
+
+float32x4_t test_vrnd32xq_f32(float32x4_t a) {
+  return vrnd32xq_f32(a);
+
+  // CIR-LABEL: vrnd32xq_f32
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.frint32x" {{.*}} : (!cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: @test_vrnd32xq_f32
+  // LLVM:  [[RND:%.*]] =  call <4 x float> @llvm.aarch64.neon.frint32x.v4f32(<4 x float> {{.*}})
+}
+
+float32x2_t test_vrnd32z_f32(float32x2_t a) {
+  return vrnd32z_f32(a);
+
+  // CIR-LABEL: vrnd32z_f32
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.frint32z" {{.*}} : (!cir.vector<!cir.float x 2>) -> !cir.vector<!cir.float x 2>
+
+  // LLVM-LABEL: @test_vrnd32z_f32
+  // LLVM:  [[RND:%.*]] =  call <2 x float> @llvm.aarch64.neon.frint32z.v2f32(<2 x float> {{.*}})
+}
+
+float32x4_t test_vrnd32zq_f32(float32x4_t a) {
+  return vrnd32zq_f32(a);
+
+  // CIR-LABEL: vrnd32zq_f32
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.frint32z" {{.*}} : (!cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: @test_vrnd32zq_f32
+  // LLVM:  [[RND:%.*]] =  call <4 x float> @llvm.aarch64.neon.frint32z.v4f32(<4 x float> {{.*}})
+}
+
+// CHECK-LABEL: test_vrnd64x_f32
+// CHECK:  [[RND:%.*]] =  call <2 x float> @llvm.aarch64.neon.frint64x.v2f32(<2 x float> %a)
+// CHECK:  ret <2 x float> [[RND]]
+float32x2_t test_vrnd64x_f32(float32x2_t a) {
+  return vrnd64x_f32(a);
+
+  // CIR-LABEL: vrnd64x_f32
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.frint64x" {{.*}} : (!cir.vector<!cir.float x 2>) -> !cir.vector<!cir.float x 2>
+
+  // LLVM-LABEL: @test_vrnd64x_f32
+  // LLVM:  [[RND:%.*]] =  call <2 x float> @llvm.aarch64.neon.frint64x.v2f32(<2 x float> {{.*}})
+}
+
+// CHECK-LABEL: test_vrnd64xq_f32
+// CHECK:  [[RND:%.*]] =  call <4 x float> @llvm.aarch64.neon.frint64x.v4f32(<4 x float> %a)
+// CHECK:  ret <4 x float> [[RND]]
+float32x4_t test_vrnd64xq_f32(float32x4_t a) {
+  return vrnd64xq_f32(a);
+
+  // CIR-LABEL: vrnd64xq_f32
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.frint64x" {{.*}} : (!cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: @test_vrnd64xq_f32
+  // LLVM:  [[RND:%.*]] =  call <4 x float> @llvm.aarch64.neon.frint64x.v4f32(<4 x float> {{.*}})
+}
+
+// CHECK-LABEL: test_vrnd64z_f32
+// CHECK:  [[RND:%.*]] =  call <2 x float> @llvm.aarch64.neon.frint64z.v2f32(<2 x float> %a)
+// CHECK:  ret <2 x float> [[RND]]
+float32x2_t test_vrnd64z_f32(float32x2_t a) {
+  return vrnd64z_f32(a);
+
+  // CIR-LABEL: vrnd64z_f32
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.frint64z" {{.*}} : (!cir.vector<!cir.float x 2>) -> !cir.vector<!cir.float x 2>
+
+  // LLVM-LABEL: @test_vrnd64z_f32
+  // LLVM:  [[RND:%.*]] =  call <2 x float> @llvm.aarch64.neon.frint64z.v2f32(<2 x float> {{.*}})
+}
+
+// CHECK-LABEL: test_vrnd64zq_f32
+// CHECK:  [[RND:%.*]] =  call <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float> %a)
+// CHECK:  ret <4 x float> [[RND]]
+float32x4_t test_vrnd64zq_f32(float32x4_t a) {
+  return vrnd64zq_f32(a);
+
+  // CIR-LABEL: vrnd64zq_f32
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.frint64z" {{.*}} : (!cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: @test_vrnd64zq_f32
+  // LLVM:  [[RND:%.*]] =  call <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float> {{.*}})
+}
+
+float64x1_t test_vrnd32x_f64(float64x1_t a) {
+  return vrnd32x_f64(a);
+
+  // CIR-LABEL: vrnd32x_f64
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.frint32x" {{.*}} : (!cir.vector<!cir.double x 1>) -> !cir.vector<!cir.double x 1>
+
+  // LLVM-LABEL: @test_vrnd32x_f64
+  // LLVM:  [[RND:%.*]] =  call <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double> {{.*}})
+}
+
+
+float64x2_t test_vrnd32xq_f64(float64x2_t a) {
+  return vrnd32xq_f64(a);
+
+  // CIR-LABEL: vrnd32xq_f64
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.frint32x" {{.*}} : (!cir.vector<!cir.double x 2>) -> !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: @test_vrnd32xq_f64
+  // LLVM:  [[RND:%.*]] =  call <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double> {{.*}})
+}
+
+float64x1_t test_vrnd32z_f64(float64x1_t a) {
+  return vrnd32z_f64(a);
+
+  // CIR-LABEL: vrnd32z_f64
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.frint32z" {{.*}} : (!cir.vector<!cir.double x 1>) -> !cir.vector<!cir.double x 1>
+
+  // LLVM-LABEL: @test_vrnd32z_f64
+  // LLVM:  [[RND:%.*]] =  call <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double> {{.*}})
+}
+
+float64x2_t test_vrnd32zq_f64(float64x2_t a) {
+  return vrnd32zq_f64(a);
+
+  // CIR-LABEL: vrnd32zq_f64
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.frint32z" {{.*}} : (!cir.vector<!cir.double x 2>) -> !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: @test_vrnd32zq_f64
+  // LLVM:  [[RND:%.*]] =  call <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double> {{.*}})
+}
+
+// CHECK-LABEL: test_vrnd64x_f64
+// CHECK:  [[RND:%.*]] =  call <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double> %a)
+// CHECK:  ret <1 x double> [[RND]]
+float64x1_t test_vrnd64x_f64(float64x1_t a) {
+  return vrnd64x_f64(a);
+
+  // CIR-LABEL: vrnd64x_f64
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.frint64x" {{.*}} : (!cir.vector<!cir.double x 1>) -> !cir.vector<!cir.double x 1>
+
+  // LLVM-LABEL: @test_vrnd64x_f64
+  // LLVM:  [[RND:%.*]] =  call <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double> {{.*}})
+}
+
+// CHECK-LABEL: test_vrnd64xq_f64
+// CHECK:  [[RND:%.*]] =  call <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double> %a)
+// CHECK:  ret <2 x double> [[RND]]
+float64x2_t test_vrnd64xq_f64(float64x2_t a) {
+  return vrnd64xq_f64(a);
+
+  // CIR-LABEL: vrnd64xq_f64
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.frint64x" {{.*}} : (!cir.vector<!cir.double x 2>) -> !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: @test_vrnd64xq_f64
+  // LLVM:  [[RND:%.*]] =  call <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double> {{.*}})
+}
+
+// CHECK-LABEL: test_vrnd64z_f64
+// CHECK:  [[RND:%.*]] =  call <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double> %a)
+// CHECK:  ret <1 x double> [[RND]]
+float64x1_t test_vrnd64z_f64(float64x1_t a) {
+  return vrnd64z_f64(a);
+
+  // CIR-LABEL: vrnd64z_f64
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.frint64z" {{.*}} : (!cir.vector<!cir.double x 1>) -> !cir.vector<!cir.double x 1>
+
+  // LLVM-LABEL: @test_vrnd64z_f64
+  // LLVM:  [[RND:%.*]] =  call <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double> {{.*}})
+}
+
+// CHECK-LABEL: test_vrnd64zq_f64
+// CHECK:  [[RND:%.*]] =  call <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double> %a)
+// CHECK:  ret <2 x double> [[RND]]
+float64x2_t test_vrnd64zq_f64(float64x2_t a) {
+  return vrnd64zq_f64(a);
+
+  // CIR-LABEL: vrnd64zq_f64
+  // CIR: [[TMP0:%.*]] = cir.llvm.intrinsic "aarch64.neon.frint64z" {{.*}} : (!cir.vector<!cir.double x 2>) -> !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: @test_vrnd64zq_f64
+  // LLVM:  [[RND:%.*]] =  call <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double> {{.*}})
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/CUDA/address-spaces.cu b/clang/test/CIR/Incubator/CodeGen/CUDA/address-spaces.cu
new file mode 100644
index 0000000000000..66862fdfc356b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/CUDA/address-spaces.cu
@@ -0,0 +1,19 @@
+#include "../Inputs/cuda.h"
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
+// RUN:            %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+__global__ void fn() {
+  int i = 0;
+  __shared__ int j;
+  j = i;
+}
+
+// CIR: cir.global "private" internal dso_local lang_address_space(offload_local) @_ZZ2fnvE1j : !s32i
+// CIR: cir.func {{.*}} @_Z2fnv
+// CIR: [[Local:%[0-9]+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init]
+// CIR: [[Shared:%[0-9]+]] = cir.get_global @_ZZ2fnvE1j : !cir.ptr<!s32i, lang_address_space(offload_local)>
+// CIR: [[Tmp:%[0-9]+]] = cir.load {{.*}} [[Local]] : !cir.ptr<!s32i>, !s32i
+// CIR: cir.store{{.*}} [[Tmp]], [[Shared]] : !s32i, !cir.ptr<!s32i, lang_address_space(offload_local)>
diff --git a/clang/test/CIR/Incubator/CodeGen/CUDA/addrspace-lowering.cu b/clang/test/CIR/Incubator/CodeGen/CUDA/addrspace-lowering.cu
new file mode 100644
index 0000000000000..91f26fa295978
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/CUDA/addrspace-lowering.cu
@@ -0,0 +1,19 @@
+#include "../Inputs/cuda.h"
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
+
+
+__shared__ int a;
+
+// LLVM-DEVICE: @a = addrspace(3) {{.*}}
+
+__device__ int b;
+
+// LLVM-DEVICE: @b = addrspace(1) {{.*}}
+
+__constant__ int c;
+
+// LLVM-DEVICE: @c = addrspace(4) {{.*}}
diff --git a/clang/test/CIR/Incubator/CodeGen/CUDA/builtin-functions.cu b/clang/test/CIR/Incubator/CodeGen/CUDA/builtin-functions.cu
new file mode 100644
index 0000000000000..d6e5ed5a2d42c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/CUDA/builtin-functions.cu
@@ -0,0 +1,79 @@
+#include "../Inputs/cuda.h"
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
+// RUN:            %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCHECK --input-file=%t.ll %s
+
+__device__ void sync() {
+
+  // CIR: cir.llvm.intrinsic "nvvm.barrier.cta.sync.aligned.all" {{.*}} : (!s32i)
+  // LLVM: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
+  // OGCHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
+  __nvvm_bar_sync(0);
+}
+
+__device__ void builtins() {
+  float f1, f2;
+  double d1, d2;
+
+  // CIR: cir.llvm.intrinsic "nvvm.fmax.f" {{.*}} : (!cir.float, !cir.float) -> !cir.float
+  // LLVM: call float @llvm.nvvm.fmax.f(float {{.*}}, float {{.*}})
+  float t1 = __nvvm_fmax_f(f1, f2);
+  // CIR: cir.llvm.intrinsic "nvvm.fmin.f" {{.*}} : (!cir.float, !cir.float) -> !cir.float
+  // LLVM: call float @llvm.nvvm.fmin.f(float {{.*}}, float {{.*}})
+  float t2 = __nvvm_fmin_f(f1, f2);
+  // CIR: cir.llvm.intrinsic "nvvm.sqrt.rn.f" {{.*}} : (!cir.float) -> !cir.float
+  // LLVM: call float @llvm.nvvm.sqrt.rn.f(float {{.*}})
+  float t3 = __nvvm_sqrt_rn_f(f1);
+  // CIR: cir.llvm.intrinsic "nvvm.rcp.rn.f" {{.*}} : (!cir.float) -> !cir.float
+  // LLVM: call float @llvm.nvvm.rcp.rn.f(float {{.*}})
+  float t4 = __nvvm_rcp_rn_f(f2);
+  // CIR: cir.llvm.intrinsic "nvvm.add.rn.f" {{.*}} : (!cir.float, !cir.float) -> !cir.float
+  // LLVM: call float @llvm.nvvm.add.rn.f(float {{.*}}, float {{.*}})
+  float t5 = __nvvm_add_rn_f(f1, f2);
+
+  // CIR: cir.llvm.intrinsic "nvvm.fmax.d" {{.*}} : (!cir.double, !cir.double) -> !cir.double
+  // LLVM: call double @llvm.nvvm.fmax.d(double {{.*}}, double {{.*}})
+  double td1 = __nvvm_fmax_d(d1, d2);
+  // CIR: cir.llvm.intrinsic "nvvm.fmin.d" {{.*}} : (!cir.double, !cir.double) -> !cir.double
+  // LLVM: call double @llvm.nvvm.fmin.d(double {{.*}}, double {{.*}})
+  double td2 = __nvvm_fmin_d(d1, d2);
+  // CIR: cir.llvm.intrinsic "nvvm.sqrt.rn.d" {{.*}} : (!cir.double) -> !cir.double
+  // LLVM: call double @llvm.nvvm.sqrt.rn.d(double {{.*}})
+  double td3 = __nvvm_sqrt_rn_d(d1);
+  // CIR: cir.llvm.intrinsic "nvvm.rcp.rn.d" {{.*}} : (!cir.double) -> !cir.double
+  // LLVM: call double @llvm.nvvm.rcp.rn.d(double {{.*}})
+  double td4 = __nvvm_rcp_rn_d(d2);
+
+  int i1, i2;
+
+  // CIR: cir.llvm.intrinsic "nvvm.mulhi.i" {{.*}} : (!s32i, !s32i) -> !s32i
+  // LLVM: call i32 @llvm.nvvm.mulhi.i(i32 {{.*}}, i32 {{.*}})
+  int ti1 = __nvvm_mulhi_i(i1, i2);
+
+  // CIR: cir.llvm.intrinsic "nvvm.membar.cta"
+  // LLVM: call void @llvm.nvvm.membar.cta()
+  __nvvm_membar_cta();
+  // CIR: cir.llvm.intrinsic "nvvm.membar.gl"
+  // LLVM: call void @llvm.nvvm.membar.gl()
+  __nvvm_membar_gl();
+  // CIR: cir.llvm.intrinsic "nvvm.membar.sys"
+  // LLVM: call void @llvm.nvvm.membar.sys()
+  __nvvm_membar_sys();
+
+  // CIR: cir.llvm.intrinsic "nvvm.barrier.cta.sync.aligned.all"
+  // LLVM: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
+  // OGCHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
+  __syncthreads();
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/CUDA/builtins-nvptx-ptx60.cu b/clang/test/CIR/Incubator/CodeGen/CUDA/builtins-nvptx-ptx60.cu
new file mode 100644
index 0000000000000..0bfb65623c091
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/CUDA/builtins-nvptx-ptx60.cu
@@ -0,0 +1,65 @@
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_70 \
+// RUN:            -fcuda-is-device -target-feature +ptx60 \
+// RUN:            -emit-cir -o %t.cir -x cuda %s
+// RUN: FileCheck -check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_80 \
+// RUN:            -fcuda-is-device -target-feature +ptx65 \
+// RUN:            -emit-cir -o %t.cir -x cuda %s
+// RUN: FileCheck -check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_80 \
+// RUN:            -fcuda-is-device -target-feature +ptx70 \
+// RUN:            -emit-cir -o %t.cir -x cuda %s
+// RUN: FileCheck -check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_70 \
+// RUN:            -fcuda-is-device -target-feature +ptx60 \
+// RUN:            -emit-llvm -o %t.ll -x cuda %s
+// RUN: FileCheck -check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_80 \
+// RUN:            -fcuda-is-device -target-feature +ptx65 \
+// RUN:            -emit-llvm -o %t.ll -x cuda %s
+// RUN: FileCheck -check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_80 \
+// RUN:            -fcuda-is-device -target-feature +ptx70 \
+// RUN:            -emit-llvm -o %t.ll -x cuda %s
+// RUN: FileCheck -check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_70 \
+// RUN:            -fcuda-is-device -target-feature +ptx60 \
+// RUN:            -emit-llvm -o %t_og.ll -x cuda %s
+// RUN: FileCheck -check-prefix=OGCHECK --input-file=%t_og.ll %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_80 \
+// RUN:            -fcuda-is-device -target-feature +ptx65 \
+// RUN:            -emit-llvm -o %t_og.ll -x cuda %s
+// RUN: FileCheck -check-prefix=OGCHECK --input-file=%t_og.ll %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_80 \
+// RUN:            -fcuda-is-device -target-feature +ptx70 \
+// RUN:            -emit-llvm -o %t_og.ll -x cuda %s
+// RUN: FileCheck -check-prefix=OGCHECK --input-file=%t_og.ll %s
+
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+#define __constant__ __attribute__((constant))
+
+typedef unsigned long long uint64_t;
+
+__device__ void nvvm_sync(unsigned mask, int i, float f, int a, int b,
+                          bool pred, uint64_t i64) {
+
+  // CIR: cir.llvm.intrinsic "nvvm.bar.warp.sync" {{.*}} : (!u32i)
+  // LLVM: call void @llvm.nvvm.bar.warp.sync(i32
+  // OGCHECK: call void @llvm.nvvm.bar.warp.sync(i32
+  __nvvm_bar_warp_sync(mask);
+
+  // CIR: cir.llvm.intrinsic "nvvm.barrier.cta.sync.all" {{.*}} : (!u32i)
+  // LLVM: call void @llvm.nvvm.barrier.cta.sync.all(i32
+  // OGCHECK: call void @llvm.nvvm.barrier.cta.sync.all(i32
+  __nvvm_barrier_sync(mask);
+
+  // CIR: cir.llvm.intrinsic "nvvm.barrier.cta.sync.count" {{.*}} : (!u32i, !u32i)
+  // LLVM: call void @llvm.nvvm.barrier.cta.sync.count(i32
+  // OGCHECK: call void @llvm.nvvm.barrier.cta.sync.count(i32
+  __nvvm_barrier_sync_cnt(mask, i);
+
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/CUDA/builtins-sm90.cu b/clang/test/CIR/Incubator/CodeGen/CUDA/builtins-sm90.cu
new file mode 100644
index 0000000000000..5df9569144b43
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/CUDA/builtins-sm90.cu
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-feature +ptx80 \
+// RUN:            -target-cpu sm_90 -fclangir -emit-cir -fcuda-is-device -target-sdk-version=12.3 \
+// RUN:            %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-feature +ptx80 \
+// RUN:            -target-cpu sm_90 -fclangir -emit-llvm -fcuda-is-device -target-sdk-version=12.3 \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-feature +ptx80 \
+// RUN:            -target-cpu sm_90 -emit-llvm -fcuda-is-device -target-sdk-version=12.3 \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCHECK --input-file=%t.ll %s
+
+// CIR-LABEL: _Z6kernelPlPvj(
+// LLVM: define{{.*}} void @_Z6kernelPlPvj(
+// OGCHECK: define{{.*}} void @_Z6kernelPlPvj(
+__attribute__((global)) void kernel(long *out, void *ptr, unsigned u) {
+  // CIR: cir.llvm.intrinsic "nvvm.barrier.cluster.arrive"
+  // LLVM: call void @llvm.nvvm.barrier.cluster.arrive()
+  // OGCHECK: call void @llvm.nvvm.barrier.cluster.arrive()
+  __nvvm_barrier_cluster_arrive();
+
+  // CIR: cir.llvm.intrinsic "nvvm.barrier.cluster.arrive.relaxed"
+  // LLVM: call void @llvm.nvvm.barrier.cluster.arrive.relaxed()
+  // OGCHECK: call void @llvm.nvvm.barrier.cluster.arrive.relaxed()
+
+  __nvvm_barrier_cluster_arrive_relaxed();
+  // CIR: cir.llvm.intrinsic "nvvm.barrier.cluster.wait"
+  // LLVM: call void @llvm.nvvm.barrier.cluster.wait()
+  // OGCHECK: call void @llvm.nvvm.barrier.cluster.wait()
+  __nvvm_barrier_cluster_wait();
+
+  // CIR: cir.llvm.intrinsic "nvvm.fence.sc.cluster"
+  // LLVM: call void @llvm.nvvm.fence.sc.cluster()
+  // OGCHECK: call void @llvm.nvvm.fence.sc.cluster()
+  __nvvm_fence_sc_cluster();
+
+  // CIR: cir.return
+  // LLVM: ret void
+  // OGCHECK: ret void
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/CUDA/cuda-builtin-vars.cu b/clang/test/CIR/Incubator/CodeGen/CUDA/cuda-builtin-vars.cu
new file mode 100644
index 0000000000000..da4e3daee02bd
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/CUDA/cuda-builtin-vars.cu
@@ -0,0 +1,107 @@
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-llvm -o - %s   \
+// RUN: | FileCheck --check-prefix=LLVM %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-cir -o - %s   \
+// RUN: | FileCheck --check-prefix=CIR %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda  \
+// RUN:            -fcuda-is-device -emit-llvm -o - %s   \
+// RUN: | FileCheck --check-prefix=OGCG %s
+
+#include "__clang_cuda_builtin_vars.h"
+
+// LLVM: define{{.*}} void @_Z6kernelPi(ptr %0)
+// OGCG: define{{.*}} void @_Z6kernelPi(ptr noundef %out)
+__attribute__((global))
+void kernel(int *out) {
+  int i = 0;
+
+  out[i++] = threadIdx.x;
+  // CIR:  cir.func {{.*}} @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv()
+  // CIR:  cir.llvm.intrinsic "nvvm.read.ptx.sreg.tid.x"
+  // LLVM: call{{.*}} i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  // OGCG: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+
+  out[i++] = threadIdx.y;
+  // CIR:  cir.func {{.*}} @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv()
+  // CIR:  cir.llvm.intrinsic "nvvm.read.ptx.sreg.tid.y"
+  // LLVM: call{{.*}} i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+  // OGCG: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+
+  out[i++] = threadIdx.z;
+  // CIR:  cir.func {{.*}} @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_zEv()
+  // CIR:  cir.llvm.intrinsic "nvvm.read.ptx.sreg.tid.z"
+  // LLVM: call{{.*}} i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+  // OGCG: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+
+
+  out[i++] = blockIdx.x;
+  // CIR:  cir.func {{.*}} @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv()
+  // CIR:  cir.llvm.intrinsic "nvvm.read.ptx.sreg.ctaid.x"
+  // LLVM: call{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+  // OGCG: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+
+  out[i++] = blockIdx.y;
+  // CIR:  cir.func {{.*}} @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv()
+  // CIR:  cir.llvm.intrinsic "nvvm.read.ptx.sreg.ctaid.y"
+  // LLVM: call{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
+  // OGCG: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
+
+  out[i++] = blockIdx.z;
+  // CIR:  cir.func {{.*}} @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_zEv()
+  // CIR:  cir.llvm.intrinsic "nvvm.read.ptx.sreg.ctaid.z"
+  // LLVM: call{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
+  // OGCG: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
+
+
+  out[i++] = blockDim.x;
+  // CIR:  cir.func {{.*}} @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv()
+  // CIR:  cir.llvm.intrinsic "nvvm.read.ptx.sreg.ntid.x"
+  // LLVM: call{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+  // OGCG: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+
+  out[i++] = blockDim.y;
+  // CIR:  cir.func {{.*}} @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_yEv()
+  // CIR:  cir.llvm.intrinsic "nvvm.read.ptx.sreg.ntid.y"
+  // LLVM: call{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+  // OGCG: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+
+  out[i++] = blockDim.z;
+  // CIR:  cir.func {{.*}} @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_zEv()
+  // CIR:  cir.llvm.intrinsic "nvvm.read.ptx.sreg.ntid.z"
+  // LLVM: call{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+  // OGCG: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+
+
+  out[i++] = gridDim.x;
+  // CIR:  cir.func {{.*}} @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_xEv()
+  // CIR:  cir.llvm.intrinsic "nvvm.read.ptx.sreg.nctaid.x"
+  // LLVM: call{{.*}} i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
+  // OGCG: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
+
+  out[i++] = gridDim.y;
+  // CIR:  cir.func {{.*}} @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_yEv()
+  // CIR:  cir.llvm.intrinsic "nvvm.read.ptx.sreg.nctaid.y"
+  // LLVM: call{{.*}} i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
+  // OGCG: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
+
+  out[i++] = gridDim.z;
+  // CIR:  cir.func {{.*}} @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_zEv()
+  // CIR:  cir.llvm.intrinsic "nvvm.read.ptx.sreg.nctaid.z"
+  // LLVM: call{{.*}} i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
+  // OGCG: call noundef{{.*}} i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
+
+
+  out[i++] = warpSize;
+  // CIR: [[REGISTER:%.*]] = cir.const #cir.int<32>
+  // CIR: cir.store{{.*}} [[REGISTER]]
+  // LLVM: store i32 32,
+  // OGCG: store i32 32,
+
+
+  // CIR: cir.return loc
+  // LLVM: ret void
+  // OGCG: ret void
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/CUDA/destructor.cu b/clang/test/CIR/Incubator/CodeGen/CUDA/destructor.cu
new file mode 100644
index 0000000000000..0447d0b84dd85
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/CUDA/destructor.cu
@@ -0,0 +1,61 @@
+#include "../Inputs/cuda.h"
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-cir -target-sdk-version=12.3 \
+// RUN:            %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
+// RUN:            %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
+
+// Make sure we do emit device-side kernel even if it's only referenced
+// by the destructor of a variable not present on device.
+template<typename T> __global__ void f(T) {}
+template<typename T> struct A {
+  ~A() { f<<<1, 1>>>(T()); }
+};
+
+// CIR-HOST: module
+// CIR-DEVICE: module
+// CIR-DEVICE: cir.func {{.*}} @_Z1fIiEvT_
+// LLVM-DEVICE: define dso_local ptx_kernel void @_Z1fIiEvT_
+// OGCG-DEVICE: define ptx_kernel void @_Z1fIiEvT_
+
+// CIR-HOST: cir.func {{.*}} @_ZN1AIiED2Ev{{.*}} {
+// CIR-HOST:   cir.call @__cudaPushCallConfiguration
+// CIR-HOST:   cir.call @_Z16__device_stub__fIiEvT_
+// CIR-HOST: }
+
+// LLVM-HOST: define linkonce_odr void @_ZN1AIiED2Ev
+// LLVM-HOST: call i32 @__cudaPushCallConfiguration(
+// LLVM-HOST: call void @_Z16__device_stub__fIiEvT_
+
+// OGCG-HOST: define linkonce_odr void @_ZN1AIiED2Ev
+// OGCG-HOST: call i32 @__cudaPushCallConfiguration(
+// OGCG-HOST: call void @_Z16__device_stub__fIiEvT_
+
+
+
+A<int> a;
diff --git a/clang/test/CIR/Incubator/CodeGen/CUDA/global-vars.cu b/clang/test/CIR/Incubator/CodeGen/CUDA/global-vars.cu
new file mode 100644
index 0000000000000..4c84b96bb646e
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/CUDA/global-vars.cu
@@ -0,0 +1,101 @@
+#include "cuda.h"
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
+// RUN:            -I%S/../Inputs/ %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/../Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-cir -target-sdk-version=12.3 \
+// RUN:            -I%S/../Inputs/ %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/../Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/../Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/../Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
+
+__shared__ int shared;
+// CIR-DEVICE: cir.global external{{.*}}lang_address_space(offload_local) @shared = #cir.undef
+// LLVM-DEVICE: @shared = addrspace(3) global i32 undef, align 4
+// CIR-HOST: cir.global{{.*}}@shared = #cir.undef : !s32i {alignment = 4 : i64}
+// CIR-HOST-NOT: cu.shadow_name
+// LLVM-HOST: @shared = internal global i32 undef, align 4
+// OGCG-HOST: @shared = internal global i32
+// OGCG-DEVICE: @shared = addrspace(3) global i32 undef, align 4
+
+__constant__ int b;
+// CIR-DEVICE: cir.global constant external{{.*}}lang_address_space(offload_constant) @b = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized, cu.var_registration = #cir.cu.var_registration<Variable, constant>}
+// LLVM-DEVICE: @b = addrspace(4) externally_initialized constant i32 0, align 4
+// CIR-HOST: cir.global{{.*}}"private"{{.*}}internal{{.*}}@b = #cir.undef : !s32i {alignment = 4 : i64, cu.shadow_name = #cir.cu.shadow_name<b>, cu.var_registration = #cir.cu.var_registration<Variable, constant>}
+// LLVM-HOST: @b = internal global i32 undef, align 4
+// OGCG-HOST: @b = internal global i32
+// OGCG-DEVICE: @b = addrspace(4) externally_initialized constant i32 0, align 4
+
+// External device variables should remain external on host side (they're just declarations)
+// Note: External declarations may not appear in output if they're not referenced
+extern __device__ int ext_device_var;
+// CIR-HOST-NOT: cir.global{{.*}}@ext_device_var
+// LLVM-HOST-NOT: @ext_device_var
+// OGCG-HOST-NOT: @ext_device_var
+// OGCG-DEVICE-NOT: @ext_device_var
+
+extern __constant__ int ext_constant_var;
+// CIR-HOST-NOT: cir.global{{.*}}@ext_constant_var
+// LLVM-HOST-NOT: @ext_constant_var
+// OGCG-HOST-NOT: @ext_constant_var
+// OGCG-DEVICE-NOT: @ext_constant_var
+
+// External device variables with definitions should be internal on host
+extern __device__ int ext_device_var_def;
+__device__ int ext_device_var_def = 1;
+// CIR-DEVICE: cir.global external{{.*}}lang_address_space(offload_global) @ext_device_var_def = #cir.int<1>
+// LLVM-DEVICE: @ext_device_var_def = addrspace(1) externally_initialized global i32 1, align 4
+// CIR-HOST: cir.global{{.*}}"private"{{.*}}internal{{.*}}@ext_device_var_def = #cir.undef : !s32i {alignment = 4 : i64, cu.shadow_name = #cir.cu.shadow_name<ext_device_var_def>, cu.var_registration = #cir.cu.var_registration<Variable>}
+// LLVM-HOST: @ext_device_var_def = internal global i32 undef, align 4
+// OGCG-HOST: @ext_device_var_def = internal global i32
+// OGCG-DEVICE: @ext_device_var_def = addrspace(1) externally_initialized global i32 1, align 4
+
+extern __constant__ int ext_constant_var_def;
+__constant__ int ext_constant_var_def = 2;
+// CIR-DEVICE: cir.global constant external{{.*}}lang_address_space(offload_constant) @ext_constant_var_def = #cir.int<2>
+// LLVM-DEVICE: @ext_constant_var_def = addrspace(4) externally_initialized constant i32 2, align 4
+// OGCG-DEVICE: @ext_constant_var_def = addrspace(4) externally_initialized constant i32 2, align 4
+// CIR-HOST: cir.global{{.*}}"private"{{.*}}internal{{.*}}@ext_constant_var_def = #cir.undef : !s32i {alignment = 4 : i64, cu.shadow_name = #cir.cu.shadow_name<ext_constant_var_def>, cu.var_registration = #cir.cu.var_registration<Variable, constant>}
+// LLVM-HOST: @ext_constant_var_def = internal global i32 undef, align 4
+// OGCG-HOST: @ext_constant_var_def = internal global i32
+
+// Regular host variables should NOT be internalized
+int host_var;
+// CIR-HOST: cir.global external @host_var = #cir.int<0> : !s32i
+// LLVM-HOST: @host_var = global i32 0, align 4
+// OGCG-HOST: @host_var ={{.*}} global i32
+
+// CIR-DEVICE-NOT: cir.global{{.*}}@host_var
+// LLVM-DEVICE-NOT: @host_var
+// OGCG-DEVICE-NOT: @host_var
+
+// External host variables should remain external (may not appear if not referenced)
+extern int ext_host_var;
+// CIR-HOST-NOT: cir.global{{.*}}@ext_host_var
+// LLVM-HOST-NOT: @ext_host_var
+// OGCG-HOST-NOT: @ext_host_var
+
+// CIR-DEVICE-NOT: cir.global{{.*}}@ext_host_var
+// LLVM-DEVICE-NOT: @ext_host_var
+// OGCG-DEVICE-NOT: @ext_host_var
diff --git a/clang/test/CIR/Incubator/CodeGen/CUDA/mangling.cu b/clang/test/CIR/Incubator/CodeGen/CUDA/mangling.cu
new file mode 100644
index 0000000000000..02bb4000186d4
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/CUDA/mangling.cu
@@ -0,0 +1,92 @@
+#include "../Inputs/cuda.h"
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-cir -target-sdk-version=12.3 \
+// RUN:            %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
+// RUN:            %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
+
+namespace ns {
+    __global__ void cpp_global_function_1(int a, int* b, float c) {}
+    // CIR-HOST: cir.func {{.*}} @_ZN2ns36__device_stub__cpp_global_function_1EiPif
+    // CIR-DEVICE: cir.func {{.*}} @_ZN2ns21cpp_global_function_1EiPif
+
+    __global__ void cpp_global_function_2(int a, int* b, float c) {}
+
+    // CIR-HOST: cir.func {{.*}} @_ZN2ns36__device_stub__cpp_global_function_2EiPif
+    // CIR-DEVICE: cir.func {{.*}} @_ZN2ns21cpp_global_function_2EiPif
+
+    __host__ void cpp_host_function_1(int a, int* b, float c) {}
+
+    // CIR-HOST: cir.func {{.*}} @_ZN2ns19cpp_host_function_1EiPif
+
+    __host__ void cpp_host_function_2(int a, int* b, float c) {}
+
+    // CIR-HOST: cir.func {{.*}} @_ZN2ns19cpp_host_function_2EiPif
+
+    __device__ void cpp_device_function_1(int a, int* b, float c) {}
+
+    // CIR-DEVICE: cir.func {{.*}} @_ZN2ns21cpp_device_function_1EiPif
+
+    __device__ void cpp_device_function_2(int a, int* b, float c) {}
+
+    // CIR-DEVICE: cir.func {{.*}} @_ZN2ns21cpp_device_function_2EiPif
+}
+
+__global__ void cpp_global_function_1(int a, int* b, float c) {}
+
+// CIR-HOST: cir.func {{.*}} @_Z36__device_stub__cpp_global_function_1iPif
+// CIR-DEVICE: cir.func {{.*}} @_Z21cpp_global_function_1iPif
+
+__global__ void cpp_global_function_2(int a, int* b, float c) {}
+
+// CIR-HOST: cir.func {{.*}} @_Z36__device_stub__cpp_global_function_2iPif
+// CIR-DEVICE: cir.func {{.*}} @_Z21cpp_global_function_2iPif
+
+__host__ void cpp_host_function_1(int a, int* b, float c) {}
+
+// CIR-HOST: cir.func {{.*}} @_Z19cpp_host_function_1iPif
+
+__host__ void cpp_host_function_2(int a, int* b, float c) {}
+
+// CIR-HOST: cir.func {{.*}} @_Z19cpp_host_function_2iPif
+
+__device__ void cpp_device_function_1(int a, int* b, float c) {}
+
+// CIR-DEVICE: cir.func {{.*}} @_Z21cpp_device_function_1iPif
+
+__device__ void cpp_device_function_2(int a, int* b, float c) {}
+
+// CIR-DEVICE: cir.func {{.*}} @_Z21cpp_device_function_2iPif
+
+extern "C" {
+    __global__ void c_global_function_1(int a, int* b, float c) {}
+
+    // CIR-HOST: cir.func {{.*}} @__device_stub__c_global_function_1
+    // CIR-DEVICE: cir.func {{.*}} @c_global_function_1
+
+    __global__ void c_global_function_2(int a, int* b, float c) {}
+
+    // CIR-HOST: cir.func {{.*}} @__device_stub__c_global_function_2
+    // CIR-DEVICE: cir.func {{.*}} @c_global_function_2
+
+    __host__ void c_host_function_1(int a, int* b, float c) {}
+
+    // CIR-HOST: cir.func {{.*}} @c_host_function_1
+
+    __host__ void c_host_function_2(int a, int* b, float c) {}
+
+    // CIR-HOST: cir.func {{.*}} @c_host_function_2
+
+    __device__ void c_device_function_1(int a, int* b, float c) {}
+
+    // CIR-DEVICE: cir.func {{.*}} @c_device_function_1
+
+    __device__ void c_device_function_2(int a, int* b, float c) {}
+
+    // CIR-DEVICE: cir.func {{.*}} @c_device_function_2
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/CUDA/printf.cu b/clang/test/CIR/Incubator/CodeGen/CUDA/printf.cu
new file mode 100644
index 0000000000000..f923f16dd04a2
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/CUDA/printf.cu
@@ -0,0 +1,48 @@
+#include "../Inputs/cuda.h"
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
+// RUN:            %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
+
+__device__ void printer() {
+  printf("%d", 0);
+}
+
+// CIR-DEVICE: cir.func {{.*}} @_Z7printerv() extra({{.*}}) {
+// CIR-DEVICE:   %[[#Packed:]] = cir.alloca !rec_anon_struct
+// CIR-DEVICE:   %[[#Zero:]] = cir.const #cir.int<0> : !s32i
+// CIR-DEVICE:   %[[#Field0:]] = cir.get_member %[[#Packed]][0]
+// CIR-DEVICE:   cir.store align(4) %[[#Zero]], %[[#Field0]]
+// CIR-DEVICE:   %[[#Output:]] = cir.cast bitcast %[[#Packed]] : !cir.ptr<!rec_anon_struct>
+// CIR-DEVICE:   cir.call @vprintf(%{{.+}}, %[[#Output]])
+// CIR-DEVICE:   cir.return
+// CIR-DEVICE: }
+
+// LLVM-DEVICE: define dso_local void @_Z7printerv() {{.*}} {
+// LLVM-DEVICE:   %[[#LLVMPacked:]] = alloca { i32 }, i64 1, align 8
+// LLVM-DEVICE:   %[[#LLVMField0:]] = getelementptr { i32 }, ptr %[[#LLVMPacked]], i32 0, i32 0
+// LLVM-DEVICE:   store i32 0, ptr %[[#LLVMField0]], align 4
+// LLVM-DEVICE:   call i32 @vprintf(ptr @.str, ptr %[[#LLVMPacked]])
+// LLVM-DEVICE:   ret void
+// LLVM-DEVICE: }
+
+__device__ void no_extra() {
+  printf("hello world");
+}
+
+// CIR-DEVICE: cir.func {{.*}} @_Z8no_extrav() extra(#fn_attr) {
+// CIR-DEVICE:   %[[#NULLPTR:]] = cir.const #cir.ptr<null>
+// CIR-DEVICE:   cir.call @vprintf(%{{.+}}, %[[#NULLPTR]])
+// CIR-DEVICE:   cir.return
+// CIR-DEVICE: }
+
+// LLVM-DEVICE: define dso_local void @_Z8no_extrav() {{.*}} {
+// LLVM-DEVICE:   call i32 @vprintf(ptr @.str.1, ptr null)
+// LLVM-DEVICE:   ret void
+// LLVM-DEVICE: }
diff --git a/clang/test/CIR/Incubator/CodeGen/CUDA/registration.cu b/clang/test/CIR/Incubator/CodeGen/CUDA/registration.cu
new file mode 100644
index 0000000000000..029b0e3806bb1
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/CUDA/registration.cu
@@ -0,0 +1,209 @@
+#include "../Inputs/cuda.h"
+
+// RUN: echo "sample fatbin" > %t.fatbin
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-cir -target-sdk-version=12.3 \
+// RUN:            -fcuda-include-gpubinary %t.fatbin \
+// RUN:            %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -fcuda-include-gpubinary %t.fatbin \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu  \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -fcuda-include-gpubinary %t.fatbin \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
+
+
+// CIR-HOST: module @"{{.*}}" attributes {
+// CIR-HOST:   cir.cu.binary_handle = #cir.cu.binary_handle<{{.*}}.fatbin>,
+// CIR-HOST:   cir.global_ctors = [#cir.global_ctor<"__cuda_module_ctor", {{[0-9]+}}>]
+// CIR-HOST: }
+
+// Module destructor goes here.
+// This is not a real destructor, as explained in LoweringPrepare.
+
+// CIR-HOST: cir.func internal private @__cuda_module_dtor() {
+// CIR-HOST:   %[[#HandleGlobal:]] = cir.get_global @__cuda_gpubin_handle
+// CIR-HOST:   %[[#Handle:]] = cir.load %[[#HandleGlobal]]
+// CIR-HOST:   cir.call @__cudaUnregisterFatBinary(%[[#Handle]])
+// CIR-HOST: }
+
+// CIR-HOST: cir.global "private" constant cir_private @".str_Z2fnv" =
+// CIR-HOST-SAME: #cir.const_array<"_Z2fnv", trailing_zeros>
+
+// COM: In OG this variable has an `unnamed_addr` attribute.
+// LLVM-HOST: @.str_Z2fnv = private constant [7 x i8] c"_Z2fnv\00"
+
+// The corresponding CIR tests for these three variables are down below.
+// They are here because LLVM IR puts global variables at the front of file.
+
+// LLVM-HOST: @__cuda_fatbin_str = private constant [14 x i8] c"sample fatbin\0A", section ".nv_fatbin"
+// LLVM-HOST: @__cuda_fatbin_wrapper = internal constant {
+// LLVM-HOST:   i32 1180844977, i32 1, ptr @__cuda_fatbin_str, ptr null
+// LLVM-HOST: }
+// LLVM-HOST: @llvm.global_ctors = {{.*}}ptr @__cuda_module_ctor
+
+// LLVM-HOST: define internal void @__cuda_module_dtor() {
+// LLVM-HOST:   %[[#LLVMHandleVar:]] = load ptr, ptr @__cuda_gpubin_handle, align 8
+// LLVM-HOST:   call void @__cudaUnregisterFatBinary(ptr %[[#LLVMHandleVar]])
+// LLVM-HOST:   ret void
+// LLVM-HOST: }
+
+__global__ void fn() {}
+
+__device__ int a;
+__constant__ int b;
+
+// CIR-HOST: cir.func internal private @__cuda_register_globals(%[[FatbinHandle:[a-zA-Z0-9]+]]{{.*}}) {
+// CIR-HOST:   %[[#NULL:]] = cir.const #cir.ptr<null>
+// CIR-HOST:   %[[#T1:]] = cir.get_global @".str_Z2fnv"
+// CIR-HOST:   %[[#DeviceFn:]] = cir.cast bitcast %[[#T1]]
+// CIR-HOST:   %[[#T2:]] = cir.get_global @_Z17__device_stub__fnv
+// CIR-HOST:   %[[#HostFn:]] = cir.cast bitcast %[[#T2]]
+// CIR-HOST:   %[[#MinusOne:]] = cir.const #cir.int<-1>
+// CIR-HOST:   cir.call @__cudaRegisterFunction(
+// CIR-HOST-SAME: %[[FatbinHandle]],
+// CIR-HOST-SAME: %[[#HostFn]],
+// CIR-HOST-SAME: %[[#DeviceFn]],
+// CIR-HOST-SAME: %[[#DeviceFn]],
+// CIR-HOST-SAME: %[[#MinusOne]],
+// CIR-HOST-SAME: %[[#NULL]], %[[#NULL]], %[[#NULL]], %[[#NULL]], %[[#NULL]])
+// Registration for __constant__ int b (isConstant=1):
+// CIR-HOST:   %[[#T3:]] = cir.get_global @".strb0"
+// CIR-HOST:   %[[#DeviceB:]] = cir.cast bitcast %[[#T3]]
+// CIR-HOST:   %[[#T4:]] = cir.get_global @b
+// CIR-HOST:   %[[#HostB:]] = cir.cast bitcast %[[#T4]]
+// CIR-HOST:   %[[#ExtB:]] = cir.const #cir.int<0>
+// CIR-HOST:   %[[#SzB:]] = cir.const #cir.int<4>
+// CIR-HOST:   %[[#ConstB:]] = cir.const #cir.int<1>
+// CIR-HOST:   %[[#ZeroB:]] = cir.const #cir.int<0>
+// CIR-HOST:   cir.call @__cudaRegisterVar(%arg0, %[[#HostB]], %[[#DeviceB]], %[[#DeviceB]],
+// CIR-HOST-SAME: %[[#ExtB]], %[[#SzB]], %[[#ConstB]], %[[#ZeroB]])
+//
+// Registration for __device__ int a (isConstant=0):
+// CIR-HOST:   %[[#T5:]] = cir.get_global @".stra1"
+// CIR-HOST:   %[[#DeviceA:]] = cir.cast bitcast %[[#T5]]
+// CIR-HOST:   %[[#T6:]] = cir.get_global @a
+// CIR-HOST:   %[[#HostA:]] = cir.cast bitcast %[[#T6]]
+// CIR-HOST:   %[[#ExtA:]] = cir.const #cir.int<0>
+// CIR-HOST:   %[[#SzA:]] = cir.const #cir.int<4>
+// CIR-HOST:   %[[#ConstA:]] = cir.const #cir.int<0>
+// CIR-HOST:   %[[#ZeroA:]] = cir.const #cir.int<0>
+// CIR-HOST:   cir.call @__cudaRegisterVar(%arg0, %[[#HostA]], %[[#DeviceA]], %[[#DeviceA]],
+// CIR-HOST-SAME: %[[#ExtA]], %[[#SzA]], %[[#ConstA]], %[[#ZeroA]])
+// CIR-HOST: }
+
+// LLVM-HOST: define internal void @__cuda_register_globals(ptr %[[#LLVMFatbin:]]) {
+// LLVM-HOST:   call i32 @__cudaRegisterFunction(
+// LLVM-HOST-SAME: ptr %[[#LLVMFatbin]],
+// LLVM-HOST-SAME: ptr @_Z17__device_stub__fnv,
+// LLVM-HOST-SAME: ptr @.str_Z2fnv,
+// LLVM-HOST-SAME: ptr @.str_Z2fnv,
+// LLVM-HOST-SAME: i32 -1,
+// LLVM-HOST-SAME: ptr null, ptr null, ptr null, ptr null, ptr null)
+// LLVM-HOST:   call void @__cudaRegisterVar(
+// LLVM-HOST-SAME: ptr %[[#LLVMFatbin]], ptr @b, ptr @.strb0, ptr @.strb0,
+// LLVM-HOST-SAME: i32 0, i64 4, i32 1, i32 0)
+// LLVM-HOST:   call void @__cudaRegisterVar(
+// LLVM-HOST-SAME: ptr %[[#LLVMFatbin]], ptr @a, ptr @.stra1, ptr @.stra1,
+// LLVM-HOST-SAME: i32 0, i64 4, i32 0, i32 0)
+// LLVM-HOST: }
+
+// The content in const array should be the same as echoed above,
+// with a trailing line break ('\n', 0x0A).
+// CIR-HOST: cir.global "private" constant cir_private @__cuda_fatbin_str =
+// CIR-HOST-SAME: #cir.const_array<"sample fatbin\0A">
+// CIR-HOST-SAME: {{.*}}section = ".nv_fatbin"
+
+// The first value is CUDA file head magic number.
+// CIR-HOST: cir.global "private" constant internal @__cuda_fatbin_wrapper
+// CIR-HOST: = #cir.const_record<{
+// CIR-HOST:   #cir.int<1180844977> : !s32i,
+// CIR-HOST:   #cir.int<1> : !s32i,
+// CIR-HOST:   #cir.global_view<@__cuda_fatbin_str> : !cir.ptr<!void>,
+// CIR-HOST:   #cir.ptr<null> : !cir.ptr<!void>
+// CIR-HOST: }>
+// CIR-HOST-SAME: {{.*}}section = ".nvFatBinSegment"
+
+// CIR-HOST: cir.func {{.*}} @__cudaRegisterFatBinary
+// CIR-HOST: cir.func {{.*}} @__cuda_module_ctor() {
+// CIR-HOST:   %[[#Fatbin:]] = cir.call @__cudaRegisterFatBinary
+// CIR-HOST:   %[[#FatbinGlobal:]] = cir.get_global @__cuda_gpubin_handle
+// CIR-HOST:   cir.store %[[#Fatbin]], %[[#FatbinGlobal]]
+// CIR-HOST:   cir.call @__cuda_register_globals
+// CIR-HOST:   cir.call @__cudaRegisterFatBinaryEnd
+// CIR-HOST:   %[[#ModuleDtor:]] = cir.get_global @__cuda_module_dtor
+// CIR-HOST:   cir.call @atexit(%[[#ModuleDtor]])
+// CIR-HOST: }
+
+// LLVM-HOST: define internal void @__cuda_module_ctor() {
+// LLVM-HOST:   %[[#LLVMFatbin:]] = call ptr @__cudaRegisterFatBinary(ptr @__cuda_fatbin_wrapper)
+// LLVM-HOST:   store ptr %[[#LLVMFatbin]], ptr @__cuda_gpubin_handle
+// LLVM-HOST:   call void @__cuda_register_globals
+// LLVM-HOST:   call void @__cudaRegisterFatBinaryEnd
+// LLVM-HOST:   call i32 @atexit(ptr @__cuda_module_dtor)
+// LLVM-HOST: }
+
+// OGCG-HOST: @a = internal global i32 undef, align 4
+// OGCG-HOST: @b = internal global i32 undef, align 4
+// OGCG-HOST: @0 = private unnamed_addr constant [7 x i8] c"_Z2fnv\00", align 1
+// OGCG-HOST: @1 = private unnamed_addr constant [2 x i8] c"a\00", align 1
+// OGCG-HOST: @2 = private unnamed_addr constant [2 x i8] c"b\00", align 1
+// OGCG-HOST: @3 = private constant [14 x i8] c"sample fatbin\0A", section ".nv_fatbin", align 8
+// OGCG-HOST: @__cuda_fatbin_wrapper = internal constant { i32, i32, ptr, ptr } { i32 1180844977, i32 1, ptr @3, ptr null }, section ".nvFatBinSegment", align 8
+// OGCG-HOST: @__cuda_gpubin_handle = internal global ptr null, align 8
+// OGCG-HOST: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__cuda_module_ctor, ptr null }]
+
+// OGCG-HOST: define internal void @__cuda_register_globals(ptr %[[#HANDLE:]]) {
+// OGCG-HOST: entry:
+// OGCG-HOST:   %{{[0-9]+}} = call i32 @__cudaRegisterFunction(ptr %[[#HANDLE]],
+// OGCG-HOST-SAME: ptr @_Z17__device_stub__fnv,
+// OGCG-HOST-SAME: ptr @0,
+// OGCG-HOST-SAME: ptr @0,
+// OGCG-HOST-SAME: i32 -1,
+// OGCG-HOST-SAME: ptr null,
+// OGCG-HOST-SAME: ptr null,
+// OGCG-HOST-SAME: ptr null,
+// OGCG-HOST-SAME: ptr null,
+// OGCG-HOST-SAME: ptr null)
+// OGCG-HOST:   call void @__cudaRegisterVar(ptr %[[#HANDLE]],
+// OGCG-HOST-SAME: ptr @a,
+// OGCG-HOST-SAME: ptr @1,
+// OGCG-HOST-SAME: ptr @1,
+// OGCG-HOST-SAME: i32 0,
+// OGCG-HOST-SAME: i64 4,
+// OGCG-HOST-SAME: i32 0,
+// OGCG-HOST-SAME: i32 0)
+// OGCG-HOST:   call void @__cudaRegisterVar(ptr %[[#HANDLE]],
+// OGCG-HOST-SAME: ptr @b,
+// OGCG-HOST-SAME: ptr @2,
+// OGCG-HOST-SAME: ptr @2,
+// OGCG-HOST-SAME: i32 0,
+// OGCG-HOST-SAME: i64 4,
+// OGCG-HOST-SAME: i32 1,
+// OGCG-HOST-SAME: i32 0)
+// OGCG-HOST:   ret void
+// OGCG-HOST: }
+
+// OGCG-HOST: define internal void @__cuda_module_ctor() {
+// OGCG-HOST: entry:
+// OGCG-HOST:   %[[#WRAPADDR:]] = call ptr @__cudaRegisterFatBinary(ptr @__cuda_fatbin_wrapper)
+// OGCG-HOST:   store ptr %[[#WRAPADDR]], ptr @__cuda_gpubin_handle, align 8
+// OGCG-HOST:   call void @__cuda_register_globals(ptr %[[#WRAPADDR]])
+// OGCG-HOST:   call void @__cudaRegisterFatBinaryEnd(ptr %[[#WRAPADDR]])
+// OGCG-HOST:   %1 = call i32 @atexit(ptr @__cuda_module_dtor)
+// OGCG-HOST:   ret void
+// OGCG-HOST: }
+
+// OGCG-HOST: define internal void @__cuda_module_dtor() {
+// OGCG-HOST: entry:
+// OGCG-HOST:   %[[#HANDLE:]] = load ptr, ptr @__cuda_gpubin_handle, align 8
+// OGCG-HOST:   call void @__cudaUnregisterFatBinary(ptr %[[#HANDLE]])
+// OGCG-HOST:   ret void
+// OGCG-HOST: }
diff --git a/clang/test/CIR/Incubator/CodeGen/CUDA/simple-nvptx-triple.cu b/clang/test/CIR/Incubator/CodeGen/CUDA/simple-nvptx-triple.cu
new file mode 100644
index 0000000000000..acdf54a28df1d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/CUDA/simple-nvptx-triple.cu
@@ -0,0 +1,9 @@
+#include "../Inputs/cuda.h"
+
+// RUN: %clang_cc1 -triple nvptx -fclangir \
+// RUN:            -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
+// RUN:            %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+__device__ void device_fn(int* a, double b, float c) {}
+// CHECK: cir.func {{.*}} @_Z9device_fnPidf
diff --git a/clang/test/CIR/Incubator/CodeGen/CUDA/simple.cu b/clang/test/CIR/Incubator/CodeGen/CUDA/simple.cu
new file mode 100644
index 0000000000000..04962421ea5ca
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/CUDA/simple.cu
@@ -0,0 +1,141 @@
+#include "../Inputs/cuda.h"
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-cir -target-sdk-version=12.3 \
+// RUN:            %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
+// RUN:            %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu  \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
+
+// Per Thread Stream test cases:
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-cir -target-sdk-version=12.3 \
+// RUN: -fgpu-default-stream=per-thread -DCUDA_API_PER_THREAD_DEFAULT_STREAM \
+// RUN:            %s -o %t.cir
+// RUN: FileCheck --check-prefixes=CIR-HOST-PTH --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN: -fgpu-default-stream=per-thread -DCUDA_API_PER_THREAD_DEFAULT_STREAM \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefixes=LLVM-HOST-PTH --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
+// RUN:            -x cuda -emit-llvm -target-sdk-version=12.3 \
+// RUN: -fgpu-default-stream=per-thread -DCUDA_API_PER_THREAD_DEFAULT_STREAM \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefixes=OGCG-HOST-PTH --input-file=%t.ll %s
+
+// Attribute for global_fn
+// CIR-HOST: [[Kernel:#[a-zA-Z_0-9]+]] = {{.*}}#cir.cu.kernel_name<_Z9global_fni>{{.*}}
+
+__host__ void host_fn(int *a, int *b, int *c) {}
+// CIR-HOST: cir.func {{.*}} @_Z7host_fnPiS_S_
+// CIR-DEVICE-NOT: cir.func {{.*}} @_Z7host_fnPiS_S_
+
+__device__ void device_fn(int* a, double b, float c) {}
+// CIR-HOST-NOT: cir.func {{.*}} @_Z9device_fnPidf
+// CIR-DEVICE: cir.func {{.*}} @_Z9device_fnPidf
+
+__global__ void global_fn(int a) {}
+// CIR-DEVICE: @_Z9global_fni({{.*}} cc(ptx_kernel)
+// LLVM-DEVICE: define dso_local ptx_kernel void @_Z9global_fni
+// OGCG-DEVICE: define dso_local ptx_kernel void @_Z9global_fni
+
+// Check for device stub emission.
+
+// CIR-HOST: @_Z24__device_stub__global_fni{{.*}}extra([[Kernel]])
+// CIR-HOST: %[[#CIRKernelArgs:]] = cir.alloca {{.*}}"kernel_args"
+// CIR-HOST: %[[#Decayed:]] = cir.cast array_to_ptrdecay %[[#CIRKernelArgs]]
+// CIR-HOST: cir.call @__cudaPopCallConfiguration
+// CIR-HOST: cir.get_global @_Z24__device_stub__global_fni
+// CIR-HOST: cir.call @cudaLaunchKernel
+// CIR-HOST-PTH: cir.call @cudaLaunchKernel_ptsz
+
+// LLVM-HOST: void @_Z24__device_stub__global_fni
+// LLVM-HOST: %[[#KernelArgs:]] = alloca [1 x ptr], i64 1, align 16
+// LLVM-HOST: %[[#GEP1:]] = getelementptr ptr, ptr %[[#KernelArgs]], i32 0
+// LLVM-HOST: %[[#GEP2:]] = getelementptr [1 x ptr], ptr %[[#KernelArgs]], i32 0, i64 0
+// LLVM-HOST: call i32 @__cudaPopCallConfiguration
+// LLVM-HOST: call i32 @cudaLaunchKernel(ptr @_Z24__device_stub__global_fni
+// LLVM-HOST-PTH: call i32 @cudaLaunchKernel_ptsz(ptr @_Z24__device_stub__global_fni
+
+// OGCG-HOST: void @_Z24__device_stub__global_fni
+// OGCG-HOST: %kernel_args = alloca ptr, i64 1, align 16
+// OGCG-HOST: getelementptr ptr, ptr %kernel_args, i32 0
+// OGCG-HOST: call i32 @__cudaPopCallConfiguration
+// OGCG-HOST: call noundef i32 @cudaLaunchKernel(ptr noundef @_Z24__device_stub__global_fni
+// OGCG-HOST-PTH: call noundef i32 @cudaLaunchKernel_ptsz(ptr noundef @_Z24__device_stub__global_fni
+
+
+int main() {
+  global_fn<<<1, 1>>>(1);
+}
+// CIR-DEVICE-NOT: cir.func {{.*}} @main()
+
+// CIR-HOST: cir.func {{.*}} @main()
+// CIR-HOST: cir.call @_ZN4dim3C1Ejjj
+// CIR-HOST: cir.call @_ZN4dim3C1Ejjj
+// CIR-HOST: [[Push:%[0-9]+]] = cir.call @__cudaPushCallConfiguration
+// CIR-HOST: [[ConfigOK:%[0-9]+]] = cir.cast int_to_bool [[Push]]
+// CIR-HOST: cir.if [[ConfigOK]] {
+// CIR-HOST: } else {
+// CIR-HOST:   [[Arg:%[0-9]+]] = cir.const #cir.int<1>
+// CIR-HOST:   cir.call @_Z24__device_stub__global_fni([[Arg]])
+// CIR-HOST: }
+
+// LLVM-HOST: define dso_local i32 @main
+// LLVM-HOST: alloca %struct.dim3
+// LLVM-HOST: alloca %struct.dim3
+// LLVM-HOST: call void @_ZN4dim3C1Ejjj
+// LLVM-HOST: call void @_ZN4dim3C1Ejjj
+// LLVM-HOST: %[[#ConfigOK:]] = call i32 @__cudaPushCallConfiguration
+// LLVM-HOST: %[[#ConfigCond:]] = icmp ne i32 %[[#ConfigOK]], 0
+// LLVM-HOST: br i1 %[[#ConfigCond]], label %[[#Good:]], label %[[#Bad:]]
+// LLVM-HOST: [[#Good]]:
+// LLVM-HOST:   br label %[[#End:]]
+// LLVM-HOST: [[#Bad]]:
+// LLVM-HOST:   call void @_Z24__device_stub__global_fni(i32 1)
+// LLVM-HOST:   br label %[[#End:]]
+// LLVM-HOST: [[#End]]:
+// LLVM-HOST:   %[[#]] = load i32
+// LLVM-HOST:   ret i32
+
+// OGCG-HOST: define dso_local noundef i32 @main
+// OGCG-HOST: alloca %struct.dim3, align 4
+// OGCG-HOST: alloca %struct.dim3, align 4
+// OGCG-HOST: call void @_ZN4dim3C1Ejjj
+// OGCG-HOST: call void @_ZN4dim3C1Ejjj
+// OGCG-HOST: %call = call i32 @__cudaPushCallConfiguration
+// OGCG-HOST: %tobool = icmp ne i32 %call, 0
+// OGCG-HOST: br i1 %tobool, label %kcall.end, label %kcall.configok
+// OGCG-HOST: kcall.configok:
+// OGCG-HOST:   call void @_Z24__device_stub__global_fni(i32 noundef 1)
+// OGCG-HOST:   br label %kcall.end
+// OGCG-HOST: kcall.end:
+// OGCG-HOST:   %{{[0-9]+}} = load i32, ptr %retval, align 4
+// OGCG-HOST:   ret i32
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/CUDA/surface.cu b/clang/test/CIR/Incubator/CodeGen/CUDA/surface.cu
new file mode 100644
index 0000000000000..61425a3880995
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/CUDA/surface.cu
@@ -0,0 +1,26 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -fclangir -std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - %s | FileCheck --check-prefix=DEVICE-LLVM %s
+// RUN: %clang_cc1 -fclangir -std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda -emit-cir -o - %s | FileCheck --check-prefix=DEVICE-CIR %s
+// RUN: echo "GPU binary would be here" > %t
+// RUN: %clang_cc1 -fclangir -std=c++11 -triple x86_64-unknown-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s
+
+struct surfaceReference {
+  int desc;
+};
+
+template <typename T, int dim = 1>
+struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference {
+};
+
+// Partial specialization over `void`.
+template<int dim>
+struct __attribute__((device_builtin_surface_type)) surface<void, dim> : public surfaceReference {
+};
+
+surface<void, 2> surf;
+
+// DEVICE-LLVM: @surf = addrspace(1) externally_initialized global i64 undef, align 4
+// DEVICE-CIR: cir.global external lang_address_space(offload_global) @surf = #cir.undef : !s64i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized}
+// HOST: @surf = global %"struct.surface<void, 2>" zeroinitializer, align 4
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/CUDA/texture.cu b/clang/test/CIR/Incubator/CodeGen/CUDA/texture.cu
new file mode 100644
index 0000000000000..cb0b1a03dfef4
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/CUDA/texture.cu
@@ -0,0 +1,24 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -fclangir -std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - %s | FileCheck --check-prefix=DEVICE-LLVM %s
+// RUN: %clang_cc1 -fclangir -std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda -emit-cir -o - %s | FileCheck --check-prefix=DEVICE-CIR %s
+// RUN: echo "GPU binary would be here" > %t
+
+struct textureReference {
+  int desc;
+};
+
+enum ReadMode {
+  ElementType = 0,
+  NormalizedFloat = 1
+};
+
+template <typename T, int dim = 1, enum ReadMode mode = ElementType>
+struct __attribute__((device_builtin_texture_type)) texture : public textureReference {
+};
+
+texture<float, 2, NormalizedFloat> tex;
+
+// DEVICE-LLVM: @tex = addrspace(1) externally_initialized global i64 undef, align 4
+// DEVICE-CIR: cir.global external lang_address_space(offload_global) @tex = #cir.undef : !s64i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized}
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/address-spaces.cpp b/clang/test/CIR/Incubator/CodeGen/HIP/address-spaces.cpp
new file mode 100644
index 0000000000000..d599620f3bcf2
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/address-spaces.cpp
@@ -0,0 +1,20 @@
+#include "cuda.h"
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN:            -fcuda-is-device -fhip-new-launch-api \
+// RUN:            -I%S/../Inputs/ -emit-cir %s -o %t.ll
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.ll %s
+
+__global__ void fn() {
+  int i = 0;
+  __shared__ int j;
+  j = i;
+}
+
+// CIR: cir.global "private" internal dso_local lang_address_space(offload_local) @_ZZ2fnvE1j : !s32i
+// CIR: cir.func {{.*}} @_Z2fnv
+// CIR: [[Local:%[0-9]+]] = cir.alloca !s32i, !cir.ptr<!s32i, lang_address_space(offload_private)>, ["i", init]
+// CIR: [[LocalCast:%[0-9]+]] = cir.cast address_space [[Local]] : !cir.ptr<!s32i, lang_address_space(offload_private)> -> !cir.ptr<!s32i>
+// CIR: [[Shared:%[0-9]+]] = cir.get_global @_ZZ2fnvE1j : !cir.ptr<!s32i, lang_address_space(offload_local)>
+// CIR: [[Tmp:%[0-9]+]] = cir.load {{.*}} [[LocalCast]] : !cir.ptr<!s32i>, !s32i
+// CIR: cir.store{{.*}} [[Tmp]], [[Shared]] : !s32i, !cir.ptr<!s32i, lang_address_space(offload_local)>
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/addrspace-lowering.cpp b/clang/test/CIR/Incubator/CodeGen/HIP/addrspace-lowering.cpp
new file mode 100644
index 0000000000000..951bd44ee50c6
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/addrspace-lowering.cpp
@@ -0,0 +1,259 @@
+#include "cuda.h"
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN:            -fcuda-is-device -fhip-new-launch-api -emit-cir \
+// RUN:            -I%S/../Inputs/ %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN:            -fcuda-is-device -fhip-new-launch-api \
+// RUN:            -I%S/../Inputs/ -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip  \
+// RUN:            -fcuda-is-device -fhip-new-launch-api \
+// RUN:            -I%S/../Inputs/ -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
+
+
+
+// ------------------------------------------------------------
+//  CHECK POINTER ARGUMENT LOWERING (bitcast or addrspacecast)
+// ------------------------------------------------------------
+
+__shared__ int a;
+// LLVM-DEVICE: @a = addrspace(3) global i32 undef, align 4
+// OGCG-DEVICE: @a = addrspace(3) global i32 undef, align 4
+
+__device__ int b;
+// LLVM-DEVICE: @b = addrspace(1) externally_initialized global i32 0, align 4
+// OGCG-DEVICE: @b = addrspace(1) externally_initialized global i32 0, align 4
+
+__constant__ int c;
+// LLVM-DEVICE: @c = addrspace(4) externally_initialized constant i32 0, align 4
+// OGCG-DEVICE: @c = addrspace(4) externally_initialized constant i32 0, align 4
+
+// Forward decls in various address spaces.
+extern "C" __device__ void bar(const char *p);
+extern "C" __device__ void takes_global(float *p);
+extern "C" __device__ void takes_shared(int *p);
+extern "C" __device__ void takes_void(void *p);
+extern "C" __device__ void nullfun(int *p);
+extern "C" __device__ void takeS(struct S s);
+extern "C" __device__ void call_fp(void (*f)(int));
+
+__constant__ int CC[12];
+__device__ float GArr[8];
+__device__ void fp_target(int);
+
+// A struct that contains a pointer
+struct S { int *p; };
+
+// ------------------------------------------------------------
+// 1. local → generic: expected bitcast or AS0 match
+// ------------------------------------------------------------
+__device__ void test_local() {
+  int x = 42;
+  bar((const char*)&x);
+}
+// CIR-DEVICE-LABEL: @_Z10test_localv
+// CIR-DEVICE: cir.alloca
+// CIR-DEVICE: cir.store
+// CIR-DEVICE: cir.cast bitcast {{.*}} -> !cir.ptr<!s8i>
+// CIR-DEVICE: cir.call @bar
+// CIR-DEVICE: cir.return
+
+// LLVM-DEVICE-LABEL: @_Z10test_localv
+// LLVM-DEVICE: alloca i32
+// LLVM-DEVICE: addrspacecast ptr addrspace(5) {{.*}} to ptr
+// LLVM-DEVICE: store i32 42
+// LLVM-DEVICE: call void @bar(ptr {{.*}})
+// LLVM-DEVICE: ret void
+
+// OGCG-DEVICE-LABEL: @_Z10test_localv
+// OGCG-DEVICE: alloca i32, align 4, addrspace(5)
+// OGCG-DEVICE: addrspacecast ptr addrspace(5) {{.*}} to ptr
+// OGCG-DEVICE: store i32 42
+// OGCG-DEVICE: call void @bar(ptr noundef {{.*}})
+// OGCG-DEVICE: ret void
+
+// ------------------------------------------------------------
+// 2. global AS → generic param
+// ------------------------------------------------------------
+__device__ void test_global() {
+  takes_global(GArr);
+}
+// CIR-DEVICE-LABEL: @_Z11test_globalv
+// CIR-DEVICE: cir.get_global @GArr
+// CIR-DEVICE: cir.cast array_to_ptrdecay
+// CIR-DEVICE: cir.cast address_space
+// CIR-DEVICE: cir.call @takes_global
+// CIR-DEVICE: cir.return
+
+// LLVM-DEVICE-LABEL: @_Z11test_globalv
+// LLVM-DEVICE: call void @takes_global(ptr addrspacecast (ptr addrspace(1) @GArr to ptr))
+// LLVM-DEVICE: ret void
+
+// OGCG-DEVICE-LABEL: @_Z11test_globalv
+// OGCG-DEVICE: call void @takes_global(ptr noundef addrspacecast (ptr addrspace(1) @GArr to ptr))
+// OGCG-DEVICE: ret void
+
+// ------------------------------------------------------------
+// 3. shared AS(3) → generic param (requires addrspacecast)
+// ------------------------------------------------------------
+__device__ void test_shared() {
+  __shared__ int s[2];
+  takes_shared(s);
+}
+// CIR-DEVICE-LABEL: @_Z11test_sharedv
+// CIR-DEVICE: cir.get_global @_ZZ11test_sharedvE1s
+// CIR-DEVICE: cir.cast array_to_ptrdecay
+// CIR-DEVICE: cir.cast address_space
+// CIR-DEVICE: cir.call @takes_shared
+// CIR-DEVICE: cir.return
+
+// LLVM-DEVICE-LABEL: @_Z11test_sharedv
+// LLVM-DEVICE: call void @takes_shared(ptr addrspacecast (ptr addrspace(3) @_ZZ11test_sharedvE1s to ptr))
+// LLVM-DEVICE: ret void
+
+// OGCG-DEVICE-LABEL: @_Z11test_sharedv
+// OGCG-DEVICE: call void @takes_shared(ptr noundef addrspacecast (ptr addrspace(3) @_ZZ11test_sharedvE1s to ptr))
+// OGCG-DEVICE: ret void
+
+// ------------------------------------------------------------
+// 4. mismatched pointee types but same AS: bitcast only
+// ------------------------------------------------------------
+__device__ void test_void_bitcast() {
+  int x = 7;
+  takes_void((void*)&x);
+}
+// CIR-DEVICE-LABEL: @_Z17test_void_bitcastv
+// CIR-DEVICE: cir.alloca
+// CIR-DEVICE: cir.store
+// CIR-DEVICE: cir.cast bitcast {{.*}} -> !cir.ptr<!void>
+// CIR-DEVICE: cir.call @takes_void
+// CIR-DEVICE: cir.return
+
+// LLVM-DEVICE-LABEL: @_Z17test_void_bitcastv
+// LLVM-DEVICE: alloca i32
+// LLVM-DEVICE: addrspacecast ptr addrspace(5) {{.*}} to ptr
+// LLVM-DEVICE: store i32 7
+// LLVM-DEVICE: call void @takes_void(ptr {{.*}})
+// LLVM-DEVICE: ret void
+
+// OGCG-DEVICE-LABEL: @_Z17test_void_bitcastv
+// OGCG-DEVICE: alloca i32, align 4, addrspace(5)
+// OGCG-DEVICE: addrspacecast ptr addrspace(5) {{.*}} to ptr
+// OGCG-DEVICE: store i32 7
+// OGCG-DEVICE: call void @takes_void(ptr noundef {{.*}})
+// OGCG-DEVICE: ret void
+
+// ------------------------------------------------------------
+// 5. nullptr: ensure correct null pointer cast is emitted
+// ------------------------------------------------------------
+__device__ void test_null() {
+  nullfun(nullptr);
+}
+// CIR-DEVICE-LABEL: @_Z9test_nullv
+// CIR-DEVICE: cir.const #cir.ptr<null>
+// CIR-DEVICE: cir.call @nullfun
+// CIR-DEVICE: cir.return
+
+// LLVM-DEVICE-LABEL: @_Z9test_nullv
+// LLVM-DEVICE: call void @nullfun(ptr null)
+// LLVM-DEVICE: ret void
+
+// OGCG-DEVICE-LABEL: @_Z9test_nullv
+// OGCG-DEVICE: call void @nullfun(ptr noundef null)
+// OGCG-DEVICE: ret void
+
+// ------------------------------------------------------------
+// 6. Struct containing a pointer
+// ------------------------------------------------------------
+__device__ void test_struct() {
+  int x = 5;
+  S s{&x};
+  takeS(s);
+}
+// CIR-DEVICE-LABEL: @_Z11test_structv
+// CIR-DEVICE: cir.alloca !s32i
+// CIR-DEVICE: cir.alloca !rec_S
+// CIR-DEVICE: cir.get_member {{.*}} "p"
+// CIR-DEVICE: cir.store {{.*}} : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CIR-DEVICE: cir.copy
+// CIR-DEVICE: cir.call @takeS
+// CIR-DEVICE: cir.return
+
+// LLVM-DEVICE-LABEL: @_Z11test_structv
+// LLVM-DEVICE: alloca i32
+// LLVM-DEVICE: alloca %struct.S
+// LLVM-DEVICE: getelementptr %struct.S
+// LLVM-DEVICE: store ptr {{.*}}, ptr {{.*}}
+// LLVM-DEVICE: call void @llvm.memcpy
+// LLVM-DEVICE: load %struct.S
+// LLVM-DEVICE: call void @takeS(%struct.S {{.*}})
+// LLVM-DEVICE: ret void
+
+// OGCG-DEVICE-LABEL: @_Z11test_structv
+// OGCG-DEVICE: alloca i32, align 4, addrspace(5)
+// OGCG-DEVICE: alloca %struct.S, align 8, addrspace(5)
+// OGCG-DEVICE: alloca %struct.S, align 8, addrspace(5)
+// OGCG-DEVICE: addrspacecast ptr addrspace(5) {{.*}} to ptr
+// OGCG-DEVICE: store i32 5
+// OGCG-DEVICE: getelementptr inbounds nuw %struct.S
+// OGCG-DEVICE: store ptr {{.*}}, ptr {{.*}}
+// OGCG-DEVICE: call void @llvm.memcpy.p0.p0.i64
+// OGCG-DEVICE: load ptr
+// OGCG-DEVICE: call void @takeS(ptr {{.*}})
+// OGCG-DEVICE: ret void
+
+// ------------------------------------------------------------
+// 7. Function pointer argument
+// ------------------------------------------------------------
+__device__ void test_fp() {
+  call_fp(fp_target);
+}
+// CIR-DEVICE-LABEL: @_Z7test_fpv
+// CIR-DEVICE: cir.get_global @_Z9fp_targeti
+// CIR-DEVICE: cir.call @call_fp
+// CIR-DEVICE: cir.return
+
+// LLVM-DEVICE-LABEL: @_Z7test_fpv
+// LLVM-DEVICE: call void @call_fp(ptr @_Z9fp_targeti)
+// LLVM-DEVICE: ret void
+
+// OGCG-DEVICE-LABEL: @_Z7test_fpv
+// OGCG-DEVICE: call void @call_fp(ptr noundef @_Z9fp_targeti)
+// OGCG-DEVICE: ret void
+
+// ------------------------------------------------------------
+// 8. Original test from previous patch: string literal → char*
+// ------------------------------------------------------------
+__device__ void foo() {
+  char cchar[] = "const char.\n";
+  bar(cchar);
+}
+// CIR-DEVICE-LABEL: @_Z3foov
+// CIR-DEVICE: cir.alloca !cir.array<!s8i x 13>, !cir.ptr<!cir.array<!s8i x 13>, lang_address_space(offload_private)>
+// CIR-DEVICE: cir.cast address_space
+// CIR-DEVICE: cir.get_global @__const._Z3foov
+// CIR-DEVICE: cir.copy
+// CIR-DEVICE: cir.cast array_to_ptrdecay
+// CIR-DEVICE: cir.call @bar
+// CIR-DEVICE: cir.return
+
+// LLVM-DEVICE-LABEL: @_Z3foov
+// LLVM-DEVICE: alloca [13 x i8]
+// LLVM-DEVICE: addrspacecast ptr addrspace(5) {{.*}} to ptr
+// LLVM-DEVICE: call void @llvm.memcpy.p0.p0.i32
+// LLVM-DEVICE: getelementptr i8
+// LLVM-DEVICE: call void @bar(ptr {{.*}})
+// LLVM-DEVICE: ret void
+
+// OGCG-DEVICE-LABEL: @_Z3foov
+// OGCG-DEVICE: alloca [13 x i8], align 1, addrspace(5)
+// OGCG-DEVICE: addrspacecast ptr addrspace(5) {{.*}} to ptr
+// OGCG-DEVICE: call void @llvm.memcpy.p0.p4.i64
+// OGCG-DEVICE: getelementptr inbounds [13 x i8]
+// OGCG-DEVICE: call void @bar(ptr noundef {{.*}})
+// OGCG-DEVICE: ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/amdgpu-attrs.hip b/clang/test/CIR/Incubator/CodeGen/HIP/amdgpu-attrs.hip
new file mode 100644
index 0000000000000..1d43cc4e2d79d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/amdgpu-attrs.hip
@@ -0,0 +1,113 @@
+#include "../Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN:            -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR %s --input-file=%t.cir
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN:            -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM %s --input-file=%t.ll
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \
+// RUN:            -fcuda-is-device -emit-llvm %s -o %t.ogcg.ll
+// RUN: FileCheck --check-prefix=OGCG %s --input-file=%t.ogcg.ll
+
+// Test that AMDGPU-specific attributes are generated for HIP kernels
+
+// Check CIR attribute definitions
+// CIR-DAG: "amdgpu-flat-work-group-size" = "1,1024"
+// CIR-DAG: "amdgpu-flat-work-group-size" = "64,128"
+// CIR-DAG: "amdgpu-waves-per-eu" = "2"
+// CIR-DAG: "amdgpu-waves-per-eu" = "2,4"
+// CIR-DAG: "amdgpu-num-sgpr" = "32"
+// CIR-DAG: "amdgpu-num-vgpr" = "64"
+// CIR-DAG: "amdgpu-max-num-workgroups" = "8,4,2"
+// CIR-DAG: "amdgpu-flat-work-group-size" = "256,256"{{.*}}"amdgpu-num-sgpr" = "48"{{.*}}"amdgpu-num-vgpr" = "32"{{.*}}"amdgpu-waves-per-eu" = "1,2"
+
+// Test: Default attributes for simple kernel
+// CIR: cir.func{{.*}} @_Z13kernel_simplev() cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z13kernel_simplev(){{.*}} #[[SIMPLE_ATTR:[0-9]+]]
+// OGCG: define{{.*}} amdgpu_kernel void @_Z13kernel_simplev(){{.*}} #[[OGCG_SIMPLE_ATTR:[0-9]+]]
+__global__ void kernel_simple() {}
+
+// Test: Explicit flat work group size attribute
+// CIR: cir.func{{.*}} @_Z21kernel_flat_wg_size_1v() cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z21kernel_flat_wg_size_1v(){{.*}} #[[FLAT_WG_ATTR:[0-9]+]]
+// OGCG: define{{.*}} amdgpu_kernel void @_Z21kernel_flat_wg_size_1v(){{.*}} #[[OGCG_FLAT_WG_ATTR:[0-9]+]]
+__attribute__((amdgpu_flat_work_group_size(64, 128)))
+__global__ void kernel_flat_wg_size_1() {}
+
+// Test: Waves per EU attribute
+// CIR: cir.func{{.*}} @_Z19kernel_waves_per_euv() cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z19kernel_waves_per_euv(){{.*}} #[[WAVES_ATTR:[0-9]+]]
+// OGCG: define{{.*}} amdgpu_kernel void @_Z19kernel_waves_per_euv(){{.*}} #[[OGCG_WAVES_ATTR:[0-9]+]]
+__attribute__((amdgpu_waves_per_eu(2)))
+__global__ void kernel_waves_per_eu() {}
+
+// Test: Waves per EU with min and max
+// CIR: cir.func{{.*}} @_Z22kernel_waves_per_eu_mmv() cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z22kernel_waves_per_eu_mmv(){{.*}} #[[WAVES_MM_ATTR:[0-9]+]]
+// OGCG: define{{.*}} amdgpu_kernel void @_Z22kernel_waves_per_eu_mmv(){{.*}} #[[OGCG_WAVES_MM_ATTR:[0-9]+]]
+__attribute__((amdgpu_waves_per_eu(2, 4)))
+__global__ void kernel_waves_per_eu_mm() {}
+
+// Test: Num SGPR attribute
+// CIR: cir.func{{.*}} @_Z15kernel_num_sgprv() cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z15kernel_num_sgprv(){{.*}} #[[SGPR_ATTR:[0-9]+]]
+// OGCG: define{{.*}} amdgpu_kernel void @_Z15kernel_num_sgprv(){{.*}} #[[OGCG_SGPR_ATTR:[0-9]+]]
+__attribute__((amdgpu_num_sgpr(32)))
+__global__ void kernel_num_sgpr() {}
+
+// Test: Num VGPR attribute
+// CIR: cir.func{{.*}} @_Z15kernel_num_vgprv() cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z15kernel_num_vgprv(){{.*}} #[[VGPR_ATTR:[0-9]+]]
+// OGCG: define{{.*}} amdgpu_kernel void @_Z15kernel_num_vgprv(){{.*}} #[[OGCG_VGPR_ATTR:[0-9]+]]
+__attribute__((amdgpu_num_vgpr(64)))
+__global__ void kernel_num_vgpr() {}
+
+// Test: Max num workgroups attribute
+// CIR: cir.func{{.*}} @_Z22kernel_max_num_wgroupsv() cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z22kernel_max_num_wgroupsv(){{.*}} #[[MAX_WG_ATTR:[0-9]+]]
+// OGCG: define{{.*}} amdgpu_kernel void @_Z22kernel_max_num_wgroupsv(){{.*}} #[[OGCG_MAX_WG_ATTR:[0-9]+]]
+__attribute__((amdgpu_max_num_work_groups(8, 4, 2)))
+__global__ void kernel_max_num_wgroups() {}
+
+// Test: Combined attributes
+// CIR: cir.func{{.*}} @_Z15kernel_combinedv() cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z15kernel_combinedv(){{.*}} #[[COMBINED_ATTR:[0-9]+]]
+// OGCG: define{{.*}} amdgpu_kernel void @_Z15kernel_combinedv(){{.*}} #[[OGCG_COMBINED_ATTR:[0-9]+]]
+__attribute__((amdgpu_flat_work_group_size(256, 256)))
+__attribute__((amdgpu_waves_per_eu(1, 2)))
+__attribute__((amdgpu_num_sgpr(48)))
+__attribute__((amdgpu_num_vgpr(32)))
+__global__ void kernel_combined() {}
+
+// Test: Device function should NOT have kernel attributes
+// CIR: cir.func{{.*}} @_Z9device_fnv()
+// CIR-NOT: cc(amdgpu_kernel)
+// LLVM: define{{.*}} void @_Z9device_fnv()
+// LLVM-NOT: amdgpu_kernel
+// OGCG: define{{.*}} void @_Z9device_fnv()
+// OGCG-NOT: amdgpu_kernel
+__device__ void device_fn() {}
+
+// Verify LLVM attributes
+// LLVM-DAG: attributes #[[SIMPLE_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
+// LLVM-DAG: attributes #[[FLAT_WG_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="64,128"
+// LLVM-DAG: attributes #[[WAVES_ATTR]] = {{.*}}"amdgpu-waves-per-eu"="2"
+// LLVM-DAG: attributes #[[WAVES_MM_ATTR]] = {{.*}}"amdgpu-waves-per-eu"="2,4"
+// LLVM-DAG: attributes #[[SGPR_ATTR]] = {{.*}}"amdgpu-num-sgpr"="32"
+// LLVM-DAG: attributes #[[VGPR_ATTR]] = {{.*}}"amdgpu-num-vgpr"="64"
+// LLVM-DAG: attributes #[[MAX_WG_ATTR]] = {{.*}}"amdgpu-max-num-workgroups"="8,4,2"
+// LLVM-DAG: attributes #[[COMBINED_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="256,256"{{.*}}"amdgpu-num-sgpr"="48"{{.*}}"amdgpu-num-vgpr"="32"{{.*}}"amdgpu-waves-per-eu"="1,2"
+
+// Verify OGCG attributes
+// OGCG-DAG: attributes #[[OGCG_SIMPLE_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
+// OGCG-DAG: attributes #[[OGCG_FLAT_WG_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="64,128"
+// OGCG-DAG: attributes #[[OGCG_WAVES_ATTR]] = {{.*}}"amdgpu-waves-per-eu"="2"
+// OGCG-DAG: attributes #[[OGCG_WAVES_MM_ATTR]] = {{.*}}"amdgpu-waves-per-eu"="2,4"
+// OGCG-DAG: attributes #[[OGCG_SGPR_ATTR]] = {{.*}}"amdgpu-num-sgpr"="32"
+// OGCG-DAG: attributes #[[OGCG_VGPR_ATTR]] = {{.*}}"amdgpu-num-vgpr"="64"
+// OGCG-DAG: attributes #[[OGCG_MAX_WG_ATTR]] = {{.*}}"amdgpu-max-num-workgroups"="8,4,2"
+// OGCG-DAG: attributes #[[OGCG_COMBINED_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="256,256"{{.*}}"amdgpu-num-sgpr"="48"{{.*}}"amdgpu-num-vgpr"="32"{{.*}}"amdgpu-waves-per-eu"="1,2"
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/amdgpu-hip-kernel-abi.hip b/clang/test/CIR/Incubator/CodeGen/HIP/amdgpu-hip-kernel-abi.hip
new file mode 100644
index 0000000000000..fbea3d79f3731
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/amdgpu-hip-kernel-abi.hip
@@ -0,0 +1,74 @@
+#include "../Inputs/cuda.h"
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir -fclangir-call-conv-lowering\
+// RUN:            -fcuda-is-device -fhip-new-launch-api \
+// RUN:            -I%S/../Inputs/ -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir -fclangir-call-conv-lowering\
+// RUN:            -fcuda-is-device -fhip-new-launch-api \
+// RUN:            -I%S/../Inputs/ -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \
+// RUN:            -fcuda-is-device -fhip-new-launch-api \
+// RUN:            -I%S/../Inputs/ -emit-llvm %s -o %t.ogcg.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ogcg.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test ABI lowering from CIR to LLVM IR for AMDGPU
+//===----------------------------------------------------------------------===//
+
+// Test simple kernel
+// CIR: cir.func{{.*}} @_Z13simple_kernelv{{.*}} cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z13simple_kernelv()
+// OGCG: define{{.*}} amdgpu_kernel void @_Z13simple_kernelv()
+__global__ void simple_kernel() {}
+
+// Test kernel with int argument
+// CIR: cir.func{{.*}} @_Z14kernel_int_argi(%arg{{[0-9]+}}: !s32i{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z14kernel_int_argi(i32 %{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @_Z14kernel_int_argi(i32{{.*}} %{{.*}})
+__global__ void kernel_int_arg(int x) {}
+
+// Test kernel with char argument
+// CIR: cir.func{{.*}} @_Z15kernel_char_argc(%arg{{[0-9]+}}: !s8i{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z15kernel_char_argc(i8 {{.*}}%{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @_Z15kernel_char_argc(i8{{.*}} %{{.*}})
+__global__ void kernel_char_arg(char c) {}
+
+// Test kernel with pointer (HIP coerces generic pointers to global addrspace 1)
+// CIR: cir.func{{.*}} @_Z14kernel_ptr_argPi(%arg{{[0-9]+}}: !cir.ptr<!s32i{{.*}}>{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z14kernel_ptr_argPi(ptr addrspace(1) %{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @_Z14kernel_ptr_argPi(ptr addrspace(1){{.*}} %{{.*}})
+__global__ void kernel_ptr_arg(int *ptr) {}
+
+// Test kernel with multiple args (pointer coerced to global addrspace 1)
+// CIR: cir.func{{.*}} @_Z16kernel_multi_argifPf(%arg{{[0-9]+}}: !s32i{{.*}} !cir.float{{.*}} !cir.ptr<!cir.float{{.*}}>{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z16kernel_multi_argifPf(i32 %{{.*}}, float %{{.*}}, ptr addrspace(1) %{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @_Z16kernel_multi_argifPf(i32{{.*}} %{{.*}}, float{{.*}} %{{.*}}, ptr addrspace(1){{.*}} %{{.*}})
+__global__ void kernel_multi_arg(int a, float b, float *c) {}
+
+// Test device function
+// CIR: cir.func{{.*}} @_Z9device_fni(%arg{{[0-9]+}}: !s32i{{.*}})
+// LLVM: define{{.*}} void @_Z9device_fni(i32 %{{.*}})
+// OGCG: define{{.*}} void @_Z9device_fni(i32{{.*}} %{{.*}})
+__device__ void device_fn(int x) {}
+
+// Test device function with return value
+// CIR: cir.func{{.*}} @_Z15device_fn_floatf(%arg{{[0-9]+}}: !cir.float{{.*}}) -> !cir.float
+// LLVM: define{{.*}} float @_Z15device_fn_floatf(float %{{.*}})
+// OGCG: define{{.*}} float @_Z15device_fn_floatf(float{{.*}} %{{.*}})
+__device__ float device_fn_float(float f) { return f * 2.0f; }
+
+// Test kernel with pointer (coerced to global addrspace 1)
+// CIR: cir.func{{.*}} @_Z17kernel_shared_ptrPi(%arg{{[0-9]+}}: !cir.ptr<!s32i{{.*}}>{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @_Z17kernel_shared_ptrPi(ptr addrspace(1) %{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @_Z17kernel_shared_ptrPi(ptr addrspace(1){{.*}} %{{.*}})
+__global__ void kernel_shared_ptr(int *ptr) {}
+
+// Test variadic device function
+// CIR: cir.func{{.*}} @_Z11variadic_fniz(%arg{{[0-9]+}}: !s32i{{.*}}, ...)
+// LLVM: define{{.*}} void @_Z11variadic_fniz(i32 %{{.*}}, ...)
+// OGCG: define{{.*}} void @_Z11variadic_fniz(i32{{.*}} %{{.*}}, ...)
+__device__ void variadic_fn(int count, ...) {}
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/amdgpu-module-flags.hip b/clang/test/CIR/Incubator/CodeGen/HIP/amdgpu-module-flags.hip
new file mode 100644
index 0000000000000..494efb36b36f5
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/amdgpu-module-flags.hip
@@ -0,0 +1,30 @@
+#include "../Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN:            -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR %s --input-file=%t.cir
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN:            -fcuda-is-device -emit-llvm %s -o %t.cir.ll
+// RUN: FileCheck --check-prefix=LLVM %s --input-file=%t.cir.ll
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \
+// RUN:            -fcuda-is-device -emit-llvm %s -o %t.ogcg.ll
+// RUN: FileCheck --check-prefix=OGCG %s --input-file=%t.ogcg.ll
+
+// Test that AMDGPU module flags are emitted correctly.
+
+// CIR: module {{.*}} attributes {
+// CIR-SAME: cir.amdgpu_printf_kind = "hostcall"
+// CIR-SAME: cir.amdhsa_code_object_version = 600
+
+// LLVM: !llvm.module.flags = !{
+// LLVM-DAG: !{i32 1, !"amdhsa_code_object_version", i32 600}
+// LLVM-DAG: !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
+
+// OGCG: !llvm.module.flags = !{
+// OGCG-DAG: !{i32 1, !"amdhsa_code_object_version", i32 600}
+// OGCG-DAG: !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
+
+__global__ void kernel() {}
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/amdgpu-vec3-memory-type.hip b/clang/test/CIR/Incubator/CodeGen/HIP/amdgpu-vec3-memory-type.hip
new file mode 100644
index 0000000000000..42fff319d39f0
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/amdgpu-vec3-memory-type.hip
@@ -0,0 +1,96 @@
+#include "../Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu verde -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu verde -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu verde -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+typedef int v3i32 __attribute__((ext_vector_type(3)));
+typedef unsigned int v3u32 __attribute__((ext_vector_type(3)));
+typedef float v3f32 __attribute__((ext_vector_type(3)));
+
+//===----------------------------------------------------------------------===//
+// Test 96-bit vector
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: @_Z18test_store_vec3i32
+// CIR: cir.store {{.*}} : !cir.vector<!s32i x 3>, !cir.ptr<!cir.vector<!s32i x 3>
+// CIR-NOT: x 4
+// LLVM-LABEL: define{{.*}} void @_Z18test_store_vec3i32
+// LLVM: store <3 x i32>
+// LLVM-NOT: <4 x i32>
+// OGCG-LABEL: define{{.*}} void @_Z18test_store_vec3i32
+// OGCG: store <3 x i32>
+__device__ void test_store_vec3i32(v3i32* out, v3i32 val) {
+  *out = val;
+}
+
+// CIR-LABEL: @_Z17test_load_vec3i32
+// CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<!s32i x 3>>, !cir.vector<!s32i x 3>
+// CIR-NOT: x 4
+// LLVM-LABEL: define{{.*}} void @_Z17test_load_vec3i32
+// LLVM: load <3 x i32>
+// LLVM-NOT: <4 x i32>
+// OGCG-LABEL: define{{.*}} void @_Z17test_load_vec3i32
+// OGCG: load <3 x i32>
+__device__ void test_load_vec3i32(v3i32* out, v3i32* in) {
+  *out = *in;
+}
+
+// CIR-LABEL: @_Z18test_store_vec3f32
+// CIR: cir.store {{.*}} : !cir.vector<!cir.float x 3>, !cir.ptr<!cir.vector<!cir.float x 3>
+// CIR-NOT: x 4
+// LLVM-LABEL: define{{.*}} void @_Z18test_store_vec3f32
+// LLVM: store <3 x float>
+// LLVM-NOT: <4 x float>
+// OGCG-LABEL: define{{.*}} void @_Z18test_store_vec3f32
+// OGCG: store <3 x float>
+__device__ void test_store_vec3f32(v3f32* out, v3f32 val) {
+  *out = val;
+}
+
+// CIR-LABEL: @_Z17test_load_vec3f32
+// CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<!cir.float x 3>>, !cir.vector<!cir.float x 3>
+// CIR-NOT: x 4
+// LLVM-LABEL: define{{.*}} void @_Z17test_load_vec3f32
+// LLVM: load <3 x float>
+// LLVM-NOT: <4 x float>
+// OGCG-LABEL: define{{.*}} void @_Z17test_load_vec3f32
+// OGCG: load <3 x float>
+__device__ void test_load_vec3f32(v3f32* out, v3f32* in) {
+  *out = *in;
+}
+
+// CIR-LABEL: @_Z16test_vec3i32_add
+// CIR: cir.binop(add,{{.*}}!cir.vector<!s32i x 3>
+// CIR: cir.store {{.*}} : !cir.vector<!s32i x 3>
+// CIR-NOT: x 4
+// LLVM-LABEL: define{{.*}} void @_Z16test_vec3i32_add
+// LLVM: add <3 x i32>
+// LLVM: store <3 x i32>
+// OGCG-LABEL: define{{.*}} void @_Z16test_vec3i32_add
+// OGCG: add{{.*}}<3 x i32>
+__device__ void test_vec3i32_add(v3i32* out, v3i32 a, v3i32 b) {
+  *out = a + b;
+}
+
+// CIR-LABEL: @_Z16test_vec3f32_mul
+// CIR: cir.binop(mul,{{.*}}!cir.vector<!cir.float x 3>
+// CIR: cir.store {{.*}} : !cir.vector<!cir.float x 3>
+// CIR-NOT: x 4
+// LLVM-LABEL: define{{.*}} void @_Z16test_vec3f32_mul
+// LLVM: fmul{{.*}}<3 x float>
+// LLVM: store <3 x float>
+// OGCG-LABEL: define{{.*}} void @_Z16test_vec3f32_mul
+// OGCG: fmul{{.*}}<3 x float>
+__device__ void test_vec3f32_mul(v3f32* out, v3f32 a, v3f32 b) {
+  *out = a * b;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-gfx10.hip b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-gfx10.hip
new file mode 100644
index 0000000000000..ccfe83381cbb7
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-gfx10.hip
@@ -0,0 +1,63 @@
+#include "../Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1010 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1011 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1011 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1011 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU builtins
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: @_Z15test_permlane16Pjjjjj
+// CIR: cir.llvm.intrinsic "amdgcn.permlane16" {{.*}} : (!u32i, !u32i, !u32i, !u32i, !cir.bool, !cir.bool) -> !u32i
+// LLVM: define{{.*}} void @_Z15test_permlane16Pjjjjj
+// LLVM: call i32 @llvm.amdgcn.permlane16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
+// OGCG: define{{.*}} void @_Z15test_permlane16Pjjjjj
+// OGCG: call i32 @llvm.amdgcn.permlane16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
+__device__ void test_permlane16(unsigned int* out, unsigned int a, unsigned int b, unsigned int c, unsigned int d) {
+  *out = __builtin_amdgcn_permlane16(a, b, c, d, 0, 0);
+}
+
+
+// CIR-LABEL: @_Z16test_permlanex16Pjjjjj
+// CIR: cir.llvm.intrinsic "amdgcn.permlanex16" {{.*}} : (!u32i, !u32i, !u32i, !u32i, !cir.bool, !cir.bool) -> !u32i
+// LLVM: define{{.*}} void @_Z16test_permlanex16Pjjjjj
+// LLVM: call i32 @llvm.amdgcn.permlanex16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
+// OGCG: define{{.*}} void @_Z16test_permlanex16Pjjjjj
+// OGCG: call i32 @llvm.amdgcn.permlanex16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
+__device__ void test_permlanex16(unsigned int* out, unsigned int a, unsigned int b, unsigned int c, unsigned int d) {
+  *out = __builtin_amdgcn_permlanex16(a, b, c, d, 0, 0);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-gfx11.hip b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-gfx11.hip
new file mode 100644
index 0000000000000..383db2e0c3744
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-gfx11.hip
@@ -0,0 +1,112 @@
+#include "../Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1100 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1101 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1102 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1103 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1150 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1151 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1152 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1153 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1100 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1101 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1102 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1103 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1150 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1151 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1152 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1153 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1100 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1101 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1102 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1103 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1150 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1151 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1152 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1153 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU builtins
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: @_Z15test_permlane64Pjj
+// CIR: cir.llvm.intrinsic "amdgcn.permlane64" {{.*}} : (!u32i) -> !u32i
+// LLVM: define{{.*}} void @_Z15test_permlane64Pjj
+// LLVM: call i32 @llvm.amdgcn.permlane64.i32(i32 %{{.*}})
+// OGCG: define{{.*}} void @_Z15test_permlane64Pjj
+// OGCG: call i32 @llvm.amdgcn.permlane64.i32(i32 %{{.*}})
+__device__ void test_permlane64(unsigned int* out, unsigned int a) {
+  *out = __builtin_amdgcn_permlane64(a);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-gfx1250.hip b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-gfx1250.hip
new file mode 100644
index 0000000000000..5997d64c61deb
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-gfx1250.hip
@@ -0,0 +1,51 @@
+#include "../Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU builtins
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: @_Z13test_rcp_bf16PDF16bDF16b
+// CIR: cir.llvm.intrinsic "amdgcn.rcp" {{.*}} : (!cir.bf16) -> !cir.bf16
+// LLVM: define{{.*}} void @_Z13test_rcp_bf16PDF16bDF16b
+// LLVM: call{{.*}} bfloat @llvm.amdgcn.rcp.bf16(bfloat %{{.*}})
+// OGCG: define{{.*}} void @_Z13test_rcp_bf16PDF16bDF16b
+// OGCG: call{{.*}} bfloat @llvm.amdgcn.rcp.bf16(bfloat %{{.*}})
+__device__ void test_rcp_bf16(__bf16* out, __bf16 a)
+{
+  *out = __builtin_amdgcn_rcp_bf16(a);
+}
+
+// CIR-LABEL: @_Z14test_sqrt_bf16PDF16bDF16b
+// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.bf16) -> !cir.bf16
+// LLVM: define{{.*}} void @_Z14test_sqrt_bf16PDF16bDF16b
+// LLVM: call{{.*}} bfloat @llvm.amdgcn.sqrt.bf16(bfloat %{{.*}})
+// OGCG: define{{.*}} void @_Z14test_sqrt_bf16PDF16bDF16b
+// OGCG: call{{.*}} bfloat @llvm.amdgcn.sqrt.bf16(bfloat %{{.*}})
+__device__ void test_sqrt_bf16(__bf16* out, __bf16 a)
+{
+  *out = __builtin_amdgcn_sqrt_bf16(a);
+}
+
+// CIR-LABEL: @_Z13test_rsq_bf16PDF16bDF16b
+// CIR: cir.llvm.intrinsic "amdgcn.rsq" {{.*}} : (!cir.bf16) -> !cir.bf16
+// LLVM: define{{.*}} void @_Z13test_rsq_bf16PDF16bDF16b
+// LLVM: call{{.*}} bfloat @llvm.amdgcn.rsq.bf16(bfloat %{{.*}})
+// OGCG: define{{.*}} void @_Z13test_rsq_bf16PDF16bDF16b
+// OGCG: call{{.*}} bfloat @llvm.amdgcn.rsq.bf16(bfloat %{{.*}})
+__device__ void test_rsq_bf16(__bf16* out, __bf16 a)
+{
+  *out = __builtin_amdgcn_rsq_bf16(a);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-image-sample.hip b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-image-sample.hip
new file mode 100644
index 0000000000000..7f79f2c9144f3
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-image-sample.hip
@@ -0,0 +1,256 @@
+#include "../Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1100 -target-feature +extended-image-insts \
+// RUN:            -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1100 -target-feature +extended-image-insts \
+// RUN:            -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1100 -target-feature +extended-image-insts \
+// RUN:            -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU extended image builtins in HIP
+//===----------------------------------------------------------------------===//
+
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef int int4 __attribute__((ext_vector_type(4)));
+typedef _Float16 half;
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+// CIR-LABEL: @_Z{{.*}}test_image_gather4_lz_2d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.gather4.lz.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_gather4_lz_2d_v4f32
+// LLVM: call {{.*}}@llvm.amdgcn.image.gather4.lz.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_gather4_lz_2d_v4f32
+// OGCG: call {{.*}}@llvm.amdgcn.image.gather4.lz.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_gather4_lz_2d_v4f32(float4* out, float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(1, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_lz_1d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.1d" {{.*}} : (!s32i, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_1d_v4f32
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.1d.{{.*}}(i32 {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_1d_v4f32
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.1d.{{.*}}(i32 {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_lz_1d_v4f32(float4* out, float s, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_1d_v4f32_f32(100, s, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_lz_1d_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.1d" {{.*}} : (!s32i, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_1d_v4f16
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.1d.{{.*}}(i32 {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_1d_v4f16
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.1d.{{.*}}(i32 {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_lz_1d_v4f16(half4* out, float s, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_1d_v4f16_f32(100, s, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_l_1d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.l.1d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_l_1d_v4f32
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.l.1d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_l_1d_v4f32
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.l.1d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_l_1d_v4f32(float4* out, float s, float lod, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_l_1d_v4f32_f32(100, s, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_l_1d_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.l.1d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_l_1d_v4f16
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.l.1d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_l_1d_v4f16
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.l.1d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_l_1d_v4f16(half4* out, float s, float lod, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_l_1d_v4f16_f32(100, s, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_d_1d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.d.1d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_d_1d_v4f32
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.d.1d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_d_1d_v4f32
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.d.1d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_d_1d_v4f32(float4* out, float dsdx, float dsdy, float s, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_d_1d_v4f32_f32(100, dsdx, dsdy, s, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_d_1d_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.d.1d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_d_1d_v4f16
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.d.1d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_d_1d_v4f16
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.d.1d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_d_1d_v4f16(half4* out, float dsdx, float dsdy, float s, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_d_1d_v4f16_f32(100, dsdx, dsdy, s, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_lz_2d_f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.float
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_2d_f32
+// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_2d_f32
+// OGCG: call {{.*}}float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_lz_2d_f32(float* out, float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_2d_f32_f32(1, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_lz_2d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_2d_v4f32
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_2d_v4f32
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_lz_2d_v4f32(float4* out, float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_2d_v4f32_f32(100, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_lz_2d_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_2d_v4f16
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_2d_v4f16
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_lz_2d_v4f16(half4* out, float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_2d_v4f16_f32(100, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_l_2d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.l.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_l_2d_v4f32
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.l.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_l_2d_v4f32
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.l.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_l_2d_v4f32(float4* out, float s, float t, float lod, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_l_2d_v4f32_f32(10, s, t, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_l_2d_f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.l.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.float
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_l_2d_f32
+// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.l.2d.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_l_2d_f32
+// OGCG: call {{.*}}float @llvm.amdgcn.image.sample.l.2d.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_l_2d_f32(float* out, float s, float t, float lod, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_l_2d_f32_f32(1, s, t, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_d_2d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.d.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_d_2d_v4f32
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.d.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_d_2d_v4f32
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.d.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_d_2d_v4f32(float4* out, float dsdx, float dtdx, float dsdy, float dtdy, float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_d_2d_v4f32_f32(100, dsdx, dtdx, dsdy, dtdy, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_d_2d_f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.d.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.float
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_d_2d_f32
+// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_d_2d_f32
+// OGCG: call {{.*}}float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_d_2d_f32(float* out, float dsdx, float dtdx, float dsdy, float dtdy, float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_d_2d_f32_f32(1, dsdx, dtdx, dsdy, dtdy, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_lz_3d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_3d_v4f32
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.3d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_3d_v4f32
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.3d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_lz_3d_v4f32(float4* out, float s, float t, float r, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_3d_v4f32_f32(100, s, t, r, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_lz_3d_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_3d_v4f16
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.3d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_3d_v4f16
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.3d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_lz_3d_v4f16(half4* out, float s, float t, float r, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_3d_v4f16_f32(100, s, t, r, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_lz_cube_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.cube" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_cube_v4f32
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.cube.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_cube_v4f32
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.cube.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_lz_cube_v4f32(float4* out, float s, float t, float face, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_cube_v4f32_f32(1, s, t, face, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_lz_cube_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.cube" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_cube_v4f16
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.cube.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_cube_v4f16
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.cube.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_lz_cube_v4f16(half4* out, float s, float t, float face, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_cube_v4f16_f32(100, s, t, face, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_lz_1darray_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.1darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_1darray_v4f32
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.1darray.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_1darray_v4f32
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.1darray.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_lz_1darray_v4f32(float4* out, float s, float slice, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_1darray_v4f32_f32(1, s, slice, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_lz_1darray_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.1darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_1darray_v4f16
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.1darray.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_1darray_v4f16
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.1darray.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_lz_1darray_v4f16(half4* out, float s, float slice, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_1darray_v4f16_f32(100, s, slice, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_lz_2darray_f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.float
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_2darray_f32
+// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.lz.2darray.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_2darray_f32
+// OGCG: call {{.*}}float @llvm.amdgcn.image.sample.lz.2darray.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_lz_2darray_f32(float* out, float s, float t, float slice, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_2darray_f32_f32(1, s, t, slice, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_lz_2darray_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_2darray_v4f32
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.2darray.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_2darray_v4f32
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.2darray.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_lz_2darray_v4f32(float4* out, float s, float t, float slice, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_2darray_v4f32_f32(100, s, t, slice, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @_Z{{.*}}test_image_sample_lz_2darray_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
+// LLVM-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_2darray_v4f16
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.2darray.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG-LABEL: define{{.*}} void @_Z{{.*}}test_image_sample_lz_2darray_v4f16
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.2darray.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_sample_lz_2darray_v4f16(half4* out, float s, float t, float slice, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_2darray_v4f16_f32(100, s, t, slice, tex, samp, 0, 120, 110);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-image.hip b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-image.hip
new file mode 100644
index 0000000000000..fee405fbc168d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-image.hip
@@ -0,0 +1,122 @@
+#include "../Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN: -target-cpu gfx1100 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN: -target-cpu gfx1100 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN: -target-cpu gfx1100 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU image load/store builtins in HIP
+//===----------------------------------------------------------------------===//
+
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef _Float16 half;
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+// CIR-LABEL: @_Z22test_image_load_2d_f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.load.2d" {{.*}} : (!s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !cir.float
+// LLVM: define{{.*}} void @_Z22test_image_load_2d_f32Pfiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @_Z22test_image_load_2d_f32Pfiiu18__amdgpu_texture_t(
+// OGCG: call {{.*}}float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_load_2d_f32(float* out, int x, int y, __amdgpu_texture_t rsrc) {
+  *out = __builtin_amdgcn_image_load_2d_f32_i32(15, x, y, rsrc, 0, 0);
+}
+
+// CIR-LABEL: @_Z24test_image_load_2d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.load.2d" {{.*}} : (!s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM: define{{.*}} void @_Z24test_image_load_2d_v4f32PDv4_fiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @_Z24test_image_load_2d_v4f32PDv4_fiiu18__amdgpu_texture_t(
+// OGCG: call {{.*}}<4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_load_2d_v4f32(float4* out, int x, int y, __amdgpu_texture_t rsrc) {
+  *out = __builtin_amdgcn_image_load_2d_v4f32_i32(15, x, y, rsrc, 0, 0);
+}
+
+// CIR-LABEL: @_Z24test_image_load_2d_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.load.2d" {{.*}} : (!s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
+// LLVM: define{{.*}} void @_Z24test_image_load_2d_v4f16PDv4_DF16_iiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @_Z24test_image_load_2d_v4f16PDv4_DF16_iiu18__amdgpu_texture_t(
+// OGCG: call {{.*}}<4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_load_2d_v4f16(half4* out, int x, int y, __amdgpu_texture_t rsrc) {
+  *out = __builtin_amdgcn_image_load_2d_v4f16_i32(15, x, y, rsrc, 0, 0);
+}
+
+// CIR-LABEL: @_Z23test_image_store_2d_f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.store.2d" {{.*}} : (!cir.float, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void @_Z23test_image_store_2d_f32fiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.2d.f32.i32.v8i32(float {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @_Z23test_image_store_2d_f32fiiu18__amdgpu_texture_t(
+// OGCG: call void @llvm.amdgcn.image.store.2d.f32.i32.v8i32(float {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_store_2d_f32(float val, int x, int y, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_2d_f32_i32(val, 15, x, y, rsrc, 0, 0);
+}
+
+// CIR-LABEL: @_Z25test_image_store_2d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.store.2d" {{.*}} : (!cir.vector<!cir.float x 4>, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void @_Z25test_image_store_2d_v4f32Dv4_fiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @_Z25test_image_store_2d_v4f32Dv4_fiiu18__amdgpu_texture_t(
+// OGCG: call void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_store_2d_v4f32(float4 val, int x, int y, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_2d_v4f32_i32(val, 15, x, y, rsrc, 0, 0);
+}
+
+// CIR-LABEL: @_Z25test_image_store_2d_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.store.2d" {{.*}} : (!cir.vector<!cir.f16 x 4>, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void @_Z25test_image_store_2d_v4f16Dv4_DF16_iiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.2d.v4f16.i32.v8i32(<4 x half> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @_Z25test_image_store_2d_v4f16Dv4_DF16_iiu18__amdgpu_texture_t(
+// OGCG: call void @llvm.amdgcn.image.store.2d.v4f16.i32.v8i32(<4 x half> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_store_2d_v4f16(half4 val, int x, int y, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_2d_v4f16_i32(val, 15, x, y, rsrc, 0, 0);
+}
+
+// CIR-LABEL: @_Z27test_image_load_2darray_f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.load.2darray" {{.*}} : (!s32i, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !cir.float
+// LLVM: define{{.*}} void @_Z27test_image_load_2darray_f32Pfiiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @_Z27test_image_load_2darray_f32Pfiiiu18__amdgpu_texture_t(
+// OGCG: call {{.*}}float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_load_2darray_f32(float* out, int x, int y, int slice, __amdgpu_texture_t rsrc) {
+  *out = __builtin_amdgcn_image_load_2darray_f32_i32(15, x, y, slice, rsrc, 0, 0);
+}
+
+// CIR-LABEL: @_Z29test_image_load_2darray_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.load.2darray" {{.*}} : (!s32i, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM: define{{.*}} void @_Z29test_image_load_2darray_v4f32PDv4_fiiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @_Z29test_image_load_2darray_v4f32PDv4_fiiiu18__amdgpu_texture_t(
+// OGCG: call {{.*}}<4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_load_2darray_v4f32(float4* out, int x, int y, int slice, __amdgpu_texture_t rsrc) {
+  *out = __builtin_amdgcn_image_load_2darray_v4f32_i32(15, x, y, slice, rsrc, 0, 0);
+}
+
+// CIR-LABEL: @_Z28test_image_store_2darray_f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.store.2darray" {{.*}} : (!cir.float, !s32i, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void @_Z28test_image_store_2darray_f32fiiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.2darray.f32.i32.v8i32(float {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @_Z28test_image_store_2darray_f32fiiiu18__amdgpu_texture_t(
+// OGCG: call void @llvm.amdgcn.image.store.2darray.f32.i32.v8i32(float {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_store_2darray_f32(float val, int x, int y, int slice, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_2darray_f32_i32(val, 15, x, y, slice, rsrc, 0, 0);
+}
+
+// CIR-LABEL: @_Z30test_image_store_2darray_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.store.2darray" {{.*}} : (!cir.vector<!cir.float x 4>, !s32i, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void @_Z30test_image_store_2darray_v4f32Dv4_fiiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x float> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @_Z30test_image_store_2darray_v4f32Dv4_fiiiu18__amdgpu_texture_t(
+// OGCG: call void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x float> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_store_2darray_v4f32(float4 val, int x, int y, int slice, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_2darray_v4f32_i32(val, 15, x, y, slice, rsrc, 0, 0);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-logb-scalbn.hip b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-logb-scalbn.hip
new file mode 100644
index 0000000000000..2919eb9b80ed3
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-logb-scalbn.hip
@@ -0,0 +1,74 @@
+#include "../Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1100 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1100 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1100 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test logb/logbf builtins
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: @_Z11test_logbfff
+// CIR: cir.call @logbf({{.*}}) : (!cir.float) -> !cir.float
+// LLVM: define{{.*}} float @_Z11test_logbfff(
+// LLVM: call {{.*}}float @logbf(float %{{.*}})
+// OGCG: define{{.*}} float @_Z11test_logbfff(
+// OGCG: call { float, i32 } @llvm.frexp.f32.i32(float %{{.*}})
+// OGCG: extractvalue { float, i32 } %{{.*}}, 1
+// OGCG: add nsw i32 %{{.*}}, -1
+// OGCG: sitofp i32 %{{.*}} to float
+// OGCG: call {{.*}}float @llvm.fabs.f32(float %{{.*}})
+// OGCG: fcmp {{.*}}one float %{{.*}}, 0x7FF0000000000000
+// OGCG: select {{.*}}i1 %{{.*}}, float %{{.*}}, float %{{.*}}
+// OGCG: fcmp {{.*}}oeq float %{{.*}}, 0.000000e+00
+// OGCG: select {{.*}}i1 %{{.*}}, float 0xFFF0000000000000, float %{{.*}}
+__device__ float test_logbff(float a) {
+  return __builtin_logbf(a);
+}
+
+// CIR-LABEL: @_Z11test_logbddd
+// CIR: cir.call @logb({{.*}}) : (!cir.double) -> !cir.double
+// LLVM: define{{.*}} double @_Z11test_logbddd(
+// LLVM: call {{.*}}double @logb(double %{{.*}})
+// OGCG: define{{.*}} double @_Z11test_logbddd(
+// OGCG: call { double, i32 } @llvm.frexp.f64.i32(double %{{.*}})
+// OGCG: extractvalue { double, i32 } %{{.*}}, 1
+// OGCG: add nsw i32 %{{.*}}, -1
+// OGCG: sitofp i32 %{{.*}} to double
+// OGCG: call {{.*}}double @llvm.fabs.f64(double %{{.*}})
+// OGCG: fcmp {{.*}}one double %{{.*}}, 0x7FF0000000000000
+// OGCG: select {{.*}}i1 %{{.*}}, double %{{.*}}, double %{{.*}}
+// OGCG: fcmp {{.*}}oeq double %{{.*}}, 0.000000e+00
+// OGCG: select {{.*}}i1 %{{.*}}, double 0xFFF0000000000000, double %{{.*}}
+__device__ double test_logbdd(double a) {
+  return __builtin_logb(a);
+}
+
+// CIR-LABEL: @_Z14test_scalbnffifi
+// CIR: cir.call @scalbnf({{.*}}) : (!cir.float, !s32i) -> !cir.float
+// LLVM: define{{.*}} float @_Z14test_scalbnffifi(
+// LLVM: call {{.*}}float @scalbnf(float %{{.*}}, i32 %{{.*}})
+// OGCG: define{{.*}} float @_Z14test_scalbnffifi(
+// OGCG: call {{.*}}float @llvm.ldexp.f32.i32(float %{{.*}}, i32 %{{.*}})
+__device__ float test_scalbnffi(float a, int b) {
+  return __builtin_scalbnf(a, b);
+}
+
+// CIR-LABEL: @_Z14test_scalbnfdidi
+// CIR: cir.call @scalbn({{.*}}) : (!cir.double, !s32i) -> !cir.double
+// LLVM: define{{.*}} double @_Z14test_scalbnfdidi(
+// LLVM: call {{.*}}double @scalbn(double %{{.*}}, i32 %{{.*}})
+// OGCG: define{{.*}} double @_Z14test_scalbnfdidi(
+// OGCG: call {{.*}}double @llvm.ldexp.f64.i32(double %{{.*}}, i32 %{{.*}})
+__device__ double test_scalbnfdi(double a, int b) {
+  return __builtin_scalbn(a, b);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-raw-buffer-atomics.hip b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-raw-buffer-atomics.hip
new file mode 100644
index 0000000000000..d80bcd02f9c16
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-raw-buffer-atomics.hip
@@ -0,0 +1,93 @@
+#include "../Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx90a -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx90a -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx90a -fcuda-is-device \
+// RUN:            -target-feature +atomic-fmin-fmax-global-f32 \
+// RUN:            -target-feature +atomic-fmin-fmax-global-f64 \
+// RUN:            -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test raw buffer atomic builtins
+//===----------------------------------------------------------------------===//
+
+typedef _Float16 __attribute__((ext_vector_type(2))) float16x2_t;
+
+// CIR-LABEL: @_Z19test_atomic_add_i32
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.add" {{.*}} : (!s32i, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !s32i
+// LLVM-LABEL: define{{.*}} i32 @_Z19test_atomic_add_i32
+// LLVM: call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+// OGCG-LABEL: define{{.*}} i32 @_Z19test_atomic_add_i32
+// OGCG: call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+__device__ int test_atomic_add_i32(int x, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(x, rsrc, offset, soffset, 0);
+}
+
+// CIR-LABEL: @_Z20test_atomic_fadd_f32
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fadd" {{.*}} : (!cir.float, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !cir.float
+// LLVM-LABEL: define{{.*}} float @_Z20test_atomic_fadd_f32
+// LLVM: call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+// OGCG-LABEL: define{{.*}} float @_Z20test_atomic_fadd_f32
+// OGCG: call {{.*}}float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+__device__ float test_atomic_fadd_f32(float x, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(x, rsrc, offset, soffset, 0);
+}
+
+// CIR-LABEL: @_Z22test_atomic_fadd_v2f16
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fadd" {{.*}} : (!cir.vector<!cir.f16 x 2>, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !cir.vector<!cir.f16 x 2>
+// LLVM-LABEL: define{{.*}} <2 x half> @_Z22test_atomic_fadd_v2f16
+// LLVM: call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+// OGCG-LABEL: define{{.*}} <2 x half> @_Z22test_atomic_fadd_v2f16
+// OGCG: call {{.*}}<2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+__device__ float16x2_t test_atomic_fadd_v2f16(float16x2_t x, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(x, rsrc, offset, soffset, 0);
+}
+
+// CIR-LABEL: @_Z20test_atomic_fmin_f32
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fmin" {{.*}} : (!cir.float, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !cir.float
+// LLVM-LABEL: define{{.*}} float @_Z20test_atomic_fmin_f32
+// LLVM: call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+// OGCG-LABEL: define{{.*}} float @_Z20test_atomic_fmin_f32
+// OGCG: call {{.*}}float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+__device__ float test_atomic_fmin_f32(float x, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32(x, rsrc, offset, soffset, 0);
+}
+
+// CIR-LABEL: @_Z20test_atomic_fmin_f64
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fmin" {{.*}} : (!cir.double, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !cir.double
+// LLVM-LABEL: define{{.*}} double @_Z20test_atomic_fmin_f64
+// LLVM: call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+// OGCG-LABEL: define{{.*}} double @_Z20test_atomic_fmin_f64
+// OGCG: call {{.*}}double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+__device__ double test_atomic_fmin_f64(double x, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64(x, rsrc, offset, soffset, 0);
+}
+
+// CIR-LABEL: @_Z20test_atomic_fmax_f32
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fmax" {{.*}} : (!cir.float, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !cir.float
+// LLVM-LABEL: define{{.*}} float @_Z20test_atomic_fmax_f32
+// LLVM: call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+// OGCG-LABEL: define{{.*}} float @_Z20test_atomic_fmax_f32
+// OGCG: call {{.*}}float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+__device__ float test_atomic_fmax_f32(float x, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(x, rsrc, offset, soffset, 0);
+}
+
+// CIR-LABEL: @_Z20test_atomic_fmax_f64
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fmax" {{.*}} : (!cir.double, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !cir.double
+// LLVM-LABEL: define{{.*}} double @_Z20test_atomic_fmax_f64
+// LLVM: call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+// OGCG-LABEL: define{{.*}} double @_Z20test_atomic_fmax_f64
+// OGCG: call {{.*}}double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+__device__ double test_atomic_fmax_f64(double x, __amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(x, rsrc, offset, soffset, 0);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-raw-buffer.hip b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-raw-buffer.hip
new file mode 100644
index 0000000000000..016fdf75803c3
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-raw-buffer.hip
@@ -0,0 +1,145 @@
+#include "../Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu verde -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu verde -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu verde -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test raw buffer load/store builtins
+//===----------------------------------------------------------------------===//
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned int v2u32 __attribute__((ext_vector_type(2)));
+typedef unsigned int v3u32 __attribute__((ext_vector_type(3)));
+typedef unsigned int v4u32 __attribute__((ext_vector_type(4)));
+
+// CIR-LABEL: @_Z24test_raw_buffer_store_b8
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!u8i, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i)
+// LLVM-LABEL: define{{.*}} void @_Z24test_raw_buffer_store_b8
+// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} void @_Z24test_raw_buffer_store_b8
+// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+__device__ void test_raw_buffer_store_b8(u8 vdata, __amdgpu_buffer_rsrc_t rsrc) {
+  __builtin_amdgcn_raw_buffer_store_b8(vdata, rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @_Z25test_raw_buffer_store_b16
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!u16i, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i)
+// LLVM-LABEL: define{{.*}} void @_Z25test_raw_buffer_store_b16
+// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} void @_Z25test_raw_buffer_store_b16
+// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+__device__ void test_raw_buffer_store_b16(u16 vdata, __amdgpu_buffer_rsrc_t rsrc) {
+  __builtin_amdgcn_raw_buffer_store_b16(vdata, rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @_Z25test_raw_buffer_store_b32
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!u32i, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i)
+// LLVM-LABEL: define{{.*}} void @_Z25test_raw_buffer_store_b32
+// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} void @_Z25test_raw_buffer_store_b32
+// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+__device__ void test_raw_buffer_store_b32(u32 vdata, __amdgpu_buffer_rsrc_t rsrc) {
+  __builtin_amdgcn_raw_buffer_store_b32(vdata, rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @_Z25test_raw_buffer_store_b64
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!cir.vector<!u32i x 2>, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i)
+// LLVM-LABEL: define{{.*}} void @_Z25test_raw_buffer_store_b64
+// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} void @_Z25test_raw_buffer_store_b64
+// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+__device__ void test_raw_buffer_store_b64(v2u32 vdata, __amdgpu_buffer_rsrc_t rsrc) {
+  __builtin_amdgcn_raw_buffer_store_b64(vdata, rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @_Z25test_raw_buffer_store_b96
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!cir.vector<!u32i x 3>, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i)
+// LLVM-LABEL: define{{.*}} void @_Z25test_raw_buffer_store_b96
+// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} void @_Z25test_raw_buffer_store_b96
+// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+__device__ void test_raw_buffer_store_b96(v3u32 vdata, __amdgpu_buffer_rsrc_t rsrc) {
+  __builtin_amdgcn_raw_buffer_store_b96(vdata, rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @_Z26test_raw_buffer_store_b128
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!cir.vector<!u32i x 4>, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i)
+// LLVM-LABEL: define{{.*}} void @_Z26test_raw_buffer_store_b128
+// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} void @_Z26test_raw_buffer_store_b128
+// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+__device__ void test_raw_buffer_store_b128(v4u32 vdata, __amdgpu_buffer_rsrc_t rsrc) {
+  __builtin_amdgcn_raw_buffer_store_b128(vdata, rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @_Z23test_raw_buffer_load_b8
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !u8i
+// LLVM-LABEL: define{{.*}} void @_Z23test_raw_buffer_load_b8
+// LLVM: call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} void @_Z23test_raw_buffer_load_b8
+// OGCG: call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+__device__ void test_raw_buffer_load_b8(u8* out, __amdgpu_buffer_rsrc_t rsrc) {
+  *out = __builtin_amdgcn_raw_buffer_load_b8(rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @_Z24test_raw_buffer_load_b16
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !u16i
+// LLVM-LABEL: define{{.*}} void @_Z24test_raw_buffer_load_b16
+// LLVM: call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} void @_Z24test_raw_buffer_load_b16
+// OGCG: call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+__device__ void test_raw_buffer_load_b16(u16* out, __amdgpu_buffer_rsrc_t rsrc) {
+  *out = __builtin_amdgcn_raw_buffer_load_b16(rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @_Z24test_raw_buffer_load_b32
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !u32i
+// LLVM-LABEL: define{{.*}} void @_Z24test_raw_buffer_load_b32
+// LLVM: call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} void @_Z24test_raw_buffer_load_b32
+// OGCG: call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+__device__ void test_raw_buffer_load_b32(u32* out, __amdgpu_buffer_rsrc_t rsrc) {
+  *out = __builtin_amdgcn_raw_buffer_load_b32(rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @_Z24test_raw_buffer_load_b64
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !cir.vector<!u32i x 2>
+// LLVM-LABEL: define{{.*}} void @_Z24test_raw_buffer_load_b64
+// LLVM: call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} void @_Z24test_raw_buffer_load_b64
+// OGCG: call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+__device__ void test_raw_buffer_load_b64(v2u32* out, __amdgpu_buffer_rsrc_t rsrc) {
+  *out = __builtin_amdgcn_raw_buffer_load_b64(rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @_Z24test_raw_buffer_load_b96
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !cir.vector<!u32i x 3>
+// LLVM-LABEL: define{{.*}} void @_Z24test_raw_buffer_load_b96
+// LLVM: call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} void @_Z24test_raw_buffer_load_b96
+// OGCG: call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+__device__ void test_raw_buffer_load_b96(v3u32* out, __amdgpu_buffer_rsrc_t rsrc) {
+  *out = __builtin_amdgcn_raw_buffer_load_b96(rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @_Z25test_raw_buffer_load_b128
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !cir.vector<!u32i x 4>
+// LLVM-LABEL: define{{.*}} void @_Z25test_raw_buffer_load_b128
+// LLVM: call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} void @_Z25test_raw_buffer_load_b128
+// OGCG: call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+__device__ void test_raw_buffer_load_b128(v4u32* out, __amdgpu_buffer_rsrc_t rsrc) {
+  *out = __builtin_amdgcn_raw_buffer_load_b128(rsrc, 0, 0, 0);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-vi.hip b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-vi.hip
new file mode 100644
index 0000000000000..c82ed6a3fca2c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn-vi.hip
@@ -0,0 +1,100 @@
+#include "../Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu tonga -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx900 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1010 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU builtins
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: @_Z18test_div_fixup_f16PDF16_DF16_DF16_DF16_
+// CIR: cir.llvm.intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.f16, !cir.f16, !cir.f16) -> !cir.f16
+// LLVM: define{{.*}} void @_Z18test_div_fixup_f16PDF16_DF16_DF16_DF16_
+// LLVM: call{{.*}} half @llvm.amdgcn.div.fixup.f16(half %{{.+}}, half %{{.+}}, half %{{.+}})
+// OGCG: define{{.*}} void @_Z18test_div_fixup_f16PDF16_DF16_DF16_DF16_
+// OGCG: call{{.*}} half @llvm.amdgcn.div.fixup.f16(half %{{.+}}, half %{{.+}}, half %{{.+}})
+__device__ void test_div_fixup_f16(_Float16* out, _Float16 a, _Float16 b, _Float16 c) {
+  *out = __builtin_amdgcn_div_fixuph(a, b, c);
+}
+
+// CIR-LABEL: @_Z12test_rcp_f16PDF16_DF16_
+// CIR: cir.llvm.intrinsic "amdgcn.rcp" {{.*}} : (!cir.f16) -> !cir.f16
+// LLVM: define{{.*}} void @_Z12test_rcp_f16PDF16_DF16_
+// LLVM: call{{.*}} half @llvm.amdgcn.rcp.f16(half %{{.*}})
+// OGCG: define{{.*}} void @_Z12test_rcp_f16PDF16_DF16_
+// OGCG: call{{.*}} half @llvm.amdgcn.rcp.f16(half %{{.*}})
+__device__ void test_rcp_f16(_Float16* out, _Float16 a)
+{
+  *out = __builtin_amdgcn_rcph(a);
+}
+
+// CIR-LABEL: @_Z13test_sqrt_f16PDF16_DF16_
+// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.f16) -> !cir.f16
+// LLVM: define{{.*}} void @_Z13test_sqrt_f16PDF16_DF16_
+// LLVM: call{{.*}} half @llvm.{{(amdgcn.)?}}sqrt.f16(half %{{.*}})
+// OGCG: define{{.*}} void @_Z13test_sqrt_f16PDF16_DF16_
+// OGCG: call{{.*}} half @llvm.{{(amdgcn.)?}}sqrt.f16(half %{{.*}})
+__device__ void test_sqrt_f16(_Float16* out, _Float16 a)
+{
+  *out = __builtin_amdgcn_sqrth(a);
+}
+
+// CIR-LABEL: @_Z10test_rsq_hPDF16_DF16_
+// CIR: cir.llvm.intrinsic "amdgcn.rsq" {{.*}} : (!cir.f16) -> !cir.f16
+// LLVM: define{{.*}} void @_Z10test_rsq_hPDF16_DF16_
+// LLVM: call{{.*}} half @llvm.amdgcn.rsq.f16(half %{{.*}})
+// OGCG: define{{.*}} void @_Z10test_rsq_hPDF16_DF16_
+// OGCG: call{{.*}} half @llvm.amdgcn.rsq.f16(half %{{.*}})
+__device__ void test_rsq_h(_Float16* out, _Float16 a)
+{
+  *out = __builtin_amdgcn_rsqh(a);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn.hip b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn.hip
new file mode 100644
index 0000000000000..47d103631b275
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/builtins-amdgcn.hip
@@ -0,0 +1,429 @@
+#include "../Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu tahiti -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu tahiti -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu tahiti -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU builtins
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: @_Z28test_wave_reduce_add_u32_i32Pii
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.add" {{.*}} : (!u32i, !s32i) -> !u32i
+// LLVM: define{{.*}} void @_Z28test_wave_reduce_add_u32_i32Pii(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @_Z28test_wave_reduce_add_u32_i32Pii(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %{{.*}}, i32 0)
+__device__ void test_wave_reduce_add_u32_i32(int* out, int in) {
+  *out = __builtin_amdgcn_wave_reduce_add_u32(in, 0);
+}
+
+// CIR-LABEL: @_Z28test_wave_reduce_add_u64_i64Pll
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.add" {{.*}} : (!u64i, !s32i) -> !u64i
+// LLVM: define{{.*}} void @_Z28test_wave_reduce_add_u64_i64Pll(
+// LLVM: call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @_Z28test_wave_reduce_add_u64_i64Pll(
+// OGCG: call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 %{{.*}}, i32 0)
+__device__ void test_wave_reduce_add_u64_i64(long* out, long in) {
+  *out = __builtin_amdgcn_wave_reduce_add_u64(in, 0);
+}
+
+// CIR-LABEL: @_Z28test_wave_reduce_sub_u32_i32Pii
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.sub" {{.*}} : (!u32i, !s32i) -> !u32i
+// LLVM: define{{.*}} void @_Z28test_wave_reduce_sub_u32_i32Pii(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @_Z28test_wave_reduce_sub_u32_i32Pii(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %{{.*}}, i32 0)
+__device__ void test_wave_reduce_sub_u32_i32(int* out, int in) {
+  *out = __builtin_amdgcn_wave_reduce_sub_u32(in, 0);
+}
+
+// CIR-LABEL: @_Z28test_wave_reduce_sub_u64_i64Pll
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.sub" {{.*}} : (!u64i, !s32i) -> !u64i
+// LLVM: define{{.*}} void @_Z28test_wave_reduce_sub_u64_i64Pll(
+// LLVM: call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @_Z28test_wave_reduce_sub_u64_i64Pll(
+// OGCG: call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 %{{.*}}, i32 0)
+__device__ void test_wave_reduce_sub_u64_i64(long* out, long in) {
+  *out = __builtin_amdgcn_wave_reduce_sub_u64(in, 0);
+}
+
+// CIR-LABEL: @_Z29test_wave_reduce_min_i32_signPii
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.min" {{.*}} : (!s32i, !s32i) -> !s32i
+// LLVM: define{{.*}} void @_Z29test_wave_reduce_min_i32_signPii(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @_Z29test_wave_reduce_min_i32_signPii(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 %{{.*}}, i32 0)
+__device__ void test_wave_reduce_min_i32_sign(int* out, int in) {
+  *out = __builtin_amdgcn_wave_reduce_min_i32(in, 0);
+}
+
+// CIR-LABEL: @_Z31test_wave_reduce_min_u32_unsignPjj
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.umin" {{.*}} : (!u32i, !s32i) -> !u32i
+// LLVM: define{{.*}} void @_Z31test_wave_reduce_min_u32_unsignPjj(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @_Z31test_wave_reduce_min_u32_unsignPjj(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 %{{.*}}, i32 0)
+__device__ void test_wave_reduce_min_u32_unsign(unsigned int* out, unsigned int in) {
+  *out = __builtin_amdgcn_wave_reduce_min_u32(in, 0);
+}
+
+// CIR-LABEL: @_Z29test_wave_reduce_min_i64_signPll
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.min" {{.*}} : (!s64i, !s32i) -> !s64i
+// LLVM: define{{.*}} void @_Z29test_wave_reduce_min_i64_signPll(
+// LLVM: call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @_Z29test_wave_reduce_min_i64_signPll(
+// OGCG: call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 %{{.*}}, i32 0)
+__device__ void test_wave_reduce_min_i64_sign(long* out, long in) {
+  *out = __builtin_amdgcn_wave_reduce_min_i64(in, 0);
+}
+
+// CIR-LABEL: @_Z31test_wave_reduce_min_u64_unsignPmm
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.umin" {{.*}} : (!u64i, !s32i) -> !u64i
+// LLVM: define{{.*}} void @_Z31test_wave_reduce_min_u64_unsignPmm(
+// LLVM: call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @_Z31test_wave_reduce_min_u64_unsignPmm(
+// OGCG: call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 %{{.*}}, i32 0)
+__device__ void test_wave_reduce_min_u64_unsign(unsigned long* out, unsigned long in) {
+  *out = __builtin_amdgcn_wave_reduce_min_u64(in, 0);
+}
+
+// CIR-LABEL: @_Z29test_wave_reduce_max_i32_signPii
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.max" {{.*}} : (!s32i, !s32i) -> !s32i
+// LLVM: define{{.*}} void @_Z29test_wave_reduce_max_i32_signPii(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @_Z29test_wave_reduce_max_i32_signPii(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 %{{.*}}, i32 0)
+__device__ void test_wave_reduce_max_i32_sign(int* out, int in) {
+  *out = __builtin_amdgcn_wave_reduce_max_i32(in, 0);
+}
+
+// CIR-LABEL: @_Z31test_wave_reduce_max_u32_unsignPjj
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.umax" {{.*}} : (!u32i, !s32i) -> !u32i
+// LLVM: define{{.*}} void @_Z31test_wave_reduce_max_u32_unsignPjj(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @_Z31test_wave_reduce_max_u32_unsignPjj(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 %{{.*}}, i32 0)
+__device__ void test_wave_reduce_max_u32_unsign(unsigned int* out, unsigned int in) {
+  *out = __builtin_amdgcn_wave_reduce_max_u32(in, 0);
+}
+
+// CIR-LABEL: @_Z29test_wave_reduce_max_i64_signPll
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.max" {{.*}} : (!s64i, !s32i) -> !s64i
+// LLVM: define{{.*}} void @_Z29test_wave_reduce_max_i64_signPll(
+// LLVM: call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @_Z29test_wave_reduce_max_i64_signPll(
+// OGCG: call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 %{{.*}}, i32 0)
+__device__ void test_wave_reduce_max_i64_sign(long* out, long in) {
+  *out = __builtin_amdgcn_wave_reduce_max_i64(in, 0);
+}
+
+// CIR-LABEL: @_Z31test_wave_reduce_max_u64_unsignPmm
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.umax" {{.*}} : (!u64i, !s32i) -> !u64i
+// LLVM: define{{.*}} void @_Z31test_wave_reduce_max_u64_unsignPmm(
+// LLVM: call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @_Z31test_wave_reduce_max_u64_unsignPmm(
+// OGCG: call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 %{{.*}}, i32 0)
+__device__ void test_wave_reduce_max_u64_unsign(unsigned long* out, unsigned long in) {
+  *out = __builtin_amdgcn_wave_reduce_max_u64(in, 0);
+}
+
+// CIR-LABEL: @_Z28test_wave_reduce_and_b32_i32Pii
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.and" {{.*}} : (!s32i, !s32i) -> !s32i
+// LLVM: define{{.*}} void @_Z28test_wave_reduce_and_b32_i32Pii(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @_Z28test_wave_reduce_and_b32_i32Pii(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 %{{.*}}, i32 0)
+__device__ void test_wave_reduce_and_b32_i32(int* out, int in) {
+  *out = __builtin_amdgcn_wave_reduce_and_b32(in, 0);
+}
+
+// CIR-LABEL: @_Z28test_wave_reduce_and_b64_i64Pll
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.and" {{.*}} : (!s64i, !s32i) -> !s64i
+// LLVM: define{{.*}} void @_Z28test_wave_reduce_and_b64_i64Pll(
+// LLVM: call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @_Z28test_wave_reduce_and_b64_i64Pll(
+// OGCG: call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 %{{.*}}, i32 0)
+__device__ void test_wave_reduce_and_b64_i64(long* out, long in) {
+  *out = __builtin_amdgcn_wave_reduce_and_b64(in, 0);
+}
+
+// CIR-LABEL: @_Z27test_wave_reduce_or_b32_i32Pii
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.or" {{.*}} : (!s32i, !s32i) -> !s32i
+// LLVM: define{{.*}} void @_Z27test_wave_reduce_or_b32_i32Pii(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @_Z27test_wave_reduce_or_b32_i32Pii(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 %{{.*}}, i32 0)
+__device__ void test_wave_reduce_or_b32_i32(int* out, int in) {
+  *out = __builtin_amdgcn_wave_reduce_or_b32(in, 0);
+}
+
+// CIR-LABEL: @_Z27test_wave_reduce_or_b64_i64Pll
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.or" {{.*}} : (!s64i, !s32i) -> !s64i
+// LLVM: define{{.*}} void @_Z27test_wave_reduce_or_b64_i64Pll(
+// LLVM: call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @_Z27test_wave_reduce_or_b64_i64Pll(
+// OGCG: call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 %{{.*}}, i32 0)
+__device__ void test_wave_reduce_or_b64_i64(long* out, long in) {
+  *out = __builtin_amdgcn_wave_reduce_or_b64(in, 0);
+}
+
+// CIR-LABEL: @_Z28test_wave_reduce_xor_b32_i32Pii
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.xor" {{.*}} : (!s32i, !s32i) -> !s32i
+// LLVM: define{{.*}} void @_Z28test_wave_reduce_xor_b32_i32Pii(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @_Z28test_wave_reduce_xor_b32_i32Pii(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %{{.*}}, i32 0)
+__device__ void test_wave_reduce_xor_b32_i32(int* out, int in) {
+  *out = __builtin_amdgcn_wave_reduce_xor_b32(in, 0);
+}
+
+// CIR-LABEL: @_Z28test_wave_reduce_xor_b64_i64Pll
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.xor" {{.*}} : (!s64i, !s32i) -> !s64i
+// LLVM: define{{.*}} void @_Z28test_wave_reduce_xor_b64_i64Pll(
+// LLVM: call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @_Z28test_wave_reduce_xor_b64_i64Pll(
+// OGCG: call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 %{{.*}}, i32 0)
+__device__ void test_wave_reduce_xor_b64_i64(long* out, long in) {
+  *out = __builtin_amdgcn_wave_reduce_xor_b64(in, 0);
+}
+
+// CIR-LABEL: @_Z38test_wave_reduce_add_u32_iterative_i32Pii
+// CIR: cir.const #cir.int<1> : !s32i
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.add" {{.*}} : (!u32i, !s32i) -> !u32i
+// LLVM: define{{.*}} void @_Z38test_wave_reduce_add_u32_iterative_i32Pii(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %{{.*}}, i32 1)
+// OGCG: define{{.*}} void @_Z38test_wave_reduce_add_u32_iterative_i32Pii(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %{{.*}}, i32 1)
+__device__ void test_wave_reduce_add_u32_iterative_i32(int* out, int in) {
+  *out = __builtin_amdgcn_wave_reduce_add_u32(in, 1);
+}
+
+// CIR-LABEL: @_Z32test_wave_reduce_add_u32_dpp_i32Pii
+// CIR: cir.const #cir.int<2> : !s32i
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.add" {{.*}} : (!u32i, !s32i) -> !u32i
+// LLVM: define{{.*}} void @_Z32test_wave_reduce_add_u32_dpp_i32Pii(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %{{.*}}, i32 2)
+// OGCG: define{{.*}} void @_Z32test_wave_reduce_add_u32_dpp_i32Pii(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %{{.*}}, i32 2)
+__device__ void test_wave_reduce_add_u32_dpp_i32(int* out, int in) {
+  *out = __builtin_amdgcn_wave_reduce_add_u32(in, 2);
+}
+
+// CIR-LABEL: @_Z18test_div_scale_f64PdPidd
+// CIR: cir.llvm.intrinsic "amdgcn.div.scale" {{.*}} : (!cir.double, !cir.double, !cir.bool)
+// LLVM: define{{.*}} void @_Z18test_div_scale_f64PdPidd
+// LLVM: call { double, i1 } @llvm.amdgcn.div.scale.f64(double %{{.+}}, double %{{.+}}, i1 true)
+// OGCG: define{{.*}} void @_Z18test_div_scale_f64PdPidd
+// OGCG: call { double, i1 } @llvm.amdgcn.div.scale.f64(double %{{.+}}, double %{{.+}}, i1 true)
+__device__ void test_div_scale_f64(double* out, int* flagout, double a, double b)
+{
+  bool flag;
+  *out = __builtin_amdgcn_div_scale(a, b, true, &flag);
+  *flagout = flag;
+}
+
+// CIR-LABEL: @_Z18test_div_scale_f32PfPbff
+// CIR: cir.llvm.intrinsic "amdgcn.div.scale" {{.*}} : (!cir.float, !cir.float, !cir.bool)
+// LLVM: define{{.*}} void @_Z18test_div_scale_f32PfPbff
+// LLVM: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %{{.+}}, float %{{.+}}, i1 true)
+// OGCG: define{{.*}} void @_Z18test_div_scale_f32PfPbff
+// OGCG: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %{{.+}}, float %{{.+}}, i1 true)
+__device__ void test_div_scale_f32(float* out, bool* flagout, float a, float b)
+{
+  bool flag;
+  *out = __builtin_amdgcn_div_scalef(a, b, true, &flag);
+  *flagout = flag;
+}
+
+// CIR-LABEL: @_Z27test_div_scale_f32_with_ptrPfPiPbff
+// CIR: cir.llvm.intrinsic "amdgcn.div.scale" {{.*}} : (!cir.float, !cir.float, !cir.bool)
+// LLVM: define{{.*}} void @_Z27test_div_scale_f32_with_ptrPfPiPbff
+// LLVM: {{.*}}call{{.*}} { float, i1 } @llvm.amdgcn.div.scale.f32(float %{{.+}}, float %{{.+}}, i1 true)
+// OGCG: define{{.*}} void @_Z27test_div_scale_f32_with_ptrPfPiPbff
+// OGCG: {{.*}}call{{.*}} { float, i1 } @llvm.amdgcn.div.scale.f32(float %{{.+}}, float %{{.+}}, i1 true)
+__device__ void test_div_scale_f32_with_ptr(float* out, int* flagout, bool* flag, float a, float b)
+{
+  *out = __builtin_amdgcn_div_scalef(a, b, true, flag);
+}
+
+// CIR-LABEL: @_Z17test_div_fmas_f32Pdfffi
+// CIR: cir.llvm.intrinsic "amdgcn.div.fmas" {{.*}} : (!cir.float, !cir.float, !cir.float, !cir.bool) -> !cir.float
+// LLVM: define{{.*}} void @_Z17test_div_fmas_f32Pdfffi
+// LLVM: call float @llvm.amdgcn.div.fmas.f32(float %{{.+}}, float %{{.+}}, float %{{.+}}, i1 %{{.*}})
+// OGCG: define{{.*}} void @_Z17test_div_fmas_f32Pdfffi
+// OGCG: call {{.*}} float @llvm.amdgcn.div.fmas.f32(float %{{.+}}, float %{{.+}}, float %{{.+}}, i1 %{{.*}})
+__device__ void test_div_fmas_f32(double* out, float a, float b, float c, int d) {
+  *out = __builtin_amdgcn_div_fmasf(a, b, c, d);
+}
+
+// CIR-LABEL: @_Z17test_div_fmas_f64Pddddi
+// CIR: cir.llvm.intrinsic "amdgcn.div.fmas" {{.*}} : (!cir.double, !cir.double, !cir.double, !cir.bool) -> !cir.double
+// LLVM: define{{.*}} void @_Z17test_div_fmas_f64Pddddi
+// LLVM: call double @llvm.amdgcn.div.fmas.f64(double %{{.+}}, double %{{.+}}, double %{{.+}}, i1 %{{.*}})
+// OGCG: define{{.*}} void @_Z17test_div_fmas_f64Pddddi
+// OGCG: call {{.*}} double @llvm.amdgcn.div.fmas.f64(double %{{.+}}, double %{{.+}}, double %{{.+}}, i1 %{{.*}})
+__device__ void test_div_fmas_f64(double* out, double a, double b, double c, int d) {
+  *out = __builtin_amdgcn_div_fmas(a, b, c, d);
+}
+
+// CIR-LABEL: @_Z19test_ds_swizzle_i32Pii
+// CIR: cir.llvm.intrinsic "amdgcn.ds.swizzle" {{.*}} : (!s32i, !s32i) -> !s32i
+// LLVM: define{{.*}} void @_Z19test_ds_swizzle_i32Pii
+// LLVM: call i32 @llvm.amdgcn.ds.swizzle(i32 %{{.*}}, i32 32)
+// OGCG: define{{.*}} void @_Z19test_ds_swizzle_i32Pii
+// OGCG: call i32 @llvm.amdgcn.ds.swizzle(i32 %{{.*}}, i32 32)
+__device__ void test_ds_swizzle_i32(int* out, int a) {
+  *out = __builtin_amdgcn_ds_swizzle(a, 32);
+}
+
+// CIR-LABEL: @_Z13test_readlanePiii
+// CIR: cir.llvm.intrinsic "amdgcn.readlane" {{.*}} : (!s32i, !s32i) -> !s32i
+// LLVM: define{{.*}} void @_Z13test_readlanePiii
+// LLVM: call i32 @llvm.amdgcn.readlane.i32(i32 %{{.*}}, i32 %{{.*}})
+// OGCG: define{{.*}} void @_Z13test_readlanePiii
+// OGCG: call i32 @llvm.amdgcn.readlane.i32(i32 %{{.*}}, i32 %{{.*}})
+__device__ void test_readlane(int* out, int a, int b) {
+  *out = __builtin_amdgcn_readlane(a, b);
+}
+
+// CIR-LABEL: @_Z18test_readfirstlanePii
+// CIR: cir.llvm.intrinsic "amdgcn.readfirstlane" {{.*}} : (!s32i) -> !s32i
+// LLVM: define{{.*}} void @_Z18test_readfirstlanePii
+// LLVM: call i32 @llvm.amdgcn.readfirstlane.i32(i32 %{{.*}})
+// OGCG: define{{.*}} void @_Z18test_readfirstlanePii
+// OGCG: call i32 @llvm.amdgcn.readfirstlane.i32(i32 %{{.*}})
+__device__ void test_readfirstlane(int* out, int a) {
+  *out = __builtin_amdgcn_readfirstlane(a);
+}
+
+// CIR-LABEL: @_Z18test_div_fixup_f32Pffff
+// CIR: cir.llvm.intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.float, !cir.float, !cir.float) -> !cir.float
+// LLVM: define{{.*}} void @_Z18test_div_fixup_f32Pffff
+// LLVM: call{{.*}} float @llvm.amdgcn.div.fixup.f32(float %{{.+}}, float %{{.+}}, float %{{.+}})
+// OGCG: define{{.*}} void @_Z18test_div_fixup_f32Pffff
+// OGCG: call{{.*}} float @llvm.amdgcn.div.fixup.f32(float %{{.+}}, float %{{.+}}, float %{{.+}})
+__device__ void test_div_fixup_f32(float* out, float a, float b, float c) {
+  *out = __builtin_amdgcn_div_fixupf(a, b, c);
+}
+
+// CIR-LABEL: @_Z18test_div_fixup_f64Pdddd
+// CIR: cir.llvm.intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.double, !cir.double, !cir.double) -> !cir.double
+// LLVM: define{{.*}} void @_Z18test_div_fixup_f64Pdddd
+// LLVM: call{{.*}} double @llvm.amdgcn.div.fixup.f64(double %{{.+}}, double %{{.+}}, double %{{.+}})
+// OGCG: define{{.*}} void @_Z18test_div_fixup_f64Pdddd
+// OGCG: call{{.*}} double @llvm.amdgcn.div.fixup.f64(double %{{.+}}, double %{{.+}}, double %{{.+}})
+__device__ void test_div_fixup_f64(double* out, double a, double b, double c) {
+  *out = __builtin_amdgcn_div_fixup(a, b, c);
+}
+
+// CIR-LABEL: @_Z19test_trig_preop_f32Pffi
+// CIR: cir.llvm.intrinsic "amdgcn.trig.preop" {{.*}} : (!cir.float, !s32i) -> !cir.float
+// LLVM: define{{.*}} void @_Z19test_trig_preop_f32Pffi
+// LLVM: call{{.*}} float @llvm.amdgcn.trig.preop.f32(float %{{.+}}, i32 %{{.*}})
+// OGCG: define{{.*}} void @_Z19test_trig_preop_f32Pffi
+// OGCG: call{{.*}} float @llvm.amdgcn.trig.preop.f32(float %{{.+}}, i32 %{{.*}})
+__device__ void test_trig_preop_f32(float* out, float a, int b) {
+  *out = __builtin_amdgcn_trig_preopf(a, b);
+}
+
+// CIR-LABEL: @_Z19test_trig_preop_f64Pddi
+// CIR: cir.llvm.intrinsic "amdgcn.trig.preop" {{.*}} : (!cir.double, !s32i) -> !cir.double
+// LLVM: define{{.*}} void @_Z19test_trig_preop_f64Pddi
+// LLVM: call{{.*}} double @llvm.amdgcn.trig.preop.f64(double %{{.+}}, i32 %{{.*}})
+// OGCG: define{{.*}} void @_Z19test_trig_preop_f64Pddi
+// OGCG: call{{.*}} double @llvm.amdgcn.trig.preop.f64(double %{{.+}}, i32 %{{.*}})
+__device__ void test_trig_preop_f64(double* out, double a, int b) {
+  *out = __builtin_amdgcn_trig_preop(a, b);
+}
+
+// CIR-LABEL: @_Z12test_rcp_f32Pff
+// CIR: cir.llvm.intrinsic "amdgcn.rcp" {{.*}} : (!cir.float) -> !cir.float
+// LLVM: define{{.*}} void @_Z12test_rcp_f32Pff
+// LLVM: call{{.*}} float @llvm.amdgcn.rcp.f32(float %{{.*}})
+// OGCG: define{{.*}} void @_Z12test_rcp_f32Pff
+// OGCG: call{{.*}} float @llvm.amdgcn.rcp.f32(float %{{.*}})
+__device__ void test_rcp_f32(float* out, float a) {
+  *out = __builtin_amdgcn_rcpf(a);
+}
+
+// CIR-LABEL: @_Z12test_rcp_f64Pdd
+// CIR: cir.llvm.intrinsic "amdgcn.rcp" {{.*}} : (!cir.double) -> !cir.double
+// LLVM: define{{.*}} void @_Z12test_rcp_f64Pdd
+// LLVM: call{{.*}} double @llvm.amdgcn.rcp.f64(double %{{.*}})
+// OGCG: define{{.*}} void @_Z12test_rcp_f64Pdd
+// OGCG: call{{.*}} double @llvm.amdgcn.rcp.f64(double %{{.*}})
+__device__ void test_rcp_f64(double* out, double a) {
+  *out = __builtin_amdgcn_rcp(a);
+}
+
+// CIR-LABEL: @_Z13test_sqrt_f32Pff
+// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.float) -> !cir.float
+// LLVM: define{{.*}} void @_Z13test_sqrt_f32Pff
+// LLVM: call{{.*}} float @llvm.amdgcn.sqrt.f32(float %{{.*}})
+// OGCG: define{{.*}} void @_Z13test_sqrt_f32Pff
+// OGCG: call{{.*}} float @llvm.amdgcn.sqrt.f32(float %{{.*}})
+__device__ void test_sqrt_f32(float* out, float a) {
+  *out = __builtin_amdgcn_sqrtf(a);
+}
+
+// CIR-LABEL: @_Z13test_sqrt_f64Pdd
+// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.double) -> !cir.double
+// LLVM: define{{.*}} void @_Z13test_sqrt_f64Pdd
+// LLVM: call{{.*}} double @llvm.amdgcn.sqrt.f64(double %{{.*}})
+// OGCG: define{{.*}} void @_Z13test_sqrt_f64Pdd
+// OGCG: call{{.*}} double @llvm.amdgcn.sqrt.f64(double %{{.*}})
+__device__ void test_sqrt_f64(double* out, double a) {
+  *out = __builtin_amdgcn_sqrt(a);
+}
+
+// CIR-LABEL: @_Z12test_rsq_f32Pff
+// CIR: cir.llvm.intrinsic "amdgcn.rsq" {{.*}} : (!cir.float) -> !cir.float
+// LLVM: define{{.*}} void @_Z12test_rsq_f32Pff
+// LLVM: call{{.*}} float @llvm.amdgcn.rsq.f32(float %{{.*}})
+// OGCG: define{{.*}} void @_Z12test_rsq_f32Pff
+// OGCG: call{{.*}} float @llvm.amdgcn.rsq.f32(float %{{.*}})
+__device__ void test_rsq_f32(float* out, float a)
+{
+  *out = __builtin_amdgcn_rsqf(a);
+}
+
+// CIR-LABEL: @_Z12test_rsq_f64Pdd
+// CIR: cir.llvm.intrinsic "amdgcn.rsq" {{.*}} : (!cir.double) -> !cir.double
+// LLVM: define{{.*}} void @_Z12test_rsq_f64Pdd
+// LLVM: call{{.*}} double @llvm.amdgcn.rsq.f64(double %{{.*}})
+// OGCG: define{{.*}} void @_Z12test_rsq_f64Pdd
+// OGCG: call{{.*}} double @llvm.amdgcn.rsq.f64(double %{{.*}})
+__device__ void test_rsq_f64(double* out, double a) {
+  *out = __builtin_amdgcn_rsq(a);
+}
+
+// CIR-LABEL: @_Z18test_rsq_clamp_f32Pff
+// CIR: cir.llvm.intrinsic "amdgcn.rsq.clamp" {{.*}} : (!cir.float) -> !cir.float
+// LLVM: define{{.*}} void @_Z18test_rsq_clamp_f32Pff
+// LLVM: call{{.*}} float @llvm.amdgcn.rsq.clamp.f32(float %{{.*}})
+// OGCG: define{{.*}} void @_Z18test_rsq_clamp_f32Pff
+// OGCG: call{{.*}} float @llvm.amdgcn.rsq.clamp.f32(float %{{.*}})
+__device__ void test_rsq_clamp_f32(float* out, float a)
+{
+  *out = __builtin_amdgcn_rsq_clampf(a);
+}
+
+// CIR-LABEL: @_Z18test_rsq_clamp_f64Pdd
+// CIR: cir.llvm.intrinsic "amdgcn.rsq.clamp" {{.*}} : (!cir.double) -> !cir.double
+// LLVM: define{{.*}} void @_Z18test_rsq_clamp_f64Pdd
+// LLVM: call{{.*}} double @llvm.amdgcn.rsq.clamp.f64(double %{{.*}})
+// OGCG: define{{.*}} void @_Z18test_rsq_clamp_f64Pdd
+// OGCG: call{{.*}} double @llvm.amdgcn.rsq.clamp.f64(double %{{.*}})
+__device__ void test_rsq_clamp_f64(double* out, double a) {
+  *out = __builtin_amdgcn_rsq_clamp(a);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/calling-conv-lowering-amdgpu.hip b/clang/test/CIR/Incubator/CodeGen/HIP/calling-conv-lowering-amdgpu.hip
new file mode 100644
index 0000000000000..ac69eb451e4eb
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/calling-conv-lowering-amdgpu.hip
@@ -0,0 +1,42 @@
+#include "../Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN:   -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR %s --input-file=%t.cir
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN:   -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM %s --input-file=%t.ll
+
+// Test that AMDGPU kernel calling convention is correctly propagated
+// from CIR through LLVM dialect to LLVM IR
+
+// CIR: cir.func{{.*}}@kernel_simple(){{.*}}cc(amdgpu_kernel)
+// LLVM: define{{.*}}amdgpu_kernel void @kernel_simple()
+extern "C" __global__ void kernel_simple() {}
+
+// CIR: cir.func{{.*}}@kernel_with_arg(%arg{{[0-9]+}}: !s32i{{.*}}){{.*}}cc(amdgpu_kernel)
+// LLVM: define{{.*}}amdgpu_kernel void @kernel_with_arg(i32{{.*}}%{{[0-9]+}})
+extern "C" __global__ void kernel_with_arg(int x) {}
+
+// Test device function
+// CIR: cir.func{{.*}}@device_func(%arg{{[0-9]+}}: !s32i{{.*}})
+// CIR-NOT: cc(amdgpu_kernel)
+// LLVM: define{{.*}}void @device_func(i32{{.*}}%{{[0-9]+}})
+extern "C" __device__ void device_func(int x) {}
+
+// Test kernel with multiple arguments
+// CIR: cir.func{{.*}}@kernel_multi_arg(%arg{{[0-9]+}}: !s32i{{.*}}, %arg{{[0-9]+}}: !cir.float{{.*}}){{.*}}cc(amdgpu_kernel)
+// LLVM: define{{.*}}amdgpu_kernel void @kernel_multi_arg(i32{{.*}}%{{[0-9]+}}, float{{.*}}%{{[0-9]+}})
+extern "C" __global__ void kernel_multi_arg(int a, float b) {}
+
+// Test that kernel can call device functions
+// CIR: cir.func{{.*}}@kernel_calls_device(){{.*}}cc(amdgpu_kernel)
+// LLVM: define{{.*}}amdgpu_kernel void @kernel_calls_device()
+extern "C" __global__ void kernel_calls_device() {
+  // CIR: cir.call @device_func{{.*}}(!s32i)
+  // LLVM: call{{.*}}void @device_func(i32{{.*}}42)
+  device_func(42);
+}
+
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/global-vars.cpp b/clang/test/CIR/Incubator/CodeGen/HIP/global-vars.cpp
new file mode 100644
index 0000000000000..2b4c5575feb1e
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/global-vars.cpp
@@ -0,0 +1,101 @@
+#include "cuda.h"
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir \
+// RUN:            -fcuda-is-device -emit-cir -target-sdk-version=12.3 \
+// RUN:            -I%S/../Inputs/ -x hip %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/../Inputs/ -x hip %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x hip -emit-cir -target-sdk-version=12.3 \
+// RUN:            -I%S/../Inputs/ %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x hip -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/../Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
+// RUN:            -x hip -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/../Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
+// RUN:            -fcuda-is-device -emit-llvm -target-sdk-version=12.3 \
+// RUN:            -I%S/../Inputs/ -x hip %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
+
+__shared__ int shared;
+// CIR-DEVICE: cir.global external{{.*}}lang_address_space(offload_local) @shared = #cir.undef
+// LLVM-DEVICE: @shared = addrspace(3) global i32 undef, align 4
+// CIR-HOST: cir.global{{.*}}@shared = #cir.undef : !s32i {alignment = 4 : i64}
+// CIR-HOST-NOT: cu.shadow_name
+// LLVM-HOST: @shared = internal global i32 undef, align 4
+// OGCG-HOST: @shared = internal global i32
+// OGCG-DEVICE: @shared = addrspace(3) global i32 undef, align 4
+
+__constant__ int b;
+// CIR-DEVICE: cir.global constant external{{.*}}lang_address_space(offload_constant) @b = #cir.int<0> : !s32i {alignment = 4 : i64, cu.externally_initialized = #cir.cu.externally_initialized, cu.var_registration = #cir.cu.var_registration<Variable, constant>}
+// LLVM-DEVICE: @b = addrspace(4) externally_initialized constant i32 0, align 4
+// CIR-HOST: cir.global{{.*}}"private"{{.*}}internal{{.*}}@b = #cir.undef : !s32i {alignment = 4 : i64, cu.shadow_name = #cir.cu.shadow_name<b>, cu.var_registration = #cir.cu.var_registration<Variable, constant>}
+// LLVM-HOST: @b = internal global i32 undef, align 4
+// OGCG-HOST: @b = internal global i32
+// OGCG-DEVICE: @b = addrspace(4) externally_initialized constant i32 0, align 4
+
+// External device variables should remain external on host side (they're just declarations)
+// Note: External declarations may not appear in output if they're not referenced
+extern __device__ int ext_device_var;
+// CIR-HOST-NOT: cir.global{{.*}}@ext_device_var
+// LLVM-HOST-NOT: @ext_device_var
+// OGCG-HOST-NOT: @ext_device_var
+// OGCG-DEVICE-NOT: @ext_device_var
+
+extern __constant__ int ext_constant_var;
+// CIR-HOST-NOT: cir.global{{.*}}@ext_constant_var
+// LLVM-HOST-NOT: @ext_constant_var
+// OGCG-HOST-NOT: @ext_constant_var
+// OGCG-DEVICE-NOT: @ext_constant_var
+
+// External device variables with definitions should be internal on host
+extern __device__ int ext_device_var_def;
+__device__ int ext_device_var_def = 1;
+// CIR-DEVICE: cir.global external{{.*}}lang_address_space(offload_global) @ext_device_var_def = #cir.int<1>
+// LLVM-DEVICE: @ext_device_var_def = addrspace(1) externally_initialized global i32 1, align 4
+// CIR-HOST: cir.global{{.*}}"private"{{.*}}internal{{.*}}@ext_device_var_def = #cir.undef : !s32i {alignment = 4 : i64, cu.shadow_name = #cir.cu.shadow_name<ext_device_var_def>, cu.var_registration = #cir.cu.var_registration<Variable>}
+// LLVM-HOST: @ext_device_var_def = internal global i32 undef, align 4
+// OGCG-HOST: @ext_device_var_def = internal global i32
+// OGCG-DEVICE: @ext_device_var_def = addrspace(1) externally_initialized global i32 1, align 4
+
+extern __constant__ int ext_constant_var_def;
+__constant__ int ext_constant_var_def = 2;
+// CIR-DEVICE: cir.global constant external{{.*}}lang_address_space(offload_constant) @ext_constant_var_def = #cir.int<2>
+// LLVM-DEVICE: @ext_constant_var_def = addrspace(4) externally_initialized constant i32 2, align 4
+// OGCG-DEVICE: @ext_constant_var_def = addrspace(4) externally_initialized constant i32 2, align 4
+// CIR-HOST: cir.global{{.*}}"private"{{.*}}internal{{.*}}@ext_constant_var_def = #cir.undef : !s32i {alignment = 4 : i64, cu.shadow_name = #cir.cu.shadow_name<ext_constant_var_def>, cu.var_registration = #cir.cu.var_registration<Variable, constant>}
+// LLVM-HOST: @ext_constant_var_def = internal global i32 undef, align 4
+// OGCG-HOST: @ext_constant_var_def = internal global i32
+
+// Regular host variables should NOT be internalized
+int host_var;
+// CIR-HOST: cir.global external @host_var = #cir.int<0> : !s32i
+// LLVM-HOST: @host_var = global i32 0, align 4
+// OGCG-HOST: @host_var ={{.*}} global i32
+
+// CIR-DEVICE-NOT: cir.global{{.*}}@host_var
+// LLVM-DEVICE-NOT: @host_var
+// OGCG-DEVICE-NOT: @host_var
+
+// External host variables should remain external (may not appear if not referenced)
+extern int ext_host_var;
+// CIR-HOST-NOT: cir.global{{.*}}@ext_host_var
+// LLVM-HOST-NOT: @ext_host_var
+// OGCG-HOST-NOT: @ext_host_var
+
+// CIR-DEVICE-NOT: cir.global{{.*}}@ext_host_var
+// LLVM-DEVICE-NOT: @ext_host_var
+// OGCG-DEVICE-NOT: @ext_host_var
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/hip-cuid.hip b/clang/test/CIR/Incubator/CodeGen/HIP/hip-cuid.hip
new file mode 100644
index 0000000000000..12faa6e159c33
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/hip-cuid.hip
@@ -0,0 +1,26 @@
+#include "../Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN:            -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR %s --input-file=%t.cir
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN:            -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM %s --input-file=%t.ll
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \
+// RUN:            -fcuda-is-device -emit-llvm %s -o %t.ogcg.ll
+// RUN: FileCheck --check-prefix=OGCG %s --input-file=%t.ogcg.ll
+
+// Test that HIP compiler unit ID global is emitted
+
+// CIR: cir.global external lang_address_space(offload_global) @__hip_cuid_{{.*}} = #cir.int<0> : !u8i
+
+// LLVM: @__hip_cuid_{{.*}} = addrspace(1) global i8 0
+// LLVM: @llvm.compiler.used = {{.*}}@__hip_cuid_
+
+// OGCG: @__hip_cuid_{{.*}} = addrspace(1) global i8 0
+// OGCG: @llvm.compiler.used = {{.*}}@__hip_cuid_
+
+__global__ void kernel() {}
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/ptr-diff.cpp b/clang/test/CIR/Incubator/CodeGen/HIP/ptr-diff.cpp
new file mode 100644
index 0000000000000..13593935c840f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/ptr-diff.cpp
@@ -0,0 +1,60 @@
+#include "cuda.h"
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN:            -fcuda-is-device -fhip-new-launch-api \
+// RUN:            -I%S/../Inputs/ -emit-cir %s -o %t.ll
+// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN:            -fcuda-is-device -fhip-new-launch-api \
+// RUN:            -I%S/../Inputs/ -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip  \
+// RUN:            -fcuda-is-device -fhip-new-launch-api \
+// RUN:            -I%S/../Inputs/ -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
+
+__device__ int ptr_diff() {
+  const char c_str[] = "c-string"; 
+  const char* len =  c_str;  
+  return c_str - len;
+}
+
+
+// CIR-DEVICE: %[[#LenLocalAlloca:]] = cir.alloca !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>, lang_address_space(offload_private)>, ["len", init]
+// CIR-DEVICE: %[[#LenLocalAddr:]] = cir.cast address_space %[[#LenLocalAlloca]] : !cir.ptr<!cir.ptr<!s8i>, lang_address_space(offload_private)> -> !cir.ptr<!cir.ptr<!s8i>>
+// CIR-DEVICE: %[[#GlobalPtr:]] = cir.get_global @_ZZ8ptr_diffvE5c_str : !cir.ptr<!cir.array<!s8i x 9>, lang_address_space(offload_constant)>
+// CIR-DEVICE: %[[#CastDecay:]] = cir.cast array_to_ptrdecay %[[#GlobalPtr]] : !cir.ptr<!cir.array<!s8i x 9>, lang_address_space(offload_constant)>
+// CIR-DEVICE: %[[#LenLocalAddrCast:]] = cir.cast bitcast %[[#LenLocalAddr]] : !cir.ptr<!cir.ptr<!s8i>> -> !cir.ptr<!cir.ptr<!s8i, lang_address_space(offload_constant)>>
+// CIR-DEVICE: cir.store align(8) %[[#CastDecay]], %[[#LenLocalAddrCast]] : !cir.ptr<!s8i, lang_address_space(offload_constant)>, !cir.ptr<!cir.ptr<!s8i, lang_address_space(offload_constant)>>
+// CIR-DEVICE: %[[#CStr:]] = cir.cast array_to_ptrdecay %[[#GlobalPtr]] : !cir.ptr<!cir.array<!s8i x 9>, lang_address_space(offload_constant)> -> !cir.ptr<!s8i, lang_address_space(offload_constant)>
+// CIR-DEVICE: %[[#LoadedLenAddr:]] = cir.load align(8) %[[#LenLocalAddr]] : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
+// CIR-DEVICE: %[[#AddrCast:]] = cir.cast address_space %[[#LoadedLenAddr]] : !cir.ptr<!s8i> -> !cir.ptr<!s8i, lang_address_space(offload_constant)>
+// CIR-DEVICE: %[[#DIFF:]] = cir.ptr_diff %[[#CStr]], %[[#AddrCast]] : !cir.ptr<!s8i, lang_address_space(offload_constant)>
+
+// LLVM-DEVICE: define dso_local i32 @_Z8ptr_diffv()
+// LLVM-DEVICE: %[[#RetvalAddr:]] = alloca i32, i64 1, align 4, addrspace(5)
+// LLVM-DEVICE: %[[#LenLocalAddr:]] = alloca ptr, i64 1, align 8, addrspace(5)
+// LLVM-DEVICE: %[[#LenLocalAddrCast:]] = addrspacecast ptr addrspace(5) %[[#LenLocalAddr]] to ptr
+// LLVM-DEVICE: store ptr addrspace(4) @_ZZ8ptr_diffvE5c_str, ptr %[[#LenLocalAddrCast]], align 8
+// LLVM-DEVICE: %[[#LoadedAddr:]] = load ptr, ptr %[[#LenLocalAddrCast]], align 8
+// LLVM-DEVICE: %[[#CastedVal:]] = addrspacecast ptr %[[#LoadedAddr]] to ptr addrspace(4)
+// LLVM-DEVICE: %[[#IntVal:]] = ptrtoint ptr addrspace(4) %[[#CastedVal]] to i64
+// LLVM-DEVICE: %[[#SubVal:]] = sub i64 ptrtoint (ptr addrspace(4) @_ZZ8ptr_diffvE5c_str to i64), %[[#IntVal]]
+
+// OGCG-DEVICE: define dso_local noundef i32 @_Z8ptr_diffv() #0
+// OGCG-DEVICE: %[[RETVAL:.*]] = alloca i32, align 4, addrspace(5)
+// OGCG-DEVICE: %[[C_STR:.*]] = alloca [9 x i8], align 1, addrspace(5)
+// OGCG-DEVICE: %[[LEN:.*]] = alloca ptr, align 8, addrspace(5)
+// OGCG-DEVICE: %[[RETVAL_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[RETVAL]] to ptr
+// OGCG-DEVICE: %[[C_STR_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[C_STR]] to ptr
+// OGCG-DEVICE: %[[LEN_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[LEN]] to ptr
+// OGCG-DEVICE: %[[ARRAYDECAY:.*]] = getelementptr inbounds [9 x i8], ptr %[[C_STR_ASCAST]], i64 0, i64 0
+// OGCG-DEVICE: store ptr %[[ARRAYDECAY]], ptr %[[LEN_ASCAST]], align 8
+// OGCG-DEVICE: %[[ARRAYDECAY1:.*]] = getelementptr inbounds [9 x i8], ptr %[[C_STR_ASCAST]], i64 0, i64 0
+// OGCG-DEVICE: %[[LOADED:.*]] = load ptr, ptr %[[LEN_ASCAST]], align 8
+// OGCG-DEVICE: %[[LHS:.*]] = ptrtoint ptr %[[ARRAYDECAY1]] to i64
+// OGCG-DEVICE: %[[RHS:.*]] = ptrtoint ptr %[[LOADED]] to i64
+// OGCG-DEVICE: %[[SUB:.*]] = sub i64 %[[LHS]], %[[RHS]]
+// OGCG-DEVICE: %[[CONV:.*]] = trunc i64 %[[SUB]] to i32
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/registration.cpp b/clang/test/CIR/Incubator/CodeGen/HIP/registration.cpp
new file mode 100644
index 0000000000000..260d9e2304c3f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/registration.cpp
@@ -0,0 +1,262 @@
+#include "../Inputs/cuda.h"
+
+// RUN: echo "sample fatbin" > %t.fatbin
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x hip -emit-cir -fhip-new-launch-api -I%S/../Inputs/ \
+// RUN:            -fcuda-include-gpubinary %t.fatbin \
+// RUN:            %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x hip -emit-llvm -fhip-new-launch-api  -I%S/../Inputs/ \
+// RUN:            -fcuda-include-gpubinary %t.fatbin \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
+
+// OGCG emits LLVM IR in a different order than ClangIR, so the OGCG checks are placed at the end of this file.
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu  \
+// RUN:            -x hip -emit-llvm -fhip-new-launch-api  -I%S/../Inputs/ \
+// RUN:            -fcuda-include-gpubinary %t.fatbin \
+// RUN:            %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
+
+
+// CIR-HOST: module @"{{.*}}" attributes {
+// CIR-HOST:   cir.cu.binary_handle = #cir.cu.binary_handle<{{.*}}.fatbin>,
+// CIR-HOST:   cir.global_ctors = [#cir.global_ctor<"__hip_module_ctor", {{[0-9]+}}>]
+// CIR-HOST: }
+
+// LLVM-HOST: @.strb0 = private constant [2 x i8] c"b\00"
+// LLVM-HOST: @.stra1 = private constant [2 x i8] c"a\00"
+// LLVM-HOST: @.str_Z2fnv = private constant [7 x i8] c"_Z2fnv\00"
+// LLVM-HOST: @__hip_fatbin_str = private constant [14 x i8] c"sample fatbin\0A", section ".hip_fatbin"
+// LLVM-HOST: @__hip_fatbin_wrapper = internal constant {
+// LLVM-HOST:   i32 1212764230, i32 1, ptr @__hip_fatbin_str, ptr null
+// LLVM-HOST: }, section ".hipFatBinSegment"
+// LLVM-HOST: @_Z2fnv = constant ptr @_Z17__device_stub__fnv, align 8
+// LLVM-HOST: @a = internal global i32 undef, align 4
+// LLVM-HOST: @b = internal global i32 undef, align 4
+// LLVM-HOST: @llvm.global_ctors = {{.*}}ptr @__hip_module_ctor
+
+// CIR-HOST:  cir.func internal private @__hip_module_dtor() {
+// CIR-HOST:   %[[#HandleGlobal:]] = cir.get_global @__hip_gpubin_handle
+// CIR-HOST:   %[[#HandleAddr:]] = cir.load %[[#HandleGlobal]] : !cir.ptr<!cir.ptr<!cir.ptr<!void>>>, !cir.ptr<!cir.ptr<!void>> loc(#loc)
+// CIR-HOST:   %[[#NullVal:]] = cir.const #cir.ptr<null> : !cir.ptr<!cir.ptr<!void>> loc(#loc)
+// CIR-HOST:   %[[#CmpRes:]] = cir.cmp(ne, %[[#HandleAddr]], %[[#NullVal]]) : !cir.ptr<!cir.ptr<!void>>, !cir.bool loc(#loc)
+// CIR-HOST:    cir.brcond %[[#CmpRes]] ^bb1, ^bb2 loc(#loc)
+// CIR-HOST:  ^bb1:
+// CIR-HOST:    cir.call @__hipUnregisterFatBinary(%[[#HandleAddr]]) : (!cir.ptr<!cir.ptr<!void>>) -> () loc(#loc)
+// CIR-HOST:    %[[#HandleAddr:]] = cir.get_global @__hip_gpubin_handle : !cir.ptr<!cir.ptr<!cir.ptr<!void>>> loc(#loc)
+// CIR-HOST:    cir.store %[[#NullVal]], %[[#HandleAddr]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!cir.ptr<!cir.ptr<!void>>> loc(#loc)
+// CIR-HOST:    cir.br ^bb2 loc(#loc)
+// CIR-HOST:  ^bb2:  // 2 preds: ^bb0, ^bb1
+// CIR-HOST:    cir.return loc(#loc)
+// CIR-HOST:  } loc(#loc)
+
+// LLVM-HOST: define internal void @__hip_module_dtor() {
+// LLVM-HOST:    %[[#LLVMHandleVar:]] = load ptr, ptr @__hip_gpubin_handle, align 8
+// LLVM-HOST:    %[[#ICMP:]] = icmp ne ptr %[[#LLVMHandleVar]], null
+// LLVM-HOST:    br i1 %[[#ICMP]], label %[[IFBLOCK:[^,]+]], label %[[EXITBLOCK:[^,]+]]
+// LLVM-HOST:  [[IFBLOCK]]:                                               ; preds = %0
+// LLVM-HOST:    call void @__hipUnregisterFatBinary(ptr %[[#LLVMHandleVar]])
+// LLVM-HOST:    store ptr null, ptr @__hip_gpubin_handle, align 8
+// LLVM-HOST:    br label %[[EXITBLOCK]]
+// LLVM-HOST:  [[EXITBLOCK]]:                                             ; preds = %[[IFBLOCK]], %0
+// LLVM-HOST:    ret void
+// LLVM-HOST:  }
+
+// CIR-HOST: cir.global "private" constant cir_private @".str_Z2fnv" =
+// CIR-HOST-SAME: #cir.const_array<"_Z2fnv", trailing_zeros>
+ 
+__global__ void fn() {}
+
+
+__device__ int a;
+__constant__ int b;
+
+// CIR-HOST: cir.func internal private @__hip_register_globals(%[[FatbinHandle:[a-zA-Z0-9]+]]{{.*}}) {
+// CIR-HOST:   %[[#NULL:]] = cir.const #cir.ptr<null>
+// CIR-HOST:   %[[#T1:]] = cir.get_global @".str_Z2fnv"
+// CIR-HOST:   %[[#DeviceFn:]] = cir.cast bitcast %[[#T1]]
+// CIR-HOST:   %[[#T2:]] = cir.get_global @_Z2fnv
+// CIR-HOST:   %[[#HostFnHandle:]] = cir.cast bitcast %[[#T2]]
+// CIR-HOST:   %[[#MinusOne:]] = cir.const #cir.int<-1>
+// CIR-HOST:   cir.call @__hipRegisterFunction(
+// CIR-HOST-SAME: %[[FatbinHandle]],
+// CIR-HOST-SAME: %[[#HostFnHandle]],
+// CIR-HOST-SAME: %[[#DeviceFn]],
+// CIR-HOST-SAME: %[[#DeviceFn]],
+// CIR-HOST-SAME: %[[#MinusOne]],
+// CIR-HOST-SAME: %[[#NULL]], %[[#NULL]], %[[#NULL]], %[[#NULL]], %[[#NULL]])
+// Registration for __constant__ int b (isConstant=1):
+// CIR-HOST: %[[#T3:]] = cir.get_global @".strb0"
+// CIR-HOST: %[[#DeviceB:]] = cir.cast bitcast %[[#T3]]
+// CIR-HOST: %[[#T4:]] = cir.get_global @b
+// CIR-HOST: %[[#HostB:]] = cir.cast bitcast %[[#T4]]
+// CIR-HOST: %[[#ExtB:]] = cir.const #cir.int<0>
+// CIR-HOST: %[[#SzB:]] = cir.const #cir.int<4>
+// CIR-HOST: %[[#ConstB:]] = cir.const #cir.int<1>
+// CIR-HOST: %[[#ZeroB:]] = cir.const #cir.int<0>
+// CIR-HOST: cir.call @__hipRegisterVar(%[[FatbinHandle]],
+// CIR-HOST-SAME: %[[#HostB]],
+// CIR-HOST-SAME: %[[#DeviceB]],
+// CIR-HOST-SAME: %[[#DeviceB]],
+// CIR-HOST-SAME: %[[#ExtB]],
+// CIR-HOST-SAME: %[[#SzB]],
+// CIR-HOST-SAME: %[[#ConstB]],
+// CIR-HOST-SAME: %[[#ZeroB]])
+//
+// Registration for __device__ int a (isConstant=0):
+// CIR-HOST: %[[#T5:]] = cir.get_global @".stra1"
+// CIR-HOST: %[[#DeviceA:]] = cir.cast bitcast %[[#T5]]
+// CIR-HOST: %[[#T6:]] = cir.get_global @a
+// CIR-HOST: %[[#HostA:]] = cir.cast bitcast %[[#T6]]
+// CIR-HOST: %[[#ExtA:]] = cir.const #cir.int<0>
+// CIR-HOST: %[[#SzA:]] = cir.const #cir.int<4>
+// CIR-HOST: %[[#ConstA:]] = cir.const #cir.int<0>
+// CIR-HOST: %[[#ZeroA:]] = cir.const #cir.int<0>
+// CIR-HOST: cir.call @__hipRegisterVar(%[[FatbinHandle]],
+// CIR-HOST-SAME: %[[#HostA]],
+// CIR-HOST-SAME: %[[#DeviceA]],
+// CIR-HOST-SAME: %[[#DeviceA]],
+// CIR-HOST-SAME: %[[#ExtA]],
+// CIR-HOST-SAME: %[[#SzA]],
+// CIR-HOST-SAME: %[[#ConstA]],
+// CIR-HOST-SAME: %[[#ZeroA]])
+// CIR-HOST: cir.return loc(#loc)
+// CIR-HOST: }
+
+// LLVM-HOST: define internal void @__hip_register_globals(ptr %[[#LLVMFatbin:]]) {
+// LLVM-HOST:   call i32 @__hipRegisterFunction(
+// LLVM-HOST-SAME: ptr %[[#LLVMFatbin]],
+// LLVM-HOST-SAME: ptr @_Z2fnv,
+// LLVM-HOST-SAME: ptr @.str_Z2fnv,
+// LLVM-HOST-SAME: ptr @.str_Z2fnv,
+// LLVM-HOST-SAME: i32 -1,
+// LLVM-HOST-SAME: ptr null, ptr null, ptr null, ptr null, ptr null)
+// LLVM-HOST:   call void @__hipRegisterVar(
+// LLVM-HOST-SAME: ptr %0, ptr @b, ptr @.strb0, ptr @.strb0,
+// LLVM-HOST-SAME: i32 0, i64 4, i32 1, i32 0)
+// LLVM-HOST:   call void @__hipRegisterVar(
+// LLVM-HOST-SAME: ptr %0, ptr @a, ptr @.stra1, ptr @.stra1,
+// LLVM-HOST-SAME: i32 0, i64 4, i32 0, i32 0)
+// LLVM-HOST: }
+
+// The content in const array should be the same as echoed above,
+// with a trailing line break ('\n', 0x0A).
+// CIR-HOST: cir.global "private" constant cir_private @__hip_fatbin_str =
+// CIR-HOST-SAME: #cir.const_array<"sample fatbin\0A">
+// CIR-HOST-SAME: {{.*}}section = ".hip_fatbin"
+
+// The first value is HIP file head magic number.
+// CIR-HOST: cir.global "private" constant internal @__hip_fatbin_wrapper
+// CIR-HOST: = #cir.const_record<{
+// CIR-HOST:   #cir.int<1212764230> : !s32i,
+// CIR-HOST:   #cir.int<1> : !s32i,
+// CIR-HOST:   #cir.global_view<@__hip_fatbin_str> : !cir.ptr<!void>,
+// CIR-HOST:   #cir.ptr<null> : !cir.ptr<!void>
+// CIR-HOST: }>
+// CIR-HOST-SAME: {{.*}}section = ".hipFatBinSegment"
+
+// CIR-HOST: cir.func internal private @__hip_module_ctor() {
+// CIR-HOST:   %[[#HandleGlobalVar:]] = cir.get_global @__hip_gpubin_handle : !cir.ptr<!cir.ptr<!cir.ptr<!void>>> loc(#loc)
+// CIR-HOST:   %[[#HandleAddr:]] = cir.load %[[#HandleGlobalVar]] : !cir.ptr<!cir.ptr<!cir.ptr<!void>>>, !cir.ptr<!cir.ptr<!void>> loc(#loc)
+// CIR-HOST:   %[[#NullVal:]] = cir.const #cir.ptr<null> : !cir.ptr<!cir.ptr<!void>> loc(#loc)
+// CIR-HOST:   %[[#ICMP:]] = cir.cmp(eq, %[[#HandleAddr]], %[[#NullVal]]) : !cir.ptr<!cir.ptr<!void>>, !cir.bool loc(#loc)
+// CIR-HOST:   cir.brcond %[[#ICMP]] ^bb1, ^bb2 loc(#loc)
+// CIR-HOST: ^bb1:
+// CIR-HOST:   %[[#FatBinWrapper:]] = cir.get_global @__hip_fatbin_wrapper : !cir.ptr<!rec_anon_struct> loc(#loc)
+// CIR-HOST:   %[[#CastGlobalFatBin:]] = cir.cast bitcast %[[#FatBinWrapper]] : !cir.ptr<!rec_anon_struct> -> !cir.ptr<!void> loc(#loc)
+// CIR-HOST:   %[[#RTVal:]] = cir.call @__hipRegisterFatBinary(%[[#CastGlobalFatBin]]) : (!cir.ptr<!void>) -> !cir.ptr<!cir.ptr<!void>> loc(#loc)
+// CIR-HOST:   %[[#HandleGlobalVar:]] = cir.get_global @__hip_gpubin_handle : !cir.ptr<!cir.ptr<!cir.ptr<!void>>> loc(#loc)
+// CIR-HOST:   cir.store %[[#RTVal]], %[[#HandleGlobalVar]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!cir.ptr<!cir.ptr<!void>>> loc(#loc)
+// CIR-HOST:   cir.br ^bb2 loc(#loc)
+// CIR-HOST: ^bb2:
+// CIR-HOST:   %[[#HandleGlobalVar:]] = cir.get_global @__hip_gpubin_handle : !cir.ptr<!cir.ptr<!cir.ptr<!void>>> loc(#loc)
+// CIR-HOST:   %[[#HandleVal:]] = cir.load %[[#HandleGlobalVar]] : !cir.ptr<!cir.ptr<!cir.ptr<!void>>>, !cir.ptr<!cir.ptr<!void>> loc(#loc)
+// CIR-HOST:   cir.call @__hip_register_globals(%[[#HandleVal]]) : (!cir.ptr<!cir.ptr<!void>>) -> () loc(#loc)
+// CIR-HOST:   %[[#DTOR:]] = cir.get_global @__hip_module_dtor : !cir.ptr<!cir.func<()>> loc(#loc)
+// CIR-HOST:   %[[#]] = cir.call @atexit(%[[#DTOR]]) : (!cir.ptr<!cir.func<()>>) -> !s32i loc(#loc)
+// CIR-HOST:   cir.return loc(#loc)
+// CIR-HOST: } loc(#loc)
+
+// LLVM-HOST: define internal void @__hip_module_ctor() {
+// LLVM-HOST:  %[[#LLVMHandleVar:]] = load ptr, ptr @__hip_gpubin_handle, align 8
+// LLVM-HOST:  %[[#ICMP:]] = icmp eq ptr %[[#LLVMHandleVar]], null
+// LLVM-HOST:  br i1 %[[#ICMP]], label %[[IFBLOCK:[^,]+]], label %[[EXITBLOCK:[^,]+]]
+// LLVM-HOST: [[IFBLOCK]]:
+// LLVM-HOST:  %[[#Value:]] = call ptr @__hipRegisterFatBinary(ptr @__hip_fatbin_wrapper)
+// LLVM-HOST:  store ptr %[[#Value]], ptr @__hip_gpubin_handle, align 8
+// LLVM-HOST:  br label %[[EXITBLOCK]]
+// LLVM-HOST: [[EXITBLOCK]]:
+// LLVM-HOST:  %[[#HandleValue:]] = load ptr, ptr @__hip_gpubin_handle, align 8
+// LLVM-HOST:  call void @__hip_register_globals(ptr %[[#HandleValue]])
+// LLVM-HOST:  call i32 @atexit(ptr @__hip_module_dtor)
+// LLVM-HOST:  ret void
+
+// OGCG-HOST: @_Z2fnv = constant ptr @_Z17__device_stub__fnv, align 8
+// OGCG-HOST: @a = internal global i32 undef, align 4
+// OGCG-HOST: @b = internal global i32 undef, align 4
+// OGCG-HOST: @0 = private unnamed_addr constant [7 x i8] c"_Z2fnv\00", align 1
+// OGCG-HOST: @1 = private unnamed_addr constant [2 x i8] c"a\00", align 1
+// OGCG-HOST: @2 = private unnamed_addr constant [2 x i8] c"b\00", align 1
+// OGCG-HOST: @3 = private constant [14 x i8] c"sample fatbin\0A", section ".hip_fatbin", align 4096
+// OGCG-HOST: @__hip_fatbin_wrapper = internal constant { i32, i32, ptr, ptr } { i32 1212764230, i32 1, ptr @3, ptr null }, section ".hipFatBinSegment", align 8
+// OGCG-HOST: @__hip_gpubin_handle = internal global ptr null, align 8
+// OGCG-HOST: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__hip_module_ctor, ptr null }]
+
+// OGCG-HOST: define internal void @__hip_register_globals(ptr %[[#LLVMFatbin:]]) {
+// OGCG-HOST: entry:
+// OGCG-HOST:   call i32 @__hipRegisterFunction(
+// OGCG-HOST-SAME: ptr %[[#LLVMFatbin]],
+// OGCG-HOST-SAME: ptr @_Z2fnv,
+// OGCG-HOST-SAME: ptr @0,
+// OGCG-HOST-SAME: ptr @0,
+// OGCG-HOST-SAME: i32 -1,
+// OGCG-HOST-SAME: ptr null, ptr null, ptr null, ptr null, ptr null)
+// OGCG-HOST:   call void @__hipRegisterVar(
+// OGCG-HOST-SAME: ptr %[[#LLVMFatbin]],
+// OGCG-HOST-SAME: ptr @a,
+// OGCG-HOST-SAME: ptr @1,
+// OGCG-HOST-SAME: ptr @1,
+// OGCG-HOST-SAME: i32 0,
+// OGCG-HOST-SAME: i64 4,
+// OGCG-HOST-SAME: i32 0, i32 0)
+// OGCG-HOST:   call void @__hipRegisterVar(
+// OGCG-HOST-SAME: ptr %[[#LLVMFatbin]],
+// OGCG-HOST-SAME: ptr @b,
+// OGCG-HOST-SAME: ptr @2,
+// OGCG-HOST-SAME: ptr @2,
+// OGCG-HOST-SAME: i32 0,
+// OGCG-HOST-SAME: i64 4,
+// OGCG-HOST-SAME: i32 1, i32 0)
+// OGCG-HOST: ret void
+// OGCG-HOST: }
+
+// OGCG-HOST: define internal void @__hip_module_ctor() {
+// OGCG-HOST:  %[[#LLVMHandleVar:]] = load ptr, ptr @__hip_gpubin_handle, align 8
+// OGCG-HOST:  %[[#ICMP:]] = icmp eq ptr %[[#LLVMHandleVar]], null
+// OGCG-HOST:  br i1 %[[#ICMP]], label %[[IFBLOCK:[^,]+]], label %[[EXITBLOCK:[^,]+]]
+// OGCG-HOST: [[IFBLOCK]]:
+// OGCG-HOST:  %[[#Value:]] = call ptr @__hipRegisterFatBinary(ptr @__hip_fatbin_wrapper)
+// OGCG-HOST:  store ptr %[[#Value]], ptr @__hip_gpubin_handle, align 8
+// OGCG-HOST:  br label %[[EXITBLOCK]]
+// OGCG-HOST: [[EXITBLOCK]]:
+// OGCG-HOST:  %[[#HandleValue:]] = load ptr, ptr @__hip_gpubin_handle, align 8
+// OGCG-HOST:  call void @__hip_register_globals(ptr %[[#HandleValue]])
+// OGCG-HOST:  call i32 @atexit(ptr @__hip_module_dtor)
+// OGCG-HOST:  ret void
+
+// OGCG-HOST: define internal void @__hip_module_dtor() {
+// OGCG-HOST:  entry:
+// OGCG-HOST:    %[[#LLVMHandleVar:]] = load ptr, ptr @__hip_gpubin_handle, align 8
+// OGCG-HOST:    %[[#ICMP:]] = icmp ne ptr %[[#LLVMHandleVar]], null
+// OGCG-HOST:    br i1 %[[#ICMP]], label %[[IFBLOCK:[^,]+]], label %[[EXITBLOCK:[^,]+]]
+// OGCG-HOST:  [[IFBLOCK]]:                                               ; preds = %entry
+// OGCG-HOST:    call void @__hipUnregisterFatBinary(ptr %[[#LLVMHandleVar]])
+// OGCG-HOST:    store ptr null, ptr @__hip_gpubin_handle, align 8
+// OGCG-HOST:    br label %[[EXITBLOCK]]
+// OGCG-HOST:  [[EXITBLOCK]]:                                             ; preds = %[[IFBLOCK]], %entry
+// OGCG-HOST:    ret void
+// OGCG-HOST:  }
+
diff --git a/clang/test/CIR/Incubator/CodeGen/HIP/simple.cpp b/clang/test/CIR/Incubator/CodeGen/HIP/simple.cpp
new file mode 100644
index 0000000000000..4ed4013140d37
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/HIP/simple.cpp
@@ -0,0 +1,144 @@
+#include "cuda.h"
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x hip -fhip-new-launch-api \
+// RUN:            -I%S/../Inputs/ -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN:            -fcuda-is-device -fhip-new-launch-api \
+// RUN:            -I%S/../Inputs/ -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -x hip -emit-llvm -fhip-new-launch-api \
+// RUN:            -I%S/../Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN:            -fcuda-is-device -fhip-new-launch-api    \
+// RUN:            -fvisibility=hidden                      \
+// RUN:            -I%S/../Inputs/ -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu  \
+// RUN:            -x  hip -emit-llvm -fhip-new-launch-api \
+// RUN:            -I%S/../Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \
+// RUN:            -fcuda-is-device -fhip-new-launch-api \
+// RUN:            -fvisibility=hidden                      \
+// RUN:            -I%S/../Inputs/ -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s
+
+// Per Thread Stream test cases:
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -fgpu-default-stream=per-thread -DHIP_API_PER_THREAD_DEFAULT_STREAM \
+// RUN:            -x hip -fhip-new-launch-api \
+// RUN:            -I%S/../Inputs/ -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-HOST-PTH --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:            -fgpu-default-stream=per-thread -DHIP_API_PER_THREAD_DEFAULT_STREAM \
+// RUN:            -x hip -emit-llvm -fhip-new-launch-api \
+// RUN:            -I%S/../Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM-HOST-PTH --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu  \
+// RUN:            -fgpu-default-stream=per-thread -DHIP_API_PER_THREAD_DEFAULT_STREAM \
+// RUN:            -x  hip -emit-llvm -fhip-new-launch-api \
+// RUN:            -I%S/../Inputs/ %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG-HOST-PTH --input-file=%t.ll %s
+
+
+// Attribute for global_fn
+// CIR-HOST: [[Kernel:#[a-zA-Z_0-9]+]] = {{.*}}#cir.cu.kernel_name<_Z9global_fni>{{.*}}
+
+
+__host__ void host_fn(int *a, int *b, int *c) {}
+// CIR-HOST: cir.func {{.*}} @_Z7host_fnPiS_S_
+// CIR-DEVICE-NOT: cir.func {{.*}} @_Z7host_fnPiS_S_
+
+__device__ void device_fn(int* a, double b, float c) {}
+// CIR-HOST-NOT: cir.func {{.*}} @_Z9device_fnPidf
+// CIR-DEVICE: cir.func {{.*}} @_Z9device_fnPidf
+
+__global__ void global_fn(int a) {}
+// CIR-DEVICE: @_Z9global_fni{{.*}} cc(amdgpu_kernel)
+// LLVM-DEVICE: define protected amdgpu_kernel void @_Z9global_fni
+// OGCG-DEVICE: define protected amdgpu_kernel void @_Z9global_fni
+
+// CIR-HOST: @_Z24__device_stub__global_fni{{.*}}extra([[Kernel]])
+// CIR-HOST: %[[#CIRKernelArgs:]] = cir.alloca {{.*}}"kernel_args"
+// CIR-HOST: %[[#Decayed:]] = cir.cast array_to_ptrdecay %[[#CIRKernelArgs]]
+// CIR-HOST: cir.call @__hipPopCallConfiguration
+// CIR-HOST: cir.get_global @_Z9global_fni : !cir.ptr<!cir.ptr<!cir.func<(!s32i)>>>
+// CIR-HOST: cir.call @hipLaunchKernel
+// CIR-HOST-PTH: cir.call @hipLaunchKernel_spt
+
+// LLVM-HOST: void @_Z24__device_stub__global_fni
+// LLVM-HOST: %[[#KernelArgs:]] = alloca [1 x ptr], i64 1, align 16
+// LLVM-HOST: %[[#GEP1:]] = getelementptr ptr, ptr %[[#KernelArgs]], i32 0
+// LLVM-HOST: %[[#GEP2:]] = getelementptr [1 x ptr], ptr %[[#KernelArgs]], i32 0, i64 0
+// LLVM-HOST: call i32 @__hipPopCallConfiguration
+// LLVM-HOST: call i32 @hipLaunchKernel(ptr @_Z9global_fni 
+// LLVM-HOST-PTH: call i32 @hipLaunchKernel_spt(ptr @_Z9global_fni 
+//
+// OGCG-HOST: define dso_local void @_Z24__device_stub__global_fni
+// OGCG-HOST: %kernel_args = alloca ptr, i64 1, align 16
+// OGCG-HOST: getelementptr ptr, ptr %kernel_args, i32 0
+// OGCG-HOST: call i32 @__hipPopCallConfiguration
+// OGCG-HOST: %call = call noundef i32 @hipLaunchKernel(ptr noundef @_Z9global_fni
+// OGCG-HOST-PTH: %call = call noundef i32 @hipLaunchKernel_spt(ptr noundef @_Z9global_fni
+
+
+int main() {
+  global_fn<<<1, 1>>>(1);
+}
+// CIR-DEVICE-NOT: cir.func {{.*}} @main()
+
+// CIR-HOST: cir.func {{.*}} @main()
+// CIR-HOST: cir.call @_ZN4dim3C1Ejjj
+// CIR-HOST: cir.call @_ZN4dim3C1Ejjj
+// CIR-HOST: [[Push:%[0-9]+]] = cir.call @__hipPushCallConfiguration
+// CIR-HOST: [[ConfigOK:%[0-9]+]] = cir.cast int_to_bool [[Push]]
+// CIR-HOST: cir.if [[ConfigOK]] {
+// CIR-HOST: } else {
+// CIR-HOST:   [[Arg:%[0-9]+]] = cir.const #cir.int<1>
+// CIR-HOST:   cir.call @_Z24__device_stub__global_fni([[Arg]])
+// CIR-HOST: }
+
+// LLVM-HOST: define dso_local i32 @main
+// LLVM-HOST: alloca %struct.dim3
+// LLVM-HOST: alloca %struct.dim3
+// LLVM-HOST: call void @_ZN4dim3C1Ejjj
+// LLVM-HOST: call void @_ZN4dim3C1Ejjj
+// LLVM-HOST: %[[#ConfigOK:]] = call i32 @__hipPushCallConfiguration
+// LLVM-HOST: %[[#ConfigCond:]] = icmp ne i32 %[[#ConfigOK]], 0
+// LLVM-HOST: br i1 %[[#ConfigCond]], label %[[#Good:]], label %[[#Bad:]]
+// LLVM-HOST: [[#Good]]:
+// LLVM-HOST:   br label %[[#End:]]
+// LLVM-HOST: [[#Bad]]:
+// LLVM-HOST:   call void @_Z24__device_stub__global_fni(i32 1)
+// LLVM-HOST:   br label %[[#End:]]
+// LLVM-HOST: [[#End]]:
+// LLVM-HOST:   %[[#]] = load i32
+// LLVM-HOST:   ret i32
+
+// OGCG-HOST:  define dso_local noundef i32 @main
+// OGCG-HOST:  %agg.tmp = alloca %struct.dim3, align 4
+// OGCG-HOST:  %agg.tmp1 = alloca %struct.dim3, align 4
+// OGCG-HOST:  call void @_ZN4dim3C1Ejjj
+// OGCG-HOST:  call void @_ZN4dim3C1Ejjj
+// OGCG-HOST:  %call = call i32 @__hipPushCallConfiguration
+// OGCG-HOST:  %tobool = icmp ne i32 %call, 0
+// OGCG-HOST:  br i1 %tobool, label %kcall.end, label %kcall.configok
+// OGCG-HOST: kcall.configok:
+// OGCG-HOST:  call void @_Z24__device_stub__global_fni(i32 noundef 1)
+// OGCG-HOST:  br label %kcall.end
+// OGCG-HOST: kcall.end:
+// OGCG-HOST:  %[[RET:[0-9]+]] = load i32, ptr %retval, align 4
+// OGCG-HOST:  ret i32 %[[RET]]
+
diff --git a/clang/test/CIR/CodeGenCUDA/Inputs/cuda.h b/clang/test/CIR/Incubator/CodeGen/Inputs/cuda.h
similarity index 89%
rename from clang/test/CIR/CodeGenCUDA/Inputs/cuda.h
rename to clang/test/CIR/Incubator/CodeGen/Inputs/cuda.h
index 225c7dfdcf0db..204bf2972088d 100644
--- a/clang/test/CIR/CodeGenCUDA/Inputs/cuda.h
+++ b/clang/test/CIR/Incubator/CodeGen/Inputs/cuda.h
@@ -37,9 +37,6 @@ int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
 extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
                                                  size_t sharedSize = 0,
                                                  hipStream_t stream = 0);
-extern "C" int __hipPopCallConfiguration(dim3 *gridSize, dim3 *blockSize,
-                                         size_t *sharedSize,
-                                         hipStream_t *stream);
 #ifndef __HIP_API_PER_THREAD_DEFAULT_STREAM__
 extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
                                       dim3 blockDim, void **args,
@@ -65,9 +62,6 @@ extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
 extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize,
                                            size_t sharedSize = 0,
                                            cudaStream_t stream = 0);
-extern "C" int __cudaPopCallConfiguration(dim3 *gridSize, dim3 *blockSize,
-                                          size_t *sharedSize,
-                                          cudaStream_t *stream);
 extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
                                         dim3 blockDim, void **args,
                                         size_t sharedMem, cudaStream_t stream);
diff --git a/clang/test/CIR/Incubator/CodeGen/Inputs/std-compare.h b/clang/test/CIR/Incubator/CodeGen/Inputs/std-compare.h
new file mode 100644
index 0000000000000..f7f0c9b06db68
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/Inputs/std-compare.h
@@ -0,0 +1,324 @@
+#ifndef STD_COMPARE_H
+#define STD_COMPARE_H
+
+namespace std {
+inline namespace __1 {
+
+#ifdef NON_CANONICAL_CMP_RESULTS
+
+// exposition only
+enum class _EqResult : unsigned char {
+  __equal = 2,
+  __equiv = __equal,
+};
+
+enum class _OrdResult : signed char {
+  __less = 1,
+  __greater = 3
+};
+
+#else
+
+// exposition only
+enum class _EqResult : unsigned char {
+  __equal = 0,
+  __equiv = __equal,
+};
+
+enum class _OrdResult : signed char {
+  __less = -1,
+  __greater = 1
+};
+
+#endif
+
+enum class _NCmpResult : signed char {
+  __unordered = -127
+};
+
+struct _CmpUnspecifiedType;
+using _CmpUnspecifiedParam = void (_CmpUnspecifiedType::*)();
+
+class partial_ordering {
+  using _ValueT = signed char;
+  explicit constexpr partial_ordering(_EqResult __v) noexcept
+      : __value_(_ValueT(__v)) {}
+  explicit constexpr partial_ordering(_OrdResult __v) noexcept
+      : __value_(_ValueT(__v)) {}
+  explicit constexpr partial_ordering(_NCmpResult __v) noexcept
+      : __value_(_ValueT(__v)) {}
+
+  constexpr bool __is_ordered() const noexcept {
+    return __value_ != _ValueT(_NCmpResult::__unordered);
+  }
+
+public:
+  // valid values
+  static const partial_ordering less;
+  static const partial_ordering equivalent;
+  static const partial_ordering greater;
+  static const partial_ordering unordered;
+
+  // comparisons
+  friend constexpr bool operator==(partial_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr bool operator!=(partial_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr bool operator<(partial_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr bool operator<=(partial_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr bool operator>(partial_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr bool operator>=(partial_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr bool operator==(_CmpUnspecifiedParam, partial_ordering __v) noexcept;
+  friend constexpr bool operator!=(_CmpUnspecifiedParam, partial_ordering __v) noexcept;
+  friend constexpr bool operator<(_CmpUnspecifiedParam, partial_ordering __v) noexcept;
+  friend constexpr bool operator<=(_CmpUnspecifiedParam, partial_ordering __v) noexcept;
+  friend constexpr bool operator>(_CmpUnspecifiedParam, partial_ordering __v) noexcept;
+  friend constexpr bool operator>=(_CmpUnspecifiedParam, partial_ordering __v) noexcept;
+
+  friend constexpr partial_ordering operator<=>(partial_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr partial_ordering operator<=>(_CmpUnspecifiedParam, partial_ordering __v) noexcept;
+
+  // test helper
+  constexpr bool test_eq(partial_ordering const &other) const noexcept {
+    return __value_ == other.__value_;
+  }
+
+private:
+  _ValueT __value_;
+};
+
+inline constexpr partial_ordering partial_ordering::less(_OrdResult::__less);
+inline constexpr partial_ordering partial_ordering::equivalent(_EqResult::__equiv);
+inline constexpr partial_ordering partial_ordering::greater(_OrdResult::__greater);
+inline constexpr partial_ordering partial_ordering::unordered(_NCmpResult ::__unordered);
+constexpr bool operator==(partial_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v.__is_ordered() && __v.__value_ == 0;
+}
+constexpr bool operator<(partial_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v.__is_ordered() && __v.__value_ < 0;
+}
+constexpr bool operator<=(partial_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v.__is_ordered() && __v.__value_ <= 0;
+}
+constexpr bool operator>(partial_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v.__is_ordered() && __v.__value_ > 0;
+}
+constexpr bool operator>=(partial_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v.__is_ordered() && __v.__value_ >= 0;
+}
+constexpr bool operator==(_CmpUnspecifiedParam, partial_ordering __v) noexcept {
+  return __v.__is_ordered() && 0 == __v.__value_;
+}
+constexpr bool operator<(_CmpUnspecifiedParam, partial_ordering __v) noexcept {
+  return __v.__is_ordered() && 0 < __v.__value_;
+}
+constexpr bool operator<=(_CmpUnspecifiedParam, partial_ordering __v) noexcept {
+  return __v.__is_ordered() && 0 <= __v.__value_;
+}
+constexpr bool operator>(_CmpUnspecifiedParam, partial_ordering __v) noexcept {
+  return __v.__is_ordered() && 0 > __v.__value_;
+}
+constexpr bool operator>=(_CmpUnspecifiedParam, partial_ordering __v) noexcept {
+  return __v.__is_ordered() && 0 >= __v.__value_;
+}
+constexpr bool operator!=(partial_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return !__v.__is_ordered() || __v.__value_ != 0;
+}
+constexpr bool operator!=(_CmpUnspecifiedParam, partial_ordering __v) noexcept {
+  return !__v.__is_ordered() || __v.__value_ != 0;
+}
+
+constexpr partial_ordering operator<=>(partial_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v;
+}
+constexpr partial_ordering operator<=>(_CmpUnspecifiedParam, partial_ordering __v) noexcept {
+  return __v < 0 ? partial_ordering::greater : (__v > 0 ? partial_ordering::less : __v);
+}
+
+class weak_ordering {
+  using _ValueT = signed char;
+  explicit constexpr weak_ordering(_EqResult __v) noexcept : __value_(_ValueT(__v)) {}
+  explicit constexpr weak_ordering(_OrdResult __v) noexcept : __value_(_ValueT(__v)) {}
+
+public:
+  static const weak_ordering less;
+  static const weak_ordering equivalent;
+  static const weak_ordering greater;
+
+  // conversions
+  constexpr operator partial_ordering() const noexcept {
+    return __value_ == 0 ? partial_ordering::equivalent
+                         : (__value_ < 0 ? partial_ordering::less : partial_ordering::greater);
+  }
+
+  // comparisons
+  friend constexpr bool operator==(weak_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr bool operator!=(weak_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr bool operator<(weak_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr bool operator<=(weak_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr bool operator>(weak_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr bool operator>=(weak_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr bool operator==(_CmpUnspecifiedParam, weak_ordering __v) noexcept;
+  friend constexpr bool operator!=(_CmpUnspecifiedParam, weak_ordering __v) noexcept;
+  friend constexpr bool operator<(_CmpUnspecifiedParam, weak_ordering __v) noexcept;
+  friend constexpr bool operator<=(_CmpUnspecifiedParam, weak_ordering __v) noexcept;
+  friend constexpr bool operator>(_CmpUnspecifiedParam, weak_ordering __v) noexcept;
+  friend constexpr bool operator>=(_CmpUnspecifiedParam, weak_ordering __v) noexcept;
+
+  friend constexpr weak_ordering operator<=>(weak_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr weak_ordering operator<=>(_CmpUnspecifiedParam, weak_ordering __v) noexcept;
+
+  // test helper
+  constexpr bool test_eq(weak_ordering const &other) const noexcept {
+    return __value_ == other.__value_;
+  }
+
+private:
+  _ValueT __value_;
+};
+
+inline constexpr weak_ordering weak_ordering::less(_OrdResult::__less);
+inline constexpr weak_ordering weak_ordering::equivalent(_EqResult::__equiv);
+inline constexpr weak_ordering weak_ordering::greater(_OrdResult::__greater);
+constexpr bool operator==(weak_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v.__value_ == 0;
+}
+constexpr bool operator!=(weak_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v.__value_ != 0;
+}
+constexpr bool operator<(weak_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v.__value_ < 0;
+}
+constexpr bool operator<=(weak_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v.__value_ <= 0;
+}
+constexpr bool operator>(weak_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v.__value_ > 0;
+}
+constexpr bool operator>=(weak_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v.__value_ >= 0;
+}
+constexpr bool operator==(_CmpUnspecifiedParam, weak_ordering __v) noexcept {
+  return 0 == __v.__value_;
+}
+constexpr bool operator!=(_CmpUnspecifiedParam, weak_ordering __v) noexcept {
+  return 0 != __v.__value_;
+}
+constexpr bool operator<(_CmpUnspecifiedParam, weak_ordering __v) noexcept {
+  return 0 < __v.__value_;
+}
+constexpr bool operator<=(_CmpUnspecifiedParam, weak_ordering __v) noexcept {
+  return 0 <= __v.__value_;
+}
+constexpr bool operator>(_CmpUnspecifiedParam, weak_ordering __v) noexcept {
+  return 0 > __v.__value_;
+}
+constexpr bool operator>=(_CmpUnspecifiedParam, weak_ordering __v) noexcept {
+  return 0 >= __v.__value_;
+}
+
+constexpr weak_ordering operator<=>(weak_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v;
+}
+constexpr weak_ordering operator<=>(_CmpUnspecifiedParam, weak_ordering __v) noexcept {
+  return __v < 0 ? weak_ordering::greater : (__v > 0 ? weak_ordering::less : __v);
+}
+
+class strong_ordering {
+  using _ValueT = signed char;
+  explicit constexpr strong_ordering(_EqResult __v) noexcept : __value_(static_cast<signed char>(__v)) {}
+  explicit constexpr strong_ordering(_OrdResult __v) noexcept : __value_(static_cast<signed char>(__v)) {}
+
+public:
+  static const strong_ordering less;
+  static const strong_ordering equal;
+  static const strong_ordering equivalent;
+  static const strong_ordering greater;
+
+  // conversions
+  constexpr operator partial_ordering() const noexcept {
+    return __value_ == 0 ? partial_ordering::equivalent
+                         : (__value_ < 0 ? partial_ordering::less : partial_ordering::greater);
+  }
+  constexpr operator weak_ordering() const noexcept {
+    return __value_ == 0 ? weak_ordering::equivalent
+                         : (__value_ < 0 ? weak_ordering::less : weak_ordering::greater);
+  }
+
+  // comparisons
+  friend constexpr bool operator==(strong_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr bool operator!=(strong_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr bool operator<(strong_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr bool operator<=(strong_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr bool operator>(strong_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr bool operator>=(strong_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr bool operator==(_CmpUnspecifiedParam, strong_ordering __v) noexcept;
+  friend constexpr bool operator!=(_CmpUnspecifiedParam, strong_ordering __v) noexcept;
+  friend constexpr bool operator<(_CmpUnspecifiedParam, strong_ordering __v) noexcept;
+  friend constexpr bool operator<=(_CmpUnspecifiedParam, strong_ordering __v) noexcept;
+  friend constexpr bool operator>(_CmpUnspecifiedParam, strong_ordering __v) noexcept;
+  friend constexpr bool operator>=(_CmpUnspecifiedParam, strong_ordering __v) noexcept;
+
+  friend constexpr strong_ordering operator<=>(strong_ordering __v, _CmpUnspecifiedParam) noexcept;
+  friend constexpr strong_ordering operator<=>(_CmpUnspecifiedParam, strong_ordering __v) noexcept;
+
+  // test helper
+  constexpr bool test_eq(strong_ordering const &other) const noexcept {
+    return __value_ == other.__value_;
+  }
+
+private:
+  _ValueT __value_;
+};
+
+inline constexpr strong_ordering strong_ordering::less(_OrdResult::__less);
+inline constexpr strong_ordering strong_ordering::equal(_EqResult::__equal);
+inline constexpr strong_ordering strong_ordering::equivalent(_EqResult::__equiv);
+inline constexpr strong_ordering strong_ordering::greater(_OrdResult::__greater);
+
+constexpr bool operator==(strong_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v.__value_ == 0;
+}
+constexpr bool operator!=(strong_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v.__value_ != 0;
+}
+constexpr bool operator<(strong_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v.__value_ < 0;
+}
+constexpr bool operator<=(strong_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v.__value_ <= 0;
+}
+constexpr bool operator>(strong_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v.__value_ > 0;
+}
+constexpr bool operator>=(strong_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v.__value_ >= 0;
+}
+constexpr bool operator==(_CmpUnspecifiedParam, strong_ordering __v) noexcept {
+  return 0 == __v.__value_;
+}
+constexpr bool operator!=(_CmpUnspecifiedParam, strong_ordering __v) noexcept {
+  return 0 != __v.__value_;
+}
+constexpr bool operator<(_CmpUnspecifiedParam, strong_ordering __v) noexcept {
+  return 0 < __v.__value_;
+}
+constexpr bool operator<=(_CmpUnspecifiedParam, strong_ordering __v) noexcept {
+  return 0 <= __v.__value_;
+}
+constexpr bool operator>(_CmpUnspecifiedParam, strong_ordering __v) noexcept {
+  return 0 > __v.__value_;
+}
+constexpr bool operator>=(_CmpUnspecifiedParam, strong_ordering __v) noexcept {
+  return 0 >= __v.__value_;
+}
+
+constexpr strong_ordering operator<=>(strong_ordering __v, _CmpUnspecifiedParam) noexcept {
+  return __v;
+}
+constexpr strong_ordering operator<=>(_CmpUnspecifiedParam, strong_ordering __v) noexcept {
+  return __v < 0 ? strong_ordering::greater : (__v > 0 ? strong_ordering::less : __v);
+}
+
+} // namespace __1
+} // end namespace std
+
+#endif // STD_COMPARE_H
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/addrspace-alloca.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/addrspace-alloca.cl
new file mode 100644
index 0000000000000..c93595faf0796
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/addrspace-alloca.cl
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-cir -triple spirv64-unknown-unknown %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-llvm -fno-clangir-call-conv-lowering -triple spirv64-unknown-unknown %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+
+// CIR:  cir.global "private" constant cir_private lang_address_space(offload_private) @__const.func2.s1 = #cir.const_array<"Hello\00" : !cir.array<!s8i x 6>> : !cir.array<!s8i x 6>
+// LLVM: @__const.func2.s1 = private constant [6 x i8] c"Hello\00"
+
+// CIR: cir.func{{.*}} @func(%arg0: !cir.ptr<!s32i, lang_address_space(offload_local)>
+// LLVM: @func(ptr addrspace(3)
+kernel void func(local int *p) {
+  // CIR-NEXT: %[[#ALLOCA_P:]] = cir.alloca !cir.ptr<!s32i, lang_address_space(offload_local)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_local)>, lang_address_space(offload_private)>, ["p", init] {alignment = 8 : i64}
+  // LLVM-NEXT: %[[#ALLOCA_P:]] = alloca ptr addrspace(3), i64 1, align 8
+
+  int x;
+  // CIR-NEXT: %[[#ALLOCA_X:]] = cir.alloca !s32i, !cir.ptr<!s32i, lang_address_space(offload_private)>, ["x"] {alignment = 4 : i64}
+  // LLVM-NEXT: %[[#ALLOCA_X:]] = alloca i32, i64 1, align 4
+
+  global char *b;
+  // CIR-NEXT: %[[#ALLOCA_B:]] = cir.alloca !cir.ptr<!s8i, lang_address_space(offload_global)>, !cir.ptr<!cir.ptr<!s8i, lang_address_space(offload_global)>, lang_address_space(offload_private)>, ["b"] {alignment = 8 : i64}
+  // LLVM-NEXT: %[[#ALLOCA_B:]] = alloca ptr addrspace(1), i64 1, align 8
+
+  private int *ptr;
+  // CIR-NEXT: %[[#ALLOCA_PTR:]] = cir.alloca !cir.ptr<!s32i, lang_address_space(offload_private)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_private)>, lang_address_space(offload_private)>, ["ptr"] {alignment = 8 : i64}
+  // LLVM-NEXT: %[[#ALLOCA_PTR:]] = alloca ptr, i64 1, align 8
+
+  // Store of the argument `p`
+  // CIR-NEXT: cir.store{{.*}} %arg0, %[[#ALLOCA_P]] : !cir.ptr<!s32i, lang_address_space(offload_local)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_local)>, lang_address_space(offload_private)>
+  // LLVM-NEXT: store ptr addrspace(3) %{{[0-9]+}}, ptr %[[#ALLOCA_P]], align 8
+
+  ptr = &x;
+  // CIR-NEXT: cir.store{{.*}} %[[#ALLOCA_X]], %[[#ALLOCA_PTR]] : !cir.ptr<!s32i, lang_address_space(offload_private)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_private)>, lang_address_space(offload_private)>
+  // LLVM-NEXT: store ptr %[[#ALLOCA_X]], ptr %[[#ALLOCA_PTR]]
+
+  return;
+}
+
+// CIR: cir.func{{.*}} @func2()
+// LLVM: @func2()
+kernel void func2() {
+  char s1[] = "Hello";
+  // CIR-DAG:  %[[#ALLOCA_STR:]] = cir.alloca !cir.array<!s8i x 6>, !cir.ptr<!cir.array<!s8i x 6>, lang_address_space(offload_private)>, ["s1", init] {alignment = 1 : i64}
+  // CIR-DAG:  %[[#GET_GLOBAL:]] = cir.get_global @__const.func2.s1 : !cir.ptr<!cir.array<!s8i x 6>, lang_address_space(offload_private)>
+  // CIR:  cir.copy %[[#GET_GLOBAL]] to %[[#ALLOCA_STR]] : !cir.ptr<!cir.array<!s8i x 6>, lang_address_space(offload_private)>
+
+  // LLVM-NEXT: %[[#ALLOCA_STR:]] = alloca [6 x i8], i64 1, align 1
+  // LLVM-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr %[[#ALLOCA_STR]], ptr @__const.func2.s1, i32 6, i1 false)
+  return;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/addrspace_cast.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/addrspace_cast.cl
new file mode 100644
index 0000000000000..413702e58b150
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/addrspace_cast.cl
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -emit-cir -fclangir -o - %s | FileCheck %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -O2 -emit-llvm -fclangir -o - %s | FileCheck %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -O2 -emit-llvm -o - %s | FileCheck %s --check-prefix=OG-LLVM
+
+unsigned int test(local int* x) {
+    return *(local unsigned int*)x;
+}
+
+// CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!s32i, lang_address_space(offload_local)> -> !cir.ptr<!u32i, lang_address_space(offload_local)>
+// LLVM: load i32, ptr addrspace(3) %{{.*}}, align 4
+// OG-LLVM: load i32, ptr addrspace(3) %{{.*}}, align 4
+
+void atomic_flag_clear(volatile __global atomic_flag *obj, memory_order ord, memory_scope scp)
+{
+  __atomic_store_n((volatile __global uint *)obj, 0, ord);
+}
+
+// CIR: cir.cast bitcast %{{.*}} : !cir.ptr<!s32i, lang_address_space(offload_global)> -> !cir.ptr<!u32i, lang_address_space(offload_global)>
+// LLVM-COUNT-3: store atomic volatile i32 0, ptr addrspace(1) 
+// OG-LLVM-COUNT-3: store atomic volatile i32 0, ptr addrspace(1) 
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/amdgpu-kernel-abi.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/amdgpu-kernel-abi.cl
new file mode 100644
index 0000000000000..aee39944e4f81
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/amdgpu-kernel-abi.cl
@@ -0,0 +1,66 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir \
+// RUN:            -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir \
+// RUN:            -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
+// RUN:            -emit-llvm %s -o %t.ogcg.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ogcg.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test ABI lowering from CIR to LLVM IR for AMDGPU OpenCL kernels
+//===----------------------------------------------------------------------===//
+
+// Test simple kernel
+// CIR: cir.func{{.*}} @simple_kernel{{.*}} cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @simple_kernel()
+// OGCG: define{{.*}} amdgpu_kernel void @simple_kernel()
+__kernel void simple_kernel() {}
+
+// Test kernel with int argument
+// CIR: cir.func{{.*}} @kernel_with_int(%arg{{[0-9]+}}: !s32i{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @kernel_with_int(i32 %{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @kernel_with_int(i32{{.*}} %{{.*}})
+__kernel void kernel_with_int(int x) {}
+
+// Test kernel with pointer argument
+// CIR: cir.func{{.*}} @kernel_with_ptr(%arg{{[0-9]+}}: !cir.ptr<!s32i, lang_address_space(offload_global)>{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @kernel_with_ptr(ptr addrspace(1){{.*}}%{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @kernel_with_ptr(ptr addrspace(1){{.*}} %{{.*}})
+__kernel void kernel_with_ptr(global int *ptr) {}
+
+// Test kernel with multiple args
+// CIR: cir.func{{.*}} @kernel_multi_arg(%arg{{[0-9]+}}: !s32i{{.*}}, %arg{{[0-9]+}}: !cir.float{{.*}}, %arg{{[0-9]+}}: !cir.ptr<!cir.float, lang_address_space(offload_global)>{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @kernel_multi_arg(i32 %{{.*}}, float %{{.*}}, ptr addrspace(1){{.*}}%{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @kernel_multi_arg(i32{{.*}} %{{.*}}, float{{.*}} %{{.*}}, ptr addrspace(1){{.*}} %{{.*}})
+__kernel void kernel_multi_arg(int a, float b, global float *c) {}
+
+// Test device function
+// CIR: cir.func{{.*}} @device_fn(%arg{{[0-9]+}}: !s32i{{.*}})
+// CIR-NOT: cc(amdgpu_kernel)
+// LLVM: define{{.*}} void @device_fn(i32 %{{.*}})
+// LLVM-NOT: amdgpu_kernel
+// OGCG: define{{.*}} void @device_fn(i32{{.*}} %{{.*}})
+// OGCG-NOT: amdgpu_kernel
+void device_fn(int x) {}
+
+// Test device function with return value
+// CIR: cir.func{{.*}} @device_fn_float(%arg{{[0-9]+}}: !cir.float{{.*}}) -> !cir.float
+// LLVM: define{{.*}} float @device_fn_float(float %{{.*}})
+// OGCG: define{{.*}} float @device_fn_float(float{{.*}} %{{.*}})
+float device_fn_float(float f) { return f * 2.0f; }
+
+// Test kernel with local address space pointer (addrspace 3)
+// CIR: cir.func{{.*}} @kernel_local_ptr(%arg{{[0-9]+}}: !cir.ptr<!s32i, lang_address_space(offload_local)>{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @kernel_local_ptr(ptr addrspace(3){{.*}}%{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @kernel_local_ptr(ptr addrspace(3){{.*}} %{{.*}})
+__kernel void kernel_local_ptr(local int *ptr) {}
+
+// Test kernel with constant address space pointer (addrspace 4)
+// CIR: cir.func{{.*}} @kernel_constant_ptr(%arg{{[0-9]+}}: !cir.ptr<!s32i, lang_address_space(offload_constant)>{{.*}}) cc(amdgpu_kernel)
+// LLVM: define{{.*}} amdgpu_kernel void @kernel_constant_ptr(ptr addrspace(4){{.*}}%{{.*}})
+// OGCG: define{{.*}} amdgpu_kernel void @kernel_constant_ptr(ptr addrspace(4){{.*}} %{{.*}})
+__kernel void kernel_constant_ptr(constant int *ptr) {}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/array-decay.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/array-decay.cl
new file mode 100644
index 0000000000000..6cfcc83ec1d2c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/array-decay.cl
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-cir -triple spirv64-unknown-unknown %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-llvm -fno-clangir-call-conv-lowering -triple spirv64-unknown-unknown %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+
+// CIR: @func1
+// LLVM: @func1
+kernel void func1(global int *data) {
+    local int arr[32];
+
+    local int *ptr = arr;
+    // CIR:      cir.cast array_to_ptrdecay %{{[0-9]+}} : !cir.ptr<!cir.array<!s32i x 32>, lang_address_space(offload_local)> -> !cir.ptr<!s32i, lang_address_space(offload_local)>
+    // CIR-NEXT: cir.store{{.*}} %{{[0-9]+}}, %{{[0-9]+}} : !cir.ptr<!s32i, lang_address_space(offload_local)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_local)>, lang_address_space(offload_private)>
+
+    // LLVM: store ptr addrspace(3) @func1.arr, ptr %{{[0-9]+}}
+}
+
+// CIR: @func2
+// LLVM: @func2
+kernel void func2(global int *data) {
+    private int arr[32] = {data[2]};
+    // CIR: %{{[0-9]+}} = cir.get_element %{{[0-9]+}}[%{{[0-9]+}}] : (!cir.ptr<!cir.array<!s32i x 32>, lang_address_space(offload_private)>, !s32i) -> !cir.ptr<!s32i, lang_address_space(offload_private)>
+
+    // LLVM: %{{[0-9]+}} = getelementptr [32 x i32], ptr %{{[0-9]+}}, i32 0, i64 0
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/as_type.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/as_type.cl
new file mode 100644
index 0000000000000..cb07dcdb11747
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/as_type.cl
@@ -0,0 +1,55 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -fclangir -emit-cir -triple spirv64-unknown-unknown -o %t.ll
+// RUN: FileCheck %s --input-file=%t.ll --check-prefix=CIR
+
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -fclangir -emit-llvm -triple spirv64-unknown-unknown -o %t.ll
+// RUN: FileCheck %s --input-file=%t.ll --check-prefix=LLVM
+
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -triple spirv64-unknown-unknown -o %t.ll
+// RUN: FileCheck %s --input-file=%t.ll --check-prefix=OG-LLVM
+
+typedef __attribute__(( ext_vector_type(4) )) char char4;
+
+// CIR: cir.func @f4(%{{.*}}: !s32i loc({{.*}})) -> !cir.vector<!s8i x 4>
+// CIR: %[[x:.*]] = cir.load align(4) %{{.*}} : !cir.ptr<!s32i, lang_address_space(offload_private)>
+// CIR: cir.cast bitcast %[[x]] : !s32i -> !cir.vector<!s8i x 4>
+// LLVM: define spir_func <4 x i8> @f4(i32 %[[x:.*]])
+// LLVM: %[[astype:.*]] = bitcast i32 %[[x]]  to <4 x i8>
+// LLVM-NOT: shufflevector
+// LLVM: ret <4 x i8> %[[astype]]
+// OG-LLVM: define spir_func noundef <4 x i8> @f4(i32 noundef %[[x:.*]])
+// OG-LLVM: %[[astype:.*]] = bitcast i32 %[[x]] to <4 x i8>
+// OG-LLVM-NOT: shufflevector
+// OG-LLVM: ret <4 x i8> %[[astype]]
+char4 f4(int x) {
+  return __builtin_astype(x, char4);
+}
+
+// CIR: cir.func @f6(%{{.*}}: !cir.vector<!s8i x 4> loc({{.*}})) -> !s32i
+// CIR: %[[x:.*]] = cir.load align(4) %{{.*}} : !cir.ptr<!cir.vector<!s8i x 4>, lang_address_space(offload_private)>, !cir.vector<!s8i x 4>
+// CIR: cir.cast bitcast %[[x]] : !cir.vector<!s8i x 4> -> !s32i
+// LLVM: define{{.*}} spir_func i32 @f6(<4 x i8> %[[x:.*]])
+// LLVM: %[[astype:.*]] = bitcast <4 x i8> %[[x]] to i32
+// LLVM-NOT: shufflevector
+// LLVM: ret i32 %[[astype]]
+// OG-LLVM: define{{.*}} spir_func noundef i32 @f6(<4 x i8> noundef %[[x:.*]])
+// OG-LLVM: %[[astype:.*]] = bitcast <4 x i8> %[[x]] to i32
+// OG-LLVM-NOT: shufflevector
+// OG-LLVM: ret i32 %[[astype]]
+int f6(char4 x) {
+  return __builtin_astype(x, int);
+}
+
+// CIR: cir.func @f4_ptr(%{{.*}}: !cir.ptr<!s32i, lang_address_space(offload_global)> loc({{.*}})) -> !cir.ptr<!cir.vector<!s8i x 4>, lang_address_space(offload_local)>
+// CIR: %[[x:.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_global)>, lang_address_space(offload_private)>, !cir.ptr<!s32i, lang_address_space(offload_global)>
+// CIR: cir.cast address_space %[[x]] : !cir.ptr<!s32i, lang_address_space(offload_global)> -> !cir.ptr<!cir.vector<!s8i x 4>, lang_address_space(offload_local)>
+// LLVM: define spir_func ptr addrspace(3) @f4_ptr(ptr addrspace(1) readnone captures(ret: address, provenance) %[[x:.*]])
+// LLVM: %[[astype:.*]] = addrspacecast ptr addrspace(1) %[[x]] to ptr addrspace(3)
+// LLVM-NOT: shufflevector
+// LLVM: ret ptr addrspace(3) %[[astype]]
+// OG-LLVM: define spir_func ptr addrspace(3) @f4_ptr(ptr addrspace(1) noundef readnone captures(ret: address, provenance) %[[x:.*]])
+// OG-LLVM: %[[astype:.*]] = addrspacecast ptr addrspace(1) %[[x]] to ptr addrspace(3)
+// OG-LLVM-NOT: shufflevector
+// OG-LLVM: ret ptr addrspace(3) %[[astype]]
+__local char4* f4_ptr(__global int* x) {
+  return __builtin_astype(x, __local char4*);
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/async_copy.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/async_copy.cl
new file mode 100644
index 0000000000000..7fe6e061b10de
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/async_copy.cl
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -emit-cir -o - %s -fclangir | FileCheck %s --check-prefix=CIR-SPIR
+// RUN: %clang_cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -emit-llvm -o - %s -fclangir | FileCheck %s --check-prefix=LLVM-SPIR
+// RUN: %clang_cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -emit-llvm -o - %s | FileCheck %s --check-prefix=OG-LLVM-SPIR
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 -finclude-default-header -emit-cir -o - %s -fclangir | FileCheck %s --check-prefix=CIR-AMDGCN
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 -finclude-default-header -emit-llvm -o - %s -fclangir | FileCheck %s --check-prefix=LLVM-AMDGCN
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -cl-std=CL2.0 -finclude-default-header -emit-llvm -o - %s | FileCheck %s --check-prefix=OG-LLVM-AMDGCN
+
+
+// Simple kernel using async_work_group_copy + wait_group_events
+
+__kernel void test_async_copy(__global int *g_in, __local int *l_in, int size) {
+    // int gid = get_global_id(0);
+
+    // Trigger async copy: global to local
+    // event_t e_in = 
+    async_work_group_copy(
+        l_in,                          // local destination
+        g_in,// + gid * size,             // global source
+        size,                          // number of elements
+        (event_t)0                     // no dependency
+    );
+
+    // Wait for the async operation to complete
+    // wait_group_events(1, &e_in);
+}
+
+// CIR-SPIR: cir.call @_Z21async_work_group_copyPU3AS3iPU3AS1Kim9ocl_event(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!cir.ptr<!s32i, lang_address_space(offload_local)>, !cir.ptr<!s32i, lang_address_space(offload_global)>, !u64i, !cir.opaque<"event">) -> !cir.opaque<"event">
+// LLVM-SPIR: call spir_func target("spirv.Event") @_Z21async_work_group_copyPU3AS3iPU3AS1Kim9ocl_event(ptr addrspace(3) %{{.*}}, ptr addrspace(1) %{{.*}}, i64 %{{.*}}, target("spirv.Event") zeroinitializer)
+// OG-LLVM-SPIR: call spir_func target("spirv.Event") @_Z21async_work_group_copyPU3AS3iPU3AS1Kim9ocl_event(ptr addrspace(3) noundef %{{.*}}, ptr addrspace(1) noundef %{{.*}}, i64 noundef %{{.*}}, target("spirv.Event") zeroinitializer
+
+// CIR-AMDGCN: cir.call @_Z21async_work_group_copyPU3AS3iPU3AS1Kim9ocl_event(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!cir.ptr<!s32i, lang_address_space(offload_local)>, !cir.ptr<!s32i, lang_address_space(offload_global)>, !u64i, !cir.opaque<"event">) -> !cir.opaque<"event">
+// LLVM-AMDGCN: call ptr @_Z21async_work_group_copyPU3AS3iPU3AS1Kim9ocl_event(ptr addrspace(3) %{{.*}}, ptr addrspace(1) %{{.*}}, i64 %{{.*}}, ptr null)
+// OG-LLVM-AMDGCN: call ptr @_Z21async_work_group_copyPU3AS3iPU3AS1Kim9ocl_event(ptr addrspace(3) noundef %{{.*}}, ptr addrspace(1) noundef %{{.*}}, i64 noundef %{{.*}}, ptr null)
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-gfx10.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-gfx10.cl
new file mode 100644
index 0000000000000..49f51eb55b111
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-gfx10.cl
@@ -0,0 +1,65 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1010 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1011 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1012 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1010 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1011 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1012 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx1010 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx1011 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx1012 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU gfx10 permlane16/permlanex16 builtins
+//===----------------------------------------------------------------------===//
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+typedef unsigned int uint;
+typedef unsigned long ulong;
+
+// CIR-LABEL: @test_permlane16
+// CIR: cir.llvm.intrinsic "amdgcn.permlane16" {{.*}} : (!u32i, !u32i, !u32i, !u32i, !cir.bool, !cir.bool) -> !u32i
+// LLVM: define{{.*}} void @test_permlane16
+// LLVM: call i32 @llvm.amdgcn.permlane16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
+// OGCG: define{{.*}} void @test_permlane16
+// OGCG: call i32 @llvm.amdgcn.permlane16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
+void test_permlane16(global uint* out, uint a, uint b, uint c, uint d) {
+  *out = __builtin_amdgcn_permlane16(a, b, c, d, 0, 0);
+}
+
+// CIR-LABEL: @test_permlanex16
+// CIR: cir.llvm.intrinsic "amdgcn.permlanex16" {{.*}} : (!u32i, !u32i, !u32i, !u32i, !cir.bool, !cir.bool) -> !u32i
+// LLVM: define{{.*}} void @test_permlanex16
+// LLVM: call i32 @llvm.amdgcn.permlanex16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
+// OGCG: define{{.*}} void @test_permlanex16
+// OGCG: call i32 @llvm.amdgcn.permlanex16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
+void test_permlanex16(global uint* out, uint a, uint b, uint c, uint d) {
+  *out = __builtin_amdgcn_permlanex16(a, b, c, d, 0, 0);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-gfx11.cl
new file mode 100644
index 0000000000000..6a429573d0256
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-gfx11.cl
@@ -0,0 +1,115 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1100 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1101 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1102 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1103 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1150 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1151 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1152 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1153 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1100 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1101 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1102 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1103 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1150 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1151 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1152 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1153 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx1100 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx1101 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx1102 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx1103 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx1150 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx1151 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx1152 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx1153 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU gfx11 permlanex16 builtin
+//===----------------------------------------------------------------------===//
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+typedef unsigned int uint;
+typedef unsigned long ulong;
+
+// CIR-LABEL: @test_permlanex16
+// CIR: cir.llvm.intrinsic "amdgcn.permlanex16" {{.*}} : (!u32i, !u32i, !u32i, !u32i, !cir.bool, !cir.bool) -> !u32i
+// LLVM: define{{.*}} void @test_permlanex16
+// LLVM: call i32 @llvm.amdgcn.permlanex16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
+// OGCG: define{{.*}} void @test_permlanex16
+// OGCG: call i32 @llvm.amdgcn.permlanex16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false)
+void test_permlanex16(global uint* out, uint a, uint b, uint c, uint d) {
+  *out = __builtin_amdgcn_permlanex16(a, b, c, d, 0, 0);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-gfx1250.cl
new file mode 100644
index 0000000000000..de932525ccdf4
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-gfx1250.cl
@@ -0,0 +1,51 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1250 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1250 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx1250 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU gfx1250 bf16 math builtins (rcp/sqrt/rsq)
+//===----------------------------------------------------------------------===//
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// CIR-LABEL: @test_rcp_bf16
+// CIR: cir.llvm.intrinsic "amdgcn.rcp" {{.*}} : (!cir.bf16) -> !cir.bf16
+// LLVM: define{{.*}} void @test_rcp_bf16
+// LLVM: call{{.*}} bfloat @llvm.amdgcn.rcp.bf16(bfloat %{{.*}})
+// OGCG: define{{.*}} void @test_rcp_bf16
+// OGCG: call{{.*}} bfloat @llvm.amdgcn.rcp.bf16(bfloat %{{.*}})
+void test_rcp_bf16(global __bf16* out, __bf16 a)
+{
+  *out = __builtin_amdgcn_rcp_bf16(a);
+}
+
+// CIR-LABEL: @test_sqrt_bf16
+// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.bf16) -> !cir.bf16
+// LLVM: define{{.*}} void @test_sqrt_bf16
+// LLVM: call{{.*}} bfloat @llvm.amdgcn.sqrt.bf16(bfloat %{{.*}})
+// OGCG: define{{.*}} void @test_sqrt_bf16
+// OGCG: call{{.*}} bfloat @llvm.amdgcn.sqrt.bf16(bfloat %{{.*}})
+void test_sqrt_bf16(global __bf16* out, __bf16 a)
+{
+  *out = __builtin_amdgcn_sqrt_bf16(a);
+}
+
+// CIR-LABEL: @test_rsq_bf16
+// CIR: cir.llvm.intrinsic "amdgcn.rsq" {{.*}} : (!cir.bf16) -> !cir.bf16
+// LLVM: define{{.*}} void @test_rsq_bf16
+// LLVM: call{{.*}} bfloat @llvm.amdgcn.rsq.bf16(bfloat %{{.*}})
+// OGCG: define{{.*}} void @test_rsq_bf16
+// OGCG: call{{.*}} bfloat @llvm.amdgcn.rsq.bf16(bfloat %{{.*}})
+void test_rsq_bf16(__bf16* out, __bf16 a)
+{
+  *out = __builtin_amdgcn_rsq_bf16(a);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-image-sample.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-image-sample.cl
new file mode 100644
index 0000000000000..8f08ad0a36c7d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-image-sample.cl
@@ -0,0 +1,253 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1100 -target-feature +extended-image-insts \
+// RUN:            -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1100 -target-feature +extended-image-insts \
+// RUN:            -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx1100 -target-feature +extended-image-insts \
+// RUN:            -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU extended image builtins in OpenCL
+//===----------------------------------------------------------------------===//
+
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef int int4 __attribute__((ext_vector_type(4)));
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+// CIR-LABEL: @test_image_gather4_lz_2d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.gather4.lz.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM: define{{.*}} void @test_image_gather4_lz_2d_v4f32(
+// LLVM: call {{.*}}@llvm.amdgcn.image.gather4.lz.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_gather4_lz_2d_v4f32(
+// OGCG: call {{.*}}<4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_gather4_lz_2d_v4f32(global float4* out, float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(1, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_lz_1d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.1d" {{.*}} : (!s32i, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM: define{{.*}} void @test_image_sample_lz_1d_v4f32(
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.1d.{{.*}}(i32 {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_lz_1d_v4f32(
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.1d.{{.*}}(i32 {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_lz_1d_v4f32(global float4* out, float s, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_1d_v4f32_f32(100, s, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_lz_1d_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.1d" {{.*}} : (!s32i, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
+// LLVM: define{{.*}} void @test_image_sample_lz_1d_v4f16(
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.1d.{{.*}}(i32 {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_lz_1d_v4f16(
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.1d.{{.*}}(i32 {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_lz_1d_v4f16(global half4* out, float s, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_1d_v4f16_f32(100, s, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_l_1d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.l.1d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM: define{{.*}} void @test_image_sample_l_1d_v4f32(
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.l.1d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_l_1d_v4f32(
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.l.1d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_l_1d_v4f32(global float4* out, float s, float lod, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_l_1d_v4f32_f32(100, s, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_l_1d_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.l.1d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
+// LLVM: define{{.*}} void @test_image_sample_l_1d_v4f16(
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.l.1d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_l_1d_v4f16(
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.l.1d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_l_1d_v4f16(global half4* out, float s, float lod, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_l_1d_v4f16_f32(100, s, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_d_1d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.d.1d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM: define{{.*}} void @test_image_sample_d_1d_v4f32(
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.d.1d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_d_1d_v4f32(
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.d.1d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_d_1d_v4f32(global float4* out, float dsdx, float dsdy, float s, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_d_1d_v4f32_f32(100, dsdx, dsdy, s, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_d_1d_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.d.1d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
+// LLVM: define{{.*}} void @test_image_sample_d_1d_v4f16(
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.d.1d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_d_1d_v4f16(
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.d.1d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_d_1d_v4f16(global half4* out, float dsdx, float dsdy, float s, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_d_1d_v4f16_f32(100, dsdx, dsdy, s, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_lz_2d_f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.float
+// LLVM: define{{.*}} void @test_image_sample_lz_2d_f32(
+// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_lz_2d_f32(
+// OGCG: call {{.*}}float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_lz_2d_f32(global float* out, float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_2d_f32_f32(1, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_lz_2d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM: define{{.*}} void @test_image_sample_lz_2d_v4f32(
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_lz_2d_v4f32(
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_lz_2d_v4f32(global float4* out, float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_2d_v4f32_f32(100, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_lz_2d_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
+// LLVM: define{{.*}} void @test_image_sample_lz_2d_v4f16(
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_lz_2d_v4f16(
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_lz_2d_v4f16(global half4* out, float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_2d_v4f16_f32(100, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_l_2d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.l.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM: define{{.*}} void @test_image_sample_l_2d_v4f32(
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.l.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_l_2d_v4f32(
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.l.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_l_2d_v4f32(global float4* out, float s, float t, float lod, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_l_2d_v4f32_f32(10, s, t, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_l_2d_f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.l.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.float
+// LLVM: define{{.*}} void @test_image_sample_l_2d_f32(
+// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.l.2d.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_l_2d_f32(
+// OGCG: call {{.*}}float @llvm.amdgcn.image.sample.l.2d.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_l_2d_f32(global float* out, float s, float t, float lod, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_l_2d_f32_f32(1, s, t, lod, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_d_2d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.d.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM: define{{.*}} void @test_image_sample_d_2d_v4f32(
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.d.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_d_2d_v4f32(
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.d.2d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_d_2d_v4f32(global float4* out, float dsdx, float dtdx, float dsdy, float dtdy, float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_d_2d_v4f32_f32(100, dsdx, dtdx, dsdy, dtdy, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_d_2d_f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.d.2d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.float
+// LLVM: define{{.*}} void @test_image_sample_d_2d_f32(
+// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_d_2d_f32(
+// OGCG: call {{.*}}float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_d_2d_f32(global float* out, float dsdx, float dtdx, float dsdy, float dtdy, float s, float t, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_d_2d_f32_f32(1, dsdx, dtdx, dsdy, dtdy, s, t, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_lz_3d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM: define{{.*}} void @test_image_sample_lz_3d_v4f32(
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.3d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_lz_3d_v4f32(
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.3d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_lz_3d_v4f32(global float4* out, float s, float t, float r, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_3d_v4f32_f32(100, s, t, r, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_lz_3d_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.3d" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
+// LLVM: define{{.*}} void @test_image_sample_lz_3d_v4f16(
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.3d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_lz_3d_v4f16(
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.3d.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_lz_3d_v4f16(global half4* out, float s, float t, float r, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_3d_v4f16_f32(100, s, t, r, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_lz_cube_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.cube" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM: define{{.*}} void @test_image_sample_lz_cube_v4f32(
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.cube.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_lz_cube_v4f32(
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.cube.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_lz_cube_v4f32(global float4* out, float s, float t, float face, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_cube_v4f32_f32(1, s, t, face, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_lz_cube_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.cube" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
+// LLVM: define{{.*}} void @test_image_sample_lz_cube_v4f16(
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.cube.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_lz_cube_v4f16(
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.cube.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_lz_cube_v4f16(global half4* out, float s, float t, float face, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_cube_v4f16_f32(100, s, t, face, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_lz_1darray_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.1darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM: define{{.*}} void @test_image_sample_lz_1darray_v4f32(
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.1darray.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_lz_1darray_v4f32(
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.1darray.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_lz_1darray_v4f32(global float4* out, float s, float slice, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_1darray_v4f32_f32(1, s, slice, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_lz_1darray_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.1darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
+// LLVM: define{{.*}} void @test_image_sample_lz_1darray_v4f16(
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.1darray.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_lz_1darray_v4f16(
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.1darray.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_lz_1darray_v4f16(global half4* out, float s, float slice, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_1darray_v4f16_f32(100, s, slice, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_lz_2darray_f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.float
+// LLVM: define{{.*}} void @test_image_sample_lz_2darray_f32(
+// LLVM: call {{.*}}float @llvm.amdgcn.image.sample.lz.2darray.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_lz_2darray_f32(
+// OGCG: call {{.*}}float @llvm.amdgcn.image.sample.lz.2darray.f32.f32.v8i32.v4i32(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_lz_2darray_f32(global float* out, float s, float t, float slice, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_2darray_f32_f32(1, s, t, slice, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_lz_2darray_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM: define{{.*}} void @test_image_sample_lz_2darray_v4f32(
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.2darray.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_lz_2darray_v4f32(
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.2darray.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_lz_2darray_v4f32(global float4* out, float s, float t, float slice, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_2darray_v4f32_f32(100, s, t, slice, tex, samp, 0, 120, 110);
+}
+
+// CIR-LABEL: @test_image_sample_lz_2darray_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.sample.lz.2darray" {{.*}} : (!s32i, !cir.float, !cir.float, !cir.float, !cir.vector<!s32i x 8>, !cir.vector<!s32i x 4>, !cir.bool, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
+// LLVM: define{{.*}} void @test_image_sample_lz_2darray_v4f16(
+// LLVM: call {{.*}}@llvm.amdgcn.image.sample.lz.2darray.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_sample_lz_2darray_v4f16(
+// OGCG: call {{.*}}@llvm.amdgcn.image.sample.lz.2darray.{{.*}}(i32 {{.*}}, float {{.*}}, float {{.*}}, float {{.*}}, <8 x i32> {{.*}}, <4 x i32> {{.*}}, i1 {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_sample_lz_2darray_v4f16(global half4* out, float s, float t, float slice, __amdgpu_texture_t tex, int4 samp) {
+  *out = __builtin_amdgcn_image_sample_lz_2darray_v4f16_f32(100, s, t, slice, tex, samp, 0, 120, 110);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-image.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-image.cl
new file mode 100644
index 0000000000000..6d502d3043b3d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-image.cl
@@ -0,0 +1,119 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN: -target-cpu gfx1100 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN: -target-cpu gfx1100 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN: -target-cpu gfx1100 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU image load/store builtins
+//===----------------------------------------------------------------------===//
+
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+// CIR-LABEL: @test_image_load_2d_f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.load.2d" {{.*}} : (!s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !cir.float
+// LLVM: define{{.*}} void @test_image_load_2d_f32(
+// LLVM: call {{.*}}float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_load_2d_f32(
+// OGCG: call {{.*}}float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_load_2d_f32(global float* out, int x, int y, __amdgpu_texture_t rsrc) {
+  *out = __builtin_amdgcn_image_load_2d_f32_i32(15, x, y, rsrc, 0, 0);
+}
+
+// CIR-LABEL: @test_image_load_2d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.load.2d" {{.*}} : (!s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM: define{{.*}} void @test_image_load_2d_v4f32(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_load_2d_v4f32(
+// OGCG: call {{.*}}<4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_load_2d_v4f32(global float4* out, int x, int y, __amdgpu_texture_t rsrc) {
+  *out = __builtin_amdgcn_image_load_2d_v4f32_i32(15, x, y, rsrc, 0, 0);
+}
+
+// CIR-LABEL: @test_image_load_2d_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.load.2d" {{.*}} : (!s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
+// LLVM: define{{.*}} void @test_image_load_2d_v4f16(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_load_2d_v4f16(
+// OGCG: call {{.*}}<4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_load_2d_v4f16(global half4* out, int x, int y, __amdgpu_texture_t rsrc) {
+  *out = __builtin_amdgcn_image_load_2d_v4f16_i32(15, x, y, rsrc, 0, 0);
+}
+
+// CIR-LABEL: @test_image_store_2d_f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.store.2d" {{.*}} : (!cir.float, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void @test_image_store_2d_f32(
+// LLVM: call void @llvm.amdgcn.image.store.2d.f32.i32.v8i32(float {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_store_2d_f32(
+// OGCG: call void @llvm.amdgcn.image.store.2d.f32.i32.v8i32(float {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_store_2d_f32(float val, int x, int y, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_2d_f32_i32(val, 15, x, y, rsrc, 0, 0);
+}
+
+// CIR-LABEL: @test_image_store_2d_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.store.2d" {{.*}} : (!cir.vector<!cir.float x 4>, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void @test_image_store_2d_v4f32(
+// LLVM: call void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_store_2d_v4f32(
+// OGCG: call void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_store_2d_v4f32(float4 val, int x, int y, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_2d_v4f32_i32(val, 15, x, y, rsrc, 0, 0);
+}
+
+// CIR-LABEL: @test_image_store_2d_v4f16
+// CIR: cir.llvm.intrinsic "amdgcn.image.store.2d" {{.*}} : (!cir.vector<!cir.f16 x 4>, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void @test_image_store_2d_v4f16(
+// LLVM: call void @llvm.amdgcn.image.store.2d.v4f16.i32.v8i32(<4 x half> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_store_2d_v4f16(
+// OGCG: call void @llvm.amdgcn.image.store.2d.v4f16.i32.v8i32(<4 x half> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_store_2d_v4f16(half4 val, int x, int y, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_2d_v4f16_i32(val, 15, x, y, rsrc, 0, 0);
+}
+
+// CIR-LABEL: @test_image_load_2darray_f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.load.2darray" {{.*}} : (!s32i, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !cir.float
+// LLVM: define{{.*}} void @test_image_load_2darray_f32(
+// LLVM: call {{.*}}float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_load_2darray_f32(
+// OGCG: call {{.*}}float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_load_2darray_f32(global float* out, int x, int y, int slice, __amdgpu_texture_t rsrc) {
+  *out = __builtin_amdgcn_image_load_2darray_f32_i32(15, x, y, slice, rsrc, 0, 0);
+}
+
+// CIR-LABEL: @test_image_load_2darray_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.load.2darray" {{.*}} : (!s32i, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
+// LLVM: define{{.*}} void @test_image_load_2darray_v4f32(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_load_2darray_v4f32(
+// OGCG: call {{.*}}<4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_load_2darray_v4f32(global float4* out, int x, int y, int slice, __amdgpu_texture_t rsrc) {
+  *out = __builtin_amdgcn_image_load_2darray_v4f32_i32(15, x, y, slice, rsrc, 0, 0);
+}
+
+// CIR-LABEL: @test_image_store_2darray_f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.store.2darray" {{.*}} : (!cir.float, !s32i, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void @test_image_store_2darray_f32(
+// LLVM: call void @llvm.amdgcn.image.store.2darray.f32.i32.v8i32(float {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_store_2darray_f32(
+// OGCG: call void @llvm.amdgcn.image.store.2darray.f32.i32.v8i32(float {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_store_2darray_f32(float val, int x, int y, int slice, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_2darray_f32_i32(val, 15, x, y, slice, rsrc, 0, 0);
+}
+
+// CIR-LABEL: @test_image_store_2darray_v4f32
+// CIR: cir.llvm.intrinsic "amdgcn.image.store.2darray" {{.*}} : (!cir.vector<!cir.float x 4>, !s32i, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void @test_image_store_2darray_v4f32(
+// LLVM: call void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x float> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+// OGCG: define{{.*}} void @test_image_store_2darray_v4f32(
+// OGCG: call void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x float> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+void test_image_store_2darray_v4f32(float4 val, int x, int y, int slice, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_2darray_v4f32_i32(val, 15, x, y, slice, rsrc, 0, 0);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-logb-scalbn.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-logb-scalbn.cl
new file mode 100644
index 0000000000000..385bb59aee71e
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-logb-scalbn.cl
@@ -0,0 +1,74 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1100 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1100 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx1100 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test logb/logbf and scalbn/scalbnf builtins
+//===----------------------------------------------------------------------===//
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+// CIR-LABEL: @test_logbf
+// CIR: cir.call @logbf({{.*}}) : (!cir.float) -> !cir.float
+// LLVM: define{{.*}} void @test_logbf(
+// LLVM: call {{.*}}float @logbf(float %{{.*}})
+// OGCG: define{{.*}} void @test_logbf(
+// OGCG: call { float, i32 } @llvm.frexp.f32.i32(float %{{.*}})
+// OGCG: extractvalue { float, i32 } %{{.*}}, 1
+// OGCG: add nsw i32 %{{.*}}, -1
+// OGCG: sitofp i32 %{{.*}} to float
+// OGCG: call {{.*}}float @llvm.fabs.f32(float %{{.*}})
+// OGCG: fcmp {{.*}}one float %{{.*}}, 0x7FF0000000000000
+// OGCG: select {{.*}}i1 %{{.*}}, float %{{.*}}, float %{{.*}}
+// OGCG: fcmp {{.*}}oeq float %{{.*}}, 0.000000e+00
+// OGCG: select {{.*}}i1 %{{.*}}, float 0xFFF0000000000000, float %{{.*}}
+void test_logbf(global float* out, float a) {
+  *out = __builtin_logbf(a);
+}
+
+// CIR-LABEL: @test_logb
+// CIR: cir.call @logb({{.*}}) : (!cir.double) -> !cir.double
+// LLVM: define{{.*}} void @test_logb(
+// LLVM: call {{.*}}double @logb(double %{{.*}})
+// OGCG: define{{.*}} void @test_logb(
+// OGCG: call { double, i32 } @llvm.frexp.f64.i32(double %{{.*}})
+// OGCG: extractvalue { double, i32 } %{{.*}}, 1
+// OGCG: add nsw i32 %{{.*}}, -1
+// OGCG: sitofp i32 %{{.*}} to double
+// OGCG: call {{.*}}double @llvm.fabs.f64(double %{{.*}})
+// OGCG: fcmp {{.*}}one double %{{.*}}, 0x7FF0000000000000
+// OGCG: select {{.*}}i1 %{{.*}}, double %{{.*}}, double %{{.*}}
+// OGCG: fcmp {{.*}}oeq double %{{.*}}, 0.000000e+00
+// OGCG: select {{.*}}i1 %{{.*}}, double 0xFFF0000000000000, double %{{.*}}
+void test_logb(global double* out, double a) {
+  *out = __builtin_logb(a);
+}
+
+// CIR-LABEL: @test_scalbnf
+// CIR: cir.call @scalbnf({{.*}}) : (!cir.float, !s32i) -> !cir.float
+// LLVM: define{{.*}} void @test_scalbnf(
+// LLVM: call {{.*}}float @scalbnf(float %{{.*}}, i32 %{{.*}})
+// OGCG: define{{.*}} void @test_scalbnf(
+// OGCG: call {{.*}}float @llvm.ldexp.f32.i32(float %{{.*}}, i32 %{{.*}})
+void test_scalbnf(global float* out, float a, int b) {
+  *out = __builtin_scalbnf(a, b);
+}
+
+// CIR-LABEL: @test_scalbn
+// CIR: cir.call @scalbn({{.*}}) : (!cir.double, !s32i) -> !cir.double
+// LLVM: define{{.*}} void @test_scalbn(
+// LLVM: call {{.*}}double @scalbn(double %{{.*}}, i32 %{{.*}})
+// OGCG: define{{.*}} void @test_scalbn(
+// OGCG: call {{.*}}double @llvm.ldexp.f64.i32(double %{{.*}}, i32 %{{.*}})
+void test_scalbn(global double* out, double a, int b) {
+  *out = __builtin_scalbn(a, b);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-raw-buffer-atomics.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-raw-buffer-atomics.cl
new file mode 100644
index 0000000000000..d170561a2cb13
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-raw-buffer-atomics.cl
@@ -0,0 +1,91 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx90a -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx90a -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx90a \
+// RUN:            -target-feature +atomic-fmin-fmax-global-f32 \
+// RUN:            -target-feature +atomic-fmin-fmax-global-f64 \
+// RUN:            -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test raw buffer atomic builtins
+//===----------------------------------------------------------------------===//
+
+typedef half __attribute__((ext_vector_type(2))) float16x2_t;
+
+// CIR-LABEL: @test_atomic_add_i32
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.add" {{.*}} : (!s32i, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !s32i
+// LLVM-LABEL: define{{.*}} i32 @test_atomic_add_i32
+// LLVM: call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+// OGCG-LABEL: define{{.*}} i32 @test_atomic_add_i32
+// OGCG: call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+int test_atomic_add_i32(__amdgpu_buffer_rsrc_t rsrc, int x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_add_i32(x, rsrc, offset, soffset, 0);
+}
+
+// CIR-LABEL: @test_atomic_fadd_f32
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fadd" {{.*}} : (!cir.float, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !cir.float
+// LLVM-LABEL: define{{.*}} float @test_atomic_fadd_f32
+// LLVM: call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+// OGCG-LABEL: define{{.*}} float @test_atomic_fadd_f32
+// OGCG: call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+float test_atomic_fadd_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_f32(x, rsrc, offset, soffset, 0);
+}
+
+// CIR-LABEL: @test_atomic_fadd_v2f16
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fadd" {{.*}} : (!cir.vector<!cir.f16 x 2>, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !cir.vector<!cir.f16 x 2>
+// LLVM-LABEL: define{{.*}} <2 x half> @test_atomic_fadd_v2f16
+// LLVM: call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+// OGCG-LABEL: define{{.*}} <2 x half> @test_atomic_fadd_v2f16
+// OGCG: call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+float16x2_t test_atomic_fadd_v2f16(__amdgpu_buffer_rsrc_t rsrc, float16x2_t x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fadd_v2f16(x, rsrc, offset, soffset, 0);
+}
+
+// CIR-LABEL: @test_atomic_fmin_f32
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fmin" {{.*}} : (!cir.float, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !cir.float
+// LLVM-LABEL: define{{.*}} float @test_atomic_fmin_f32
+// LLVM: call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+// OGCG-LABEL: define{{.*}} float @test_atomic_fmin_f32
+// OGCG: call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+float test_atomic_fmin_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f32(x, rsrc, offset, soffset, 0);
+}
+
+// CIR-LABEL: @test_atomic_fmin_f64
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fmin" {{.*}} : (!cir.double, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !cir.double
+// LLVM-LABEL: define{{.*}} double @test_atomic_fmin_f64
+// LLVM: call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+// OGCG-LABEL: define{{.*}} double @test_atomic_fmin_f64
+// OGCG: call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+double test_atomic_fmin_f64(__amdgpu_buffer_rsrc_t rsrc, double x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fmin_f64(x, rsrc, offset, soffset, 0);
+}
+
+// CIR-LABEL: @test_atomic_fmax_f32
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fmax" {{.*}} : (!cir.float, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !cir.float
+// LLVM-LABEL: define{{.*}} float @test_atomic_fmax_f32
+// LLVM: call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+// OGCG-LABEL: define{{.*}} float @test_atomic_fmax_f32
+// OGCG: call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+float test_atomic_fmax_f32(__amdgpu_buffer_rsrc_t rsrc, float x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f32(x, rsrc, offset, soffset, 0);
+}
+
+// CIR-LABEL: @test_atomic_fmax_f64
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.atomic.fmax" {{.*}} : (!cir.double, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !cir.double
+// LLVM-LABEL: define{{.*}} double @test_atomic_fmax_f64
+// LLVM: call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+// OGCG-LABEL: define{{.*}} double @test_atomic_fmax_f64
+// OGCG: call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %{{.*}}, ptr addrspace(8) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 0)
+double test_atomic_fmax_f64(__amdgpu_buffer_rsrc_t rsrc, double x, int offset, int soffset) {
+  return __builtin_amdgcn_raw_ptr_buffer_atomic_fmax_f64(x, rsrc, offset, soffset, 0);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-raw-buffer.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-raw-buffer.cl
new file mode 100644
index 0000000000000..90fac2330522e
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-raw-buffer.cl
@@ -0,0 +1,143 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu verde -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu verde -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu verde -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test raw buffer load/store builtins
+//===----------------------------------------------------------------------===//
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned int v2u32 __attribute__((ext_vector_type(2)));
+typedef unsigned int v3u32 __attribute__((ext_vector_type(3)));
+typedef unsigned int v4u32 __attribute__((ext_vector_type(4)));
+
+// CIR-LABEL: @test_raw_buffer_store_b8
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!u8i, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i)
+// LLVM-LABEL: define{{.*}} void @test_raw_buffer_store_b8
+// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} void @test_raw_buffer_store_b8
+// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+void test_raw_buffer_store_b8(u8 vdata, __amdgpu_buffer_rsrc_t rsrc) {
+  __builtin_amdgcn_raw_buffer_store_b8(vdata, rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @test_raw_buffer_store_b16
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!u16i, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i)
+// LLVM-LABEL: define{{.*}} void @test_raw_buffer_store_b16
+// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} void @test_raw_buffer_store_b16
+// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+void test_raw_buffer_store_b16(u16 vdata, __amdgpu_buffer_rsrc_t rsrc) {
+  __builtin_amdgcn_raw_buffer_store_b16(vdata, rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @test_raw_buffer_store_b32
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!u32i, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i)
+// LLVM-LABEL: define{{.*}} void @test_raw_buffer_store_b32
+// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} void @test_raw_buffer_store_b32
+// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+void test_raw_buffer_store_b32(u32 vdata, __amdgpu_buffer_rsrc_t rsrc) {
+  __builtin_amdgcn_raw_buffer_store_b32(vdata, rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @test_raw_buffer_store_b64
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!cir.vector<!u32i x 2>, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i)
+// LLVM-LABEL: define{{.*}} void @test_raw_buffer_store_b64
+// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} void @test_raw_buffer_store_b64
+// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+void test_raw_buffer_store_b64(v2u32 vdata, __amdgpu_buffer_rsrc_t rsrc) {
+  __builtin_amdgcn_raw_buffer_store_b64(vdata, rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @test_raw_buffer_store_b96
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!cir.vector<!u32i x 3>, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i)
+// LLVM-LABEL: define{{.*}} void @test_raw_buffer_store_b96
+// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} void @test_raw_buffer_store_b96
+// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+void test_raw_buffer_store_b96(v3u32 vdata, __amdgpu_buffer_rsrc_t rsrc) {
+  __builtin_amdgcn_raw_buffer_store_b96(vdata, rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @test_raw_buffer_store_b128
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.store" {{.*}} : (!cir.vector<!u32i x 4>, !cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i)
+// LLVM-LABEL: define{{.*}} void @test_raw_buffer_store_b128
+// LLVM: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} void @test_raw_buffer_store_b128
+// OGCG: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> %{{.*}}, ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+void test_raw_buffer_store_b128(v4u32 vdata, __amdgpu_buffer_rsrc_t rsrc) {
+  __builtin_amdgcn_raw_buffer_store_b128(vdata, rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @test_raw_buffer_load_b8
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !u8i
+// LLVM-LABEL: define{{.*}} i8 @test_raw_buffer_load_b8
+// LLVM: call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} i8 @test_raw_buffer_load_b8
+// OGCG: call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+u8 test_raw_buffer_load_b8(__amdgpu_buffer_rsrc_t rsrc) {
+  return __builtin_amdgcn_raw_buffer_load_b8(rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @test_raw_buffer_load_b16
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !u16i
+// LLVM-LABEL: define{{.*}} i16 @test_raw_buffer_load_b16
+// LLVM: call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} i16 @test_raw_buffer_load_b16
+// OGCG: call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+u16 test_raw_buffer_load_b16(__amdgpu_buffer_rsrc_t rsrc) {
+  return __builtin_amdgcn_raw_buffer_load_b16(rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @test_raw_buffer_load_b32
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !u32i
+// LLVM-LABEL: define{{.*}} i32 @test_raw_buffer_load_b32
+// LLVM: call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} i32 @test_raw_buffer_load_b32
+// OGCG: call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+u32 test_raw_buffer_load_b32(__amdgpu_buffer_rsrc_t rsrc) {
+  return __builtin_amdgcn_raw_buffer_load_b32(rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @test_raw_buffer_load_b64
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !cir.vector<!u32i x 2>
+// LLVM-LABEL: define{{.*}} <2 x i32> @test_raw_buffer_load_b64
+// LLVM: call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} <2 x i32> @test_raw_buffer_load_b64
+// OGCG: call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+v2u32 test_raw_buffer_load_b64(__amdgpu_buffer_rsrc_t rsrc) {
+  return __builtin_amdgcn_raw_buffer_load_b64(rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @test_raw_buffer_load_b96
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !cir.vector<!u32i x 3>
+// LLVM-LABEL: define{{.*}} <3 x i32> @test_raw_buffer_load_b96
+// LLVM: call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} <3 x i32> @test_raw_buffer_load_b96
+// OGCG: call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+v3u32 test_raw_buffer_load_b96(__amdgpu_buffer_rsrc_t rsrc) {
+  return __builtin_amdgcn_raw_buffer_load_b96(rsrc, 0, 0, 0);
+}
+
+// CIR-LABEL: @test_raw_buffer_load_b128
+// CIR: cir.llvm.intrinsic "amdgcn.raw.ptr.buffer.load" {{.*}} : (!cir.ptr<!void, target_address_space(8)>, !s32i, !s32i, !s32i) -> !cir.vector<!u32i x 4>
+// LLVM-LABEL: define{{.*}} <4 x i32> @test_raw_buffer_load_b128
+// LLVM: call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+// OGCG-LABEL: define{{.*}} <4 x i32> @test_raw_buffer_load_b128
+// OGCG: call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) %{{.*}}, i32 0, i32 0, i32 0)
+v4u32 test_raw_buffer_load_b128(__amdgpu_buffer_rsrc_t rsrc) {
+  return __builtin_amdgcn_raw_buffer_load_b128(rsrc, 0, 0, 0);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-vi.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-vi.cl
new file mode 100644
index 0000000000000..e7479a24edfee
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins-amdgcn-vi.cl
@@ -0,0 +1,100 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu tonga -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx900 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1010 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1012 -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu tonga -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx900 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1010 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu gfx1012 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu tonga -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx900 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx1010 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu gfx1012 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU builtins
+//===----------------------------------------------------------------------===//
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// CIR-LABEL: @test_div_fixup_f16
+// CIR: cir.llvm.intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.f16, !cir.f16, !cir.f16) -> !cir.f16
+// LLVM: define{{.*}} void @test_div_fixup_f16
+// LLVM: call{{.*}} half @llvm.amdgcn.div.fixup.f16(half %{{.+}}, half %{{.+}}, half %{{.+}})
+// OGCG: define{{.*}} void @test_div_fixup_f16
+// OGCG: call{{.*}} half @llvm.amdgcn.div.fixup.f16(half %{{.+}}, half %{{.+}}, half %{{.+}})
+void test_div_fixup_f16(global half* out, half a, half b, half c) {
+  *out = __builtin_amdgcn_div_fixuph(a, b, c);
+}
+
+// CIR-LABEL: @test_rcp_f16
+// CIR: cir.llvm.intrinsic "amdgcn.rcp" {{.*}} : (!cir.f16) -> !cir.f16
+// LLVM: define{{.*}} void @test_rcp_f16
+// LLVM: call{{.*}} half @llvm.amdgcn.rcp.f16(half %{{.*}})
+// OGCG: define{{.*}} void @test_rcp_f16
+// OGCG: call{{.*}} half @llvm.amdgcn.rcp.f16(half %{{.*}})
+void test_rcp_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_rcph(a);
+}
+
+// CIR-LABEL: @test_sqrt_f16
+// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.f16) -> !cir.f16
+// LLVM: define{{.*}} void @test_sqrt_f16
+// LLVM: call{{.*}} half @llvm.{{((amdgcn.){0,1})}}sqrt.f16(half %{{.*}})
+// OGCG: define{{.*}} void @test_sqrt_f16
+// OGCG: call{{.*}} half @llvm.{{((amdgcn.){0,1})}}sqrt.f16(half %{{.*}})
+void test_sqrt_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_sqrth(a);
+}
+
+// CIR-LABEL: @test_rsq_f16
+// CIR: cir.llvm.intrinsic "amdgcn.rsq" {{.*}} : (!cir.f16) -> !cir.f16
+// LLVM: define{{.*}} void @test_rsq_f16
+// LLVM: call{{.*}} half @llvm.amdgcn.rsq.f16(half %{{.*}})
+// OGCG: define{{.*}} void @test_rsq_f16
+// OGCG: call{{.*}} half @llvm.amdgcn.rsq.f16(half %{{.*}})
+void test_rsq_f16(global half* out, half a)
+{
+  *out = __builtin_amdgcn_rsqh(a);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins_amdgcn.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins_amdgcn.cl
new file mode 100644
index 0000000000000..4bf6ffe959282
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/builtins_amdgcn.cl
@@ -0,0 +1,442 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu tahiti -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \
+// RUN:            -target-cpu tahiti -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \
+// RUN:            -target-cpu tahiti -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU builtins
+//===----------------------------------------------------------------------===//
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+typedef unsigned long ulong;
+typedef unsigned int uint;
+
+// CIR-LABEL: @test_wave_reduce_add_u32
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.add" {{.*}} : (!u32i, !s32i) -> !u32i
+// LLVM: define{{.*}} void @test_wave_reduce_add_u32(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @test_wave_reduce_add_u32(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %{{.*}}, i32 0)
+void test_wave_reduce_add_u32(global int* out, int in) {
+  *out = __builtin_amdgcn_wave_reduce_add_u32(in, 0);
+}
+
+// CIR-LABEL: @test_wave_reduce_add_u64
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.add" {{.*}} : (!u64i, !s32i) -> !u64i
+// LLVM: define{{.*}} void @test_wave_reduce_add_u64(
+// LLVM: call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @test_wave_reduce_add_u64(
+// OGCG: call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 %{{.*}}, i32 0)
+void test_wave_reduce_add_u64(global long* out, long in) {
+  *out = __builtin_amdgcn_wave_reduce_add_u64(in, 0);
+}
+
+// CIR-LABEL: @test_wave_reduce_sub_u32
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.sub" {{.*}} : (!u32i, !s32i) -> !u32i
+// LLVM: define{{.*}} void @test_wave_reduce_sub_u32(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @test_wave_reduce_sub_u32(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %{{.*}}, i32 0)
+void test_wave_reduce_sub_u32(global int* out, int in) {
+  *out = __builtin_amdgcn_wave_reduce_sub_u32(in, 0);
+}
+
+// CIR-LABEL: @test_wave_reduce_sub_u64
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.sub" {{.*}} : (!u64i, !s32i) -> !u64i
+// LLVM: define{{.*}} void @test_wave_reduce_sub_u64(
+// LLVM: call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @test_wave_reduce_sub_u64(
+// OGCG: call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 %{{.*}}, i32 0)
+void test_wave_reduce_sub_u64(global long* out, long in) {
+  *out = __builtin_amdgcn_wave_reduce_sub_u64(in, 0);
+}
+
+// CIR-LABEL: @test_wave_reduce_min_i32
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.min" {{.*}} : (!s32i, !s32i) -> !s32i
+// LLVM: define{{.*}} void @test_wave_reduce_min_i32(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @test_wave_reduce_min_i32(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 %{{.*}}, i32 0)
+void test_wave_reduce_min_i32(global int* out, int in) {
+  *out = __builtin_amdgcn_wave_reduce_min_i32(in, 0);
+}
+
+// CIR-LABEL: @test_wave_reduce_min_u32
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.umin" {{.*}} : (!u32i, !s32i) -> !u32i
+// LLVM: define{{.*}} void @test_wave_reduce_min_u32(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @test_wave_reduce_min_u32(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 %{{.*}}, i32 0)
+void test_wave_reduce_min_u32(global uint* out, uint in) {
+  *out = __builtin_amdgcn_wave_reduce_min_u32(in, 0);
+}
+
+// CIR-LABEL: @test_wave_reduce_min_i64
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.min" {{.*}} : (!s64i, !s32i) -> !s64i
+// LLVM: define{{.*}} void @test_wave_reduce_min_i64(
+// LLVM: call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @test_wave_reduce_min_i64(
+// OGCG: call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 %{{.*}}, i32 0)
+void test_wave_reduce_min_i64(global long* out, long in) {
+  *out = __builtin_amdgcn_wave_reduce_min_i64(in, 0);
+}
+
+// CIR-LABEL: @test_wave_reduce_min_u64
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.umin" {{.*}} : (!u64i, !s32i) -> !u64i
+// LLVM: define{{.*}} void @test_wave_reduce_min_u64(
+// LLVM: call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @test_wave_reduce_min_u64(
+// OGCG: call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 %{{.*}}, i32 0)
+void test_wave_reduce_min_u64(global ulong* out, ulong in) {
+  *out = __builtin_amdgcn_wave_reduce_min_u64(in, 0);
+}
+
+// CIR-LABEL: @test_wave_reduce_max_i32
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.max" {{.*}} : (!s32i, !s32i) -> !s32i
+// LLVM: define{{.*}} void @test_wave_reduce_max_i32(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @test_wave_reduce_max_i32(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 %{{.*}}, i32 0)
+void test_wave_reduce_max_i32(global int* out, int in) {
+  *out = __builtin_amdgcn_wave_reduce_max_i32(in, 0);
+}
+
+// CIR-LABEL: @test_wave_reduce_max_u32
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.umax" {{.*}} : (!u32i, !s32i) -> !u32i
+// LLVM: define{{.*}} void @test_wave_reduce_max_u32(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @test_wave_reduce_max_u32(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 %{{.*}}, i32 0)
+void test_wave_reduce_max_u32(global uint* out, uint in) {
+  *out = __builtin_amdgcn_wave_reduce_max_u32(in, 0);
+}
+
+// CIR-LABEL: @test_wave_reduce_max_i64
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.max" {{.*}} : (!s64i, !s32i) -> !s64i
+// LLVM: define{{.*}} void @test_wave_reduce_max_i64(
+// LLVM: call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @test_wave_reduce_max_i64(
+// OGCG: call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 %{{.*}}, i32 0)
+void test_wave_reduce_max_i64(global long* out, long in) {
+  *out = __builtin_amdgcn_wave_reduce_max_i64(in, 0);
+}
+
+// CIR-LABEL: @test_wave_reduce_max_u64
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.umax" {{.*}} : (!u64i, !s32i) -> !u64i
+// LLVM: define{{.*}} void @test_wave_reduce_max_u64(
+// LLVM: call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @test_wave_reduce_max_u64(
+// OGCG: call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 %{{.*}}, i32 0)
+void test_wave_reduce_max_u64(global ulong* out, ulong in) {
+  *out = __builtin_amdgcn_wave_reduce_max_u64(in, 0);
+}
+
+// CIR-LABEL: @test_wave_reduce_and_b32
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.and" {{.*}} : (!s32i, !s32i) -> !s32i
+// LLVM: define{{.*}} void @test_wave_reduce_and_b32(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @test_wave_reduce_and_b32(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 %{{.*}}, i32 0)
+void test_wave_reduce_and_b32(global int* out, int in) {
+  *out = __builtin_amdgcn_wave_reduce_and_b32(in, 0);
+}
+
+// CIR-LABEL: @test_wave_reduce_and_b64
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.and" {{.*}} : (!s64i, !s32i) -> !s64i
+// LLVM: define{{.*}} void @test_wave_reduce_and_b64(
+// LLVM: call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @test_wave_reduce_and_b64(
+// OGCG: call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 %{{.*}}, i32 0)
+void test_wave_reduce_and_b64(global long* out, long in) {
+  *out = __builtin_amdgcn_wave_reduce_and_b64(in, 0);
+}
+
+// CIR-LABEL: @test_wave_reduce_or_b32
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.or" {{.*}} : (!s32i, !s32i) -> !s32i
+// LLVM: define{{.*}} void @test_wave_reduce_or_b32(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @test_wave_reduce_or_b32(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 %{{.*}}, i32 0)
+void test_wave_reduce_or_b32(global int* out, int in) {
+  *out = __builtin_amdgcn_wave_reduce_or_b32(in, 0);
+}
+
+// CIR-LABEL: @test_wave_reduce_or_b64
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.or" {{.*}} : (!s64i, !s32i) -> !s64i
+// LLVM: define{{.*}} void @test_wave_reduce_or_b64(
+// LLVM: call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @test_wave_reduce_or_b64(
+// OGCG: call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 %{{.*}}, i32 0)
+void test_wave_reduce_or_b64(global long* out, long in) {
+  *out = __builtin_amdgcn_wave_reduce_or_b64(in, 0);
+}
+
+// CIR-LABEL: @test_wave_reduce_xor_b32
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.xor" {{.*}} : (!s32i, !s32i) -> !s32i
+// LLVM: define{{.*}} void @test_wave_reduce_xor_b32(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @test_wave_reduce_xor_b32(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %{{.*}}, i32 0)
+void test_wave_reduce_xor_b32(global int* out, int in) {
+  *out = __builtin_amdgcn_wave_reduce_xor_b32(in, 0);
+}
+
+// CIR-LABEL: @test_wave_reduce_xor_b64
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.xor" {{.*}} : (!s64i, !s32i) -> !s64i
+// LLVM: define{{.*}} void @test_wave_reduce_xor_b64(
+// LLVM: call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 %{{.*}}, i32 0)
+// OGCG: define{{.*}} void @test_wave_reduce_xor_b64(
+// OGCG: call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 %{{.*}}, i32 0)
+void test_wave_reduce_xor_b64(global long* out, long in) {
+  *out = __builtin_amdgcn_wave_reduce_xor_b64(in, 0);
+}
+
+// CIR-LABEL: @test_wave_reduce_add_u32_iterative
+// CIR: cir.const #cir.int<1> : !s32i
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.add" {{.*}} : (!u32i, !s32i) -> !u32i
+// LLVM: define{{.*}} void @test_wave_reduce_add_u32_iterative(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %{{.*}}, i32 1)
+// OGCG: define{{.*}} void @test_wave_reduce_add_u32_iterative(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %{{.*}}, i32 1)
+void test_wave_reduce_add_u32_iterative(global int* out, int in) {
+  *out = __builtin_amdgcn_wave_reduce_add_u32(in, 1);
+}
+// CIR-LABEL: @test_wave_reduce_add_u32_dpp
+// CIR: cir.const #cir.int<2> : !s32i
+// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.add" {{.*}} : (!u32i, !s32i) -> !u32i
+// LLVM: define{{.*}} void @test_wave_reduce_add_u32_dpp(
+// LLVM: call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %{{.*}}, i32 2)
+// OGCG: define{{.*}} void @test_wave_reduce_add_u32_dpp(
+// OGCG: call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %{{.*}}, i32 2)
+void test_wave_reduce_add_u32_dpp(global int* out, int in) {
+  *out = __builtin_amdgcn_wave_reduce_add_u32(in, 2);
+}
+
+// CIR-LABEL: @test_div_scale_f64
+// CIR: cir.llvm.intrinsic "amdgcn.div.scale" {{.*}} : (!cir.double, !cir.double, !cir.bool)
+// LLVM: define{{.*}} void @test_div_scale_f64
+// LLVM: call { double, i1 } @llvm.amdgcn.div.scale.f64(double %{{.+}}, double %{{.+}}, i1 true)
+// OGCG: define{{.*}} void @test_div_scale_f64
+// OGCG: call { double, i1 } @llvm.amdgcn.div.scale.f64(double %{{.+}}, double %{{.+}}, i1 true)
+void test_div_scale_f64(global double* out, global int* flagout, double a, double b, global bool* flag)
+{
+  *out = __builtin_amdgcn_div_scale(a, b, true, flag);
+  *flagout = *flag;
+}
+
+// CIR-LABEL: @test_div_scale_f32
+// CIR: cir.llvm.intrinsic "amdgcn.div.scale" {{.*}} : (!cir.float, !cir.float, !cir.bool)
+// LLVM: define{{.*}} void @test_div_scale_f32
+// LLVM: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %{{.+}}, float %{{.+}}, i1 true)
+// OGCG: define{{.*}} void @test_div_scale_f32
+// OGCG: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %{{.+}}, float %{{.+}}, i1 true)
+void test_div_scale_f32(global float* out, global bool* flagout, float a, float b, global bool* flag)
+{
+  *out = __builtin_amdgcn_div_scalef(a, b, true, flag);
+  *flagout = *flag;
+}
+
+// CIR-LABEL: @test_div_scale_f32_global_ptr
+// CIR: cir.llvm.intrinsic "amdgcn.div.scale" {{.*}} : (!cir.float, !cir.float, !cir.bool)
+// LLVM: define{{.*}} void @test_div_scale_f32_global_ptr
+// LLVM: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %{{.+}}, float %{{.+}}, i1 true)
+// OGCG: define{{.*}} void @test_div_scale_f32_global_ptr
+// OGCG: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %{{.+}}, float %{{.+}}, i1 true)
+void test_div_scale_f32_global_ptr(global float* out, global int* flagout, float a, float b, global bool* flag)
+{
+  *out = __builtin_amdgcn_div_scalef(a, b, true, flag);
+}
+
+// CIR-LABEL: @test_div_scale_f32_generic_ptr
+// CIR: cir.llvm.intrinsic "amdgcn.div.scale" {{.*}} : (!cir.float, !cir.float, !cir.bool)
+// LLVM: define{{.*}} void @test_div_scale_f32_generic_ptr
+// LLVM: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %{{.+}}, float %{{.+}}, i1 true)
+// OGCG: define{{.*}} void @test_div_scale_f32_generic_ptr
+// OGCG: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %{{.+}}, float %{{.+}}, i1 true)
+void test_div_scale_f32_generic_ptr(global float* out, global int* flagout, float a, float b, generic bool* flag)
+{
+  *out = __builtin_amdgcn_div_scalef(a, b, true, flag);
+}
+
+// CIR-LABEL: @test_div_fmas_f32
+// CIR: cir.llvm.intrinsic "amdgcn.div.fmas" {{.*}} : (!cir.float, !cir.float, !cir.float, !cir.bool) -> !cir.float
+// LLVM: define{{.*}} void @test_div_fmas_f32
+// LLVM: call float @llvm.amdgcn.div.fmas.f32(float %{{.+}}, float %{{.+}}, float %{{.+}}, i1 %{{.*}})
+// OGCG: define{{.*}} void @test_div_fmas_f32
+// OGCG: call float @llvm.amdgcn.div.fmas.f32(float %{{.+}}, float %{{.+}}, float %{{.+}}, i1 %{{.*}})
+void test_div_fmas_f32(global float* out, float a, float b, float c, int d)
+{
+  *out = __builtin_amdgcn_div_fmasf(a, b, c, d);
+}
+
+// CIR-LABEL: @test_div_fmas_f64
+// CIR: cir.llvm.intrinsic "amdgcn.div.fmas" {{.*}} : (!cir.double, !cir.double, !cir.double, !cir.bool) -> !cir.double
+// LLVM: define{{.*}} void @test_div_fmas_f64
+// LLVM: call double @llvm.amdgcn.div.fmas.f64(double %{{.+}}, double %{{.+}}, double %{{.+}}, i1 %{{.*}})
+// OGCG: define{{.*}} void @test_div_fmas_f64
+// OGCG: call double @llvm.amdgcn.div.fmas.f64(double %{{.+}}, double %{{.+}}, double %{{.+}}, i1 %{{.*}})
+void test_div_fmas_f64(global double* out, double a, double b, double c, int d)
+{
+  *out = __builtin_amdgcn_div_fmas(a, b, c, d);
+}
+
+// CIR-LABEL: @test_ds_swizzle
+// CIR: cir.llvm.intrinsic "amdgcn.ds.swizzle" {{.*}} : (!s32i, !s32i) -> !s32i
+// LLVM: define{{.*}} void @test_ds_swizzle
+// LLVM: call i32 @llvm.amdgcn.ds.swizzle(i32 %{{.*}}, i32 32)
+// OGCG: define{{.*}} void @test_ds_swizzle
+// OGCG: call i32 @llvm.amdgcn.ds.swizzle(i32 %{{.*}}, i32 32)
+void test_ds_swizzle(global int* out, int a) {
+  *out = __builtin_amdgcn_ds_swizzle(a, 32);
+}
+
+// CIR-LABEL: @test_readlane
+// CIR: cir.llvm.intrinsic "amdgcn.readlane" {{.*}} : (!s32i, !s32i) -> !s32i
+// LLVM: define{{.*}} void @test_readlane
+// LLVM: call i32 @llvm.amdgcn.readlane.i32(i32 %{{.*}}, i32 %{{.*}})
+// OGCG: define{{.*}} void @test_readlane
+// OGCG: call i32 @llvm.amdgcn.readlane.i32(i32 %{{.*}}, i32 %{{.*}})
+void test_readlane(global int* out, int a, int b) {
+  *out = __builtin_amdgcn_readlane(a, b);
+}
+
+// CIR-LABEL: @test_readfirstlane
+// CIR: cir.llvm.intrinsic "amdgcn.readfirstlane" {{.*}} : (!s32i) -> !s32i
+// LLVM: define{{.*}} void @test_readfirstlane
+// LLVM: call i32 @llvm.amdgcn.readfirstlane.i32(i32 %{{.*}})
+// OGCG: define{{.*}} void @test_readfirstlane
+// OGCG: call i32 @llvm.amdgcn.readfirstlane.i32(i32 %{{.*}})
+void test_readfirstlane(global int* out, int a) {
+  *out = __builtin_amdgcn_readfirstlane(a);
+}
+
+// CIR-LABEL: @test_div_fixup_f32
+// CIR: cir.llvm.intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.float, !cir.float, !cir.float) -> !cir.float
+// LLVM: define{{.*}} void @test_div_fixup_f32
+// LLVM: call{{.*}} float @llvm.amdgcn.div.fixup.f32(float %{{.+}}, float %{{.+}}, float %{{.+}})
+// OGCG: define{{.*}} void @test_div_fixup_f32
+// OGCG: call{{.*}} float @llvm.amdgcn.div.fixup.f32(float %{{.+}}, float %{{.+}}, float %{{.+}})
+void test_div_fixup_f32(global float* out, float a, float b, float c)
+{
+  *out = __builtin_amdgcn_div_fixupf(a, b, c);
+}
+
+// CIR-LABEL: @test_div_fixup_f64
+// CIR: cir.llvm.intrinsic "amdgcn.div.fixup" {{.*}} : (!cir.double, !cir.double, !cir.double) -> !cir.double
+// LLVM: define{{.*}} void @test_div_fixup_f64
+// LLVM: call{{.*}} double @llvm.amdgcn.div.fixup.f64(double %{{.+}}, double %{{.+}}, double %{{.+}})
+// OGCG: define{{.*}} void @test_div_fixup_f64
+// OGCG: call{{.*}} double @llvm.amdgcn.div.fixup.f64(double %{{.+}}, double %{{.+}}, double %{{.+}})
+void test_div_fixup_f64(global double* out, double a, double b, double c)
+{
+  *out = __builtin_amdgcn_div_fixup(a, b, c);
+}
+
+// CIR-LABEL: @test_trig_preop_f32
+// CIR: cir.llvm.intrinsic "amdgcn.trig.preop" {{.*}} : (!cir.float, !s32i) -> !cir.float
+// LLVM: define{{.*}} void @test_trig_preop_f32
+// LLVM: call{{.*}} float @llvm.amdgcn.trig.preop.f32(float %{{.+}}, i32 %{{.*}})
+// OGCG: define{{.*}} void @test_trig_preop_f32
+// OGCG: call{{.*}} float @llvm.amdgcn.trig.preop.f32(float %{{.+}}, i32 %{{.*}})
+void test_trig_preop_f32(global float* out, float a, int b) {
+  *out = __builtin_amdgcn_trig_preopf(a, b);
+}
+
+// CIR-LABEL: @test_trig_preop_f64
+// CIR: cir.llvm.intrinsic "amdgcn.trig.preop" {{.*}} : (!cir.double, !s32i) -> !cir.double
+// LLVM: define{{.*}} void @test_trig_preop_f64
+// LLVM: call{{.*}} double @llvm.amdgcn.trig.preop.f64(double %{{.+}}, i32 %{{.*}})
+// OGCG: define{{.*}} void @test_trig_preop_f64
+// OGCG: call{{.*}} double @llvm.amdgcn.trig.preop.f64(double %{{.+}}, i32 %{{.*}})
+void test_trig_preop_f64(global double* out, double a, int b) {
+  *out = __builtin_amdgcn_trig_preop(a, b);
+}
+
+// CIR-LABEL: @test_rcp_f32
+// CIR: cir.llvm.intrinsic "amdgcn.rcp" {{.*}} : (!cir.float) -> !cir.float
+// LLVM: define{{.*}} void @test_rcp_f32
+// LLVM: call{{.*}} float @llvm.amdgcn.rcp.f32(float %{{.*}})
+// OGCG: define{{.*}} void @test_rcp_f32
+// OGCG: call{{.*}} float @llvm.amdgcn.rcp.f32(float %{{.*}})
+void test_rcp_f32(global float* out, float a) {
+  *out = __builtin_amdgcn_rcpf(a);
+}
+
+// CIR-LABEL: @test_rcp_f64
+// CIR: cir.llvm.intrinsic "amdgcn.rcp" {{.*}} : (!cir.double) -> !cir.double
+// LLVM: define{{.*}} void @test_rcp_f64
+// LLVM: call{{.*}} double @llvm.amdgcn.rcp.f64(double %{{.*}})
+// OGCG: define{{.*}} void @test_rcp_f64
+// OGCG: call{{.*}} double @llvm.amdgcn.rcp.f64(double %{{.*}})
+void test_rcp_f64(global double* out, double a) {
+  *out = __builtin_amdgcn_rcp(a);
+}
+
+// CIR-LABEL: @test_sqrt_f32
+// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.float) -> !cir.float
+// LLVM: define{{.*}} void @test_sqrt_f32
+// LLVM: call{{.*}} float @llvm.amdgcn.sqrt.f32(float %{{.*}})
+// OGCG: define{{.*}} void @test_sqrt_f32
+// OGCG: call{{.*}} float @llvm.amdgcn.sqrt.f32(float %{{.*}})
+void test_sqrt_f32(global float* out, float a) {
+  *out = __builtin_amdgcn_sqrtf(a);
+}
+
+// CIR-LABEL: @test_sqrt_f64
+// CIR: cir.llvm.intrinsic "amdgcn.sqrt" {{.*}} : (!cir.double) -> !cir.double
+// LLVM: define{{.*}} void @test_sqrt_f64
+// LLVM: call{{.*}} double @llvm.amdgcn.sqrt.f64(double %{{.*}})
+// OGCG: define{{.*}} void @test_sqrt_f64
+// OGCG: call{{.*}} double @llvm.amdgcn.sqrt.f64(double %{{.*}})
+void test_sqrt_f64(global double* out, double a) {
+  *out = __builtin_amdgcn_sqrt(a);
+}
+
+// CIR-LABEL: @test_rsq_f32
+// CIR: cir.llvm.intrinsic "amdgcn.rsq" {{.*}} : (!cir.float) -> !cir.float
+// LLVM: define{{.*}} void @test_rsq_f32
+// LLVM: call{{.*}} float @llvm.amdgcn.rsq.f32(float %{{.*}})
+// OGCG: define{{.*}} void @test_rsq_f32
+// OGCG: call{{.*}} float @llvm.amdgcn.rsq.f32(float %{{.*}})
+void test_rsq_f32(global float* out, float a) {
+  *out = __builtin_amdgcn_rsqf(a);
+}
+
+// CIR-LABEL: @test_rsq_f64
+// CIR: cir.llvm.intrinsic "amdgcn.rsq" {{.*}} : (!cir.double) -> !cir.double
+// LLVM: define{{.*}} void @test_rsq_f64
+// LLVM: call{{.*}} double @llvm.amdgcn.rsq.f64(double %{{.*}})
+// OGCG: define{{.*}} void @test_rsq_f64
+// OGCG: call{{.*}} double @llvm.amdgcn.rsq.f64(double %{{.*}})
+void test_rsq_f64(global double* out, double a) {
+  *out = __builtin_amdgcn_rsq(a);
+}
+
+// CIR-LABEL: @test_rsq_clamp_f32
+// CIR: cir.llvm.intrinsic "amdgcn.rsq.clamp" {{.*}} : (!cir.float) -> !cir.float
+// LLVM: define{{.*}} void @test_rsq_clamp_f32
+// LLVM: call{{.*}} float @llvm.amdgcn.rsq.clamp.f32(float %{{.*}})
+// OGCG: define{{.*}} void @test_rsq_clamp_f32
+// OGCG: call{{.*}} float @llvm.amdgcn.rsq.clamp.f32(float %{{.*}})
+void test_rsq_clamp_f32(global float* out, float a) {
+  *out = __builtin_amdgcn_rsq_clampf(a);
+}
+
+// CIR-LABEL: @test_rsq_clamp_f64
+// CIR: cir.llvm.intrinsic "amdgcn.rsq.clamp" {{.*}} : (!cir.double) -> !cir.double
+// LLVM: define{{.*}} void @test_rsq_clamp_f64
+// LLVM: call{{.*}} double @llvm.amdgcn.rsq.clamp.f64(double %{{.*}})
+// OGCG: define{{.*}} void @test_rsq_clamp_f64
+// OGCG: call{{.*}} double @llvm.amdgcn.rsq.clamp.f64(double %{{.*}})
+void test_rsq_clamp_f64(global double* out, double a) {
+  *out = __builtin_amdgcn_rsq_clamp(a);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/cl-uniform-wg-size.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/cl-uniform-wg-size.cl
new file mode 100644
index 0000000000000..8a6e09f8676ef
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/cl-uniform-wg-size.cl
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-cir -O0 -cl-std=CL1.2 -o %t.cl12.cir %s
+// RUN: FileCheck %s -input-file=%t.cl12.cir -check-prefixes CIR,CIR-UNIFORM
+// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-cir -O0 -cl-std=CL2.0 -o %t.cl20.cir %s
+// RUN: FileCheck %s -input-file=%t.cl20.cir -check-prefixes CIR,CIR-NONUNIFORM
+// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-cir -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o %t.cl20.uniform1.cir %s
+// RUN: FileCheck %s -input-file=%t.cl20.uniform1.cir -check-prefixes CIR,CIR-UNIFORM
+// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-cir -O0 -cl-std=CL2.0 -foffload-uniform-block -o %t.cl20.uniform2.cir %s
+// RUN: FileCheck %s -input-file=%t.cl20.uniform2.cir -check-prefixes CIR,CIR-UNIFORM
+
+// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-llvm -fno-clangir-call-conv-lowering -O0 -cl-std=CL1.2 -o %t.cl12.ll %s
+// RUN: FileCheck %s -input-file=%t.cl12.ll -check-prefixes LLVM,LLVM-UNIFORM
+// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-llvm -fno-clangir-call-conv-lowering -O0 -cl-std=CL2.0 -o %t.cl20.ll %s
+// RUN: FileCheck %s -input-file=%t.cl20.ll -check-prefixes LLVM,LLVM-NONUNIFORM
+// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-llvm -fno-clangir-call-conv-lowering -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o %t.cl20.uniform1.ll %s
+// RUN: FileCheck %s -input-file=%t.cl20.uniform1.ll -check-prefixes LLVM,LLVM-UNIFORM
+// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-llvm -fno-clangir-call-conv-lowering -O0 -cl-std=CL2.0 -foffload-uniform-block -o %t.cl20.uniform2.ll %s
+// RUN: FileCheck %s -input-file=%t.cl20.uniform2.ll -check-prefixes LLVM,LLVM-UNIFORM
+
+// CIR-LABEL: #fn_attr =
+// CIR: cl.kernel = #cir.cl.kernel
+// CIR-UNIFORM: cl.uniform_work_group_size = #cir.cl.uniform_work_group_size
+// CIR-NONUNIFORM-NOT: cl.uniform_work_group_size = #cir.cl.uniform_work_group_size
+
+// CIR-LABEL: #fn_attr1 =
+// CIR-NOT: cl.kernel = #cir.cl.kernel
+// CIR-NOT: cl.uniform_work_group_size
+
+kernel void ker() {};
+// CIR: cir.func{{.*}} @ker{{.*}} extra(#fn_attr)
+// LLVM: define{{.*}}@ker() #0
+
+void foo() {};
+// CIR: cir.func{{.*}} @foo{{.*}} extra(#fn_attr1)
+// LLVM: define{{.*}}@foo() #1
+
+// LLVM-LABEL: attributes #0
+// LLVM-UNIFORM: "uniform-work-group-size"="true"
+// LLVM-NONUNIFORM: "uniform-work-group-size"="false"
+
+// LLVM-LABEL: attributes #1
+// LLVM-NOT: uniform-work-group-size
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/convergent.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/convergent.cl
new file mode 100644
index 0000000000000..f999736770320
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/convergent.cl
@@ -0,0 +1,105 @@
+// RUN: %clang_cc1 -fclangir -triple spirv64-unknown-unknown -emit-cir %s -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir --check-prefix=CIR
+// RUN: %clang_cc1 -fclangir -triple spirv64-unknown-unknown -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck %s --input-file=%t.ll --check-prefix=LLVM
+
+// In ClangIR for OpenCL, all functions should be marked convergent.
+// In LLVM IR, it is initially assumed convergent, but can be deduced to not require it.
+
+// CIR: #fn_attr = #cir<extra({convergent = #cir.convergent
+
+__attribute__((noinline))
+void non_convfun(void) {
+  volatile int* p;
+  *p = 0;
+}
+// CIR: cir.func no_inline @non_convfun(){{.*}} extra(#fn_attr)
+// LLVM: define{{.*}} spir_func void @non_convfun() local_unnamed_addr #[[NON_CONV_ATTR:[0-9]+]]
+// LLVM: ret void
+
+// External functions should be assumed convergent.
+void f(void);
+// CIR: cir.func{{.+}} @f(){{.*}} extra(#fn_attr)
+// LLVM: declare spir_func void @f() local_unnamed_addr #[[CONV_ATTR:[0-9]+]]
+void g(void);
+// CIR: cir.func{{.+}} @g(){{.*}} extra(#fn_attr)
+// LLVM: declare spir_func void @g() local_unnamed_addr #[[CONV_ATTR]]
+
+// Test two if's are merged and non_convfun duplicated.
+void test_merge_if(int a) {
+  if (a) {
+    f();
+  }
+  non_convfun();
+  if (a) {
+    g();
+  }
+}
+// CIR: cir.func{{.*}} @test_merge_if{{.*}} extra(#fn_attr)
+
+// The LLVM IR below is equivalent to:
+//    if (a) {
+//      f();
+//      non_convfun();
+//      g();
+//    } else {
+//      non_convfun();
+//    }
+
+// LLVM-LABEL: define{{.*}} spir_func void @test_merge_if
+// LLVM:         %[[tobool:.+]] = icmp eq i32 %[[ARG:.+]], 0
+// LLVM:         br i1 %[[tobool]], label %[[if_end3_critedge:[^,]+]], label %[[if_then:[^,]+]]
+
+// LLVM:       [[if_end3_critedge]]:
+// LLVM:         tail call spir_func void @non_convfun()
+// LLVM:         br label %[[if_end3:[^,]+]]
+
+// LLVM:       [[if_then]]:
+// LLVM:         tail call spir_func void @f()
+// LLVM:         tail call spir_func void @non_convfun()
+// LLVM:         tail call spir_func void @g()
+
+// LLVM:         br label %[[if_end3]]
+
+// LLVM:       [[if_end3]]:
+// LLVM:         ret void
+
+
+void convfun(void) __attribute__((convergent));
+// CIR: cir.func{{.+}} @convfun(){{.*}} extra(#fn_attr)
+// LLVM: declare spir_func void @convfun() local_unnamed_addr #[[CONV_ATTR]]
+
+// Test two if's are not merged.
+void test_no_merge_if(int a) {
+  if (a) {
+    f();
+  }
+  convfun();
+  if(a) {
+    g();
+  }
+}
+// CIR: cir.func{{.*}} @test_no_merge_if{{.*}} extra(#fn_attr)
+
+// LLVM-LABEL: define{{.*}} spir_func void @test_no_merge_if
+// LLVM:         %[[tobool:.+]] = icmp eq i32 %[[ARG:.+]], 0
+// LLVM:         br i1 %[[tobool]], label %[[if_end:[^,]+]], label %[[if_then:[^,]+]]
+// LLVM:       [[if_then]]:
+// LLVM:         tail call spir_func void @f()
+// LLVM-NOT:     call spir_func void @convfun()
+// LLVM-NOT:     call spir_func void @g()
+// LLVM:         br label %[[if_end]]
+// LLVM:       [[if_end]]:
+// LLVM-NOT:     phi i1
+// LLVM:         tail call spir_func void @convfun()
+// LLVM:         br i1 %[[tobool]], label %[[if_end3:[^,]+]], label %[[if_then2:[^,]+]]
+// LLVM:       [[if_then2]]:
+// LLVM:         tail call spir_func void @g()
+// LLVM:         br label %[[if_end3]]
+// LLVM:       [[if_end3]]:
+// LLVM:         ret void
+
+
+// LLVM attribute definitions.
+// LLVM-NOT: attributes #[[NON_CONV_ATTR]] = { {{.*}}convergent{{.*}} }
+// LLVM:     attributes #[[CONV_ATTR]] = { {{.*}}convergent{{.*}} }
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/elemwise-ops.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/elemwise-ops.cl
new file mode 100644
index 0000000000000..0d4d112445261
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/elemwise-ops.cl
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -fclangir -emit-cir -triple spirv64-unknown-unknown -o %t.ll
+// RUN: FileCheck %s --input-file=%t.ll --check-prefix=CIR
+
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -fclangir -emit-llvm -triple spirv64-unknown-unknown -o %t.ll
+// RUN: FileCheck %s --input-file=%t.ll --check-prefix=LLVM
+
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -triple spirv64-unknown-unknown -o %t.ll
+// RUN: FileCheck %s --input-file=%t.ll --check-prefix=OG-LLVM
+
+typedef __attribute__(( ext_vector_type(2) )) int int2;
+
+// CIR: %[[LHS:.*]] = cir.const #cir.const_vector<[#cir.int<3> : !s32i, #cir.int<3> : !s32i]> : !cir.vector<!s32i x 2>
+// CIR: %[[WIDTH:.*]] = cir.const #cir.const_vector<[#cir.int<31> : !s32i, #cir.int<31> : !s32i]> : !cir.vector<!s32i x 2>
+// CIR: %[[MASK:.*]] = cir.binop(and, %[[LHS]], %[[WIDTH]]) : !cir.vector<!s32i x 2>
+// CIR: cir.shift(right, %{{.*}} : !cir.vector<!s32i x 2>, %[[MASK]] : !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+// LLVM: ashr <2 x i32> %{{.*}}, splat (i32 3)
+// OG-LLVM: ashr <2 x i32> %x, splat (i32 3)
+int2 shr(int2 x)
+{
+    return x >> 3;
+}
+
+// CIR: %[[LHS:.*]] = cir.const #cir.int<5> : !s32i
+// CIR: %[[WIDTH:.*]] = cir.const #cir.int<31> : !s32i
+// CIR: %[[MASK:.*]] = cir.binop(and, %[[LHS]], %[[WIDTH]]) : !s32i
+// CIR: cir.shift(left, %{{.*}} : !s32i, %[[MASK]] : !s32i) -> !s32i
+// LLVM: shl i16 %{{.*}}, 5
+// OG-LLVM: shl i16 %x, 5
+short shl(short x)
+{
+    return x << 5;
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/global-var-with-ctor.clcpp b/clang/test/CIR/Incubator/CodeGen/OpenCL/global-var-with-ctor.clcpp
new file mode 100644
index 0000000000000..47b185360b9c4
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/global-var-with-ctor.clcpp
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -cl-std=clc++ -O0 -fclangir -emit-cir \
+// RUN:            -triple spirv64-unknown-unknown %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -cl-std=clc++ -O0 -fclangir -emit-llvm \
+// RUN:            -triple spirv64-unknown-unknown %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -cl-std=clc++ -O0 -emit-llvm \
+// RUN:            -triple spirv64-unknown-unknown %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+struct S {
+  int x;
+  S() { x = 42; }
+};
+
+// Global variable in global address space - constructor expects generic AS
+__global S globalVar;
+
+kernel void foo() {
+  int y = globalVar.x;
+}
+
+// CIR: cir.global external lang_address_space(offload_global) @globalVar
+// CIR: cir.func internal private @__cxx_global_var_init()
+// CIR:   %[[GLOBAL:.*]] = cir.get_global @globalVar : !cir.ptr<!rec_S, lang_address_space(offload_global)>
+// CIR:   %[[ASCAST:.*]] = cir.cast address_space %[[GLOBAL]] : !cir.ptr<!rec_S, lang_address_space(offload_global)> -> !cir.ptr<{{.*}}, lang_address_space(offload_generic)>
+// CIR:   cir.call @_ZNU3AS41SC1Ev(%{{.*}}) : (!cir.ptr<!rec_S, lang_address_space(offload_generic)>)
+
+// LLVM: @globalVar = addrspace(1) global %struct.S zeroinitializer
+// LLVM: define internal void @__cxx_global_var_init()
+// LLVM:   call spir_func void @_ZNU3AS41SC1Ev(ptr addrspace(4) addrspacecast (ptr addrspace(1) @globalVar to ptr addrspace(4)))
+
+// OGCG: @globalVar = addrspace(1) global %struct.S zeroinitializer
+// OGCG: define internal spir_func void @__cxx_global_var_init()
+// OGCG:   call spir_func void @_ZNU3AS41SC1Ev(ptr addrspace(4) {{.*}}addrspacecast (ptr addrspace(1) @globalVar to ptr addrspace(4)))
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/global.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/global.cl
new file mode 100644
index 0000000000000..bb5afa8b7e9d2
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/global.cl
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-cir -triple spirv64-unknown-unknown %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-llvm -fno-clangir-call-conv-lowering -triple spirv64-unknown-unknown %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+
+global int a = 13;
+// CIR-DAG: cir.global external lang_address_space(offload_global) @a = #cir.int<13> : !s32i
+// LLVM-DAG: @a = addrspace(1) global i32 13
+
+global int b = 15;
+// CIR-DAG: cir.global external lang_address_space(offload_global) @b = #cir.int<15> : !s32i
+// LLVM-DAG: @b = addrspace(1) global i32 15
+
+constant int c[2] = {18, 21};
+// CIR-DAG: cir.global constant {{.*}}lang_address_space(offload_constant) {{.*}}@c
+// LLVM-DAG: @c = addrspace(2) constant
+
+kernel void test_get_global() {
+  a = b;
+  // CIR:      %[[#ADDRB:]] = cir.get_global @b : !cir.ptr<!s32i, lang_address_space(offload_global)>
+  // CIR-NEXT: %[[#LOADB:]] = cir.load{{.*}} %[[#ADDRB]] : !cir.ptr<!s32i, lang_address_space(offload_global)>, !s32i
+  // CIR-NEXT: %[[#ADDRA:]] = cir.get_global @a : !cir.ptr<!s32i, lang_address_space(offload_global)>
+  // CIR-NEXT: cir.store{{.*}} %[[#LOADB]], %[[#ADDRA]] : !s32i, !cir.ptr<!s32i, lang_address_space(offload_global)>
+
+  // LLVM:      %[[#LOADB:]] = load i32, ptr addrspace(1) @b, align 4
+  // LLVM-NEXT: store i32 %[[#LOADB]], ptr addrspace(1) @a, align 4
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-arg-info-single-as.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-arg-info-single-as.cl
new file mode 100644
index 0000000000000..f07a2cc0c81e0
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-arg-info-single-as.cl
@@ -0,0 +1,14 @@
+// Test that the kernel argument info always refers to SPIR address spaces,
+// even if the target has only one address space like x86_64 does.
+// RUN: %clang_cc1 -fclangir %s -cl-std=CL2.0 -emit-cir -triple x86_64-unknown-linux-gnu -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir --check-prefix=CIR
+
+// RUN: %clang_cc1 -fclangir %s -cl-std=CL2.0 -emit-llvm -fno-clangir-call-conv-lowering -triple x86_64-unknown-linux-gnu -o %t.ll
+// RUN: FileCheck %s --input-file=%t.ll --check-prefix=LLVM
+
+kernel void foo(__global int * G, __constant int *C, __local int *L) {
+  *G = *C + *L;
+}
+// CIR: cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata<addr_space = [1 : i32, 2 : i32, 3 : i32]
+// LLVM: !kernel_arg_addr_space ![[MD123:[0-9]+]]
+// LLVM: ![[MD123]] = !{i32 1, i32 2, i32 3}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-arg-info.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-arg-info.cl
new file mode 100644
index 0000000000000..d38056efc031c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-arg-info.cl
@@ -0,0 +1,90 @@
+// See also clang/test/CodeGenOpenCL/kernel-arg-info.cl
+// RUN: %clang_cc1 -fclangir %s -cl-std=CL2.0 -emit-cir -triple spirv64-unknown-unknown -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir --check-prefix=CIR
+// RUN: %clang_cc1 -fclangir %s -cl-std=CL2.0 -emit-cir -triple spirv64-unknown-unknown -cl-kernel-arg-info -o %t.arginfo.cir
+// RUN: FileCheck %s --input-file=%t.arginfo.cir --check-prefix=CIR-ARGINFO
+
+// RUN: %clang_cc1 -fclangir %s -cl-std=CL2.0 -emit-llvm -fno-clangir-call-conv-lowering -triple spirv64-unknown-unknown -o %t.ll
+// RUN: FileCheck %s --input-file=%t.ll --check-prefix=LLVM
+// RUN: %clang_cc1 -fclangir %s -cl-std=CL2.0 -emit-llvm -fno-clangir-call-conv-lowering -triple spirv64-unknown-unknown -cl-kernel-arg-info -o %t.arginfo.ll
+// RUN: FileCheck %s --input-file=%t.arginfo.ll --check-prefix=LLVM-ARGINFO
+
+kernel void foo(global int * globalintp, global int * restrict globalintrestrictp,
+                global const int * globalconstintp,
+                global const int * restrict globalconstintrestrictp,
+                constant int * constantintp, constant int * restrict constantintrestrictp,
+                global const volatile int * globalconstvolatileintp,
+                global const volatile int * restrict globalconstvolatileintrestrictp,
+                global volatile int * globalvolatileintp,
+                global volatile int * restrict globalvolatileintrestrictp,
+                local int * localintp, local int * restrict localintrestrictp,
+                local const int * localconstintp,
+                local const int * restrict localconstintrestrictp,
+                local const volatile int * localconstvolatileintp,
+                local const volatile int * restrict localconstvolatileintrestrictp,
+                local volatile int * localvolatileintp,
+                local volatile int * restrict localvolatileintrestrictp,
+                int X, const int constint, const volatile int constvolatileint,
+                volatile int volatileint) {
+  *globalintrestrictp = constint + volatileint;
+}
+// CIR-DAG: #fn_attr[[KERNEL0:[0-9]*]] = {{.+}}cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata<addr_space = [1 : i32, 1 : i32, 1 : i32, 1 : i32, 2 : i32, 2 : i32, 1 : i32, 1 : i32, 1 : i32, 1 : i32, 3 : i32, 3 : i32, 3 : i32, 3 : i32, 3 : i32, 3 : i32, 3 : i32, 3 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32], access_qual = ["none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none"], type = ["int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int", "int", "int", "int"], base_type = ["int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int", "int", "int", "int"], type_qual = ["", "restrict", "const", "restrict const", "const", "restrict const", "const volatile", "restrict const volatile", "volatile", "restrict volatile", "", "restrict", "const", "restrict const", "const volatile", "restrict const volatile", "volatile", "restrict volatile", "", "", "", ""]>
+// CIR-DAG: cir.func{{.*}} @foo({{.+}}) extra(#fn_attr[[KERNEL0]])
+// CIR-ARGINFO-DAG: #fn_attr[[KERNEL0:[0-9]*]] = {{.+}}cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata<addr_space = [1 : i32, 1 : i32, 1 : i32, 1 : i32, 2 : i32, 2 : i32, 1 : i32, 1 : i32, 1 : i32, 1 : i32, 3 : i32, 3 : i32, 3 : i32, 3 : i32, 3 : i32, 3 : i32, 3 : i32, 3 : i32, 0 : i32, 0 : i32, 0 : i32, 0 : i32], access_qual = ["none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none"], type = ["int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int", "int", "int", "int"], base_type = ["int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int*", "int", "int", "int", "int"], type_qual = ["", "restrict", "const", "restrict const", "const", "restrict const", "const volatile", "restrict const volatile", "volatile", "restrict volatile", "", "restrict", "const", "restrict const", "const volatile", "restrict const volatile", "volatile", "restrict volatile", "", "", "", ""], name = ["globalintp", "globalintrestrictp", "globalconstintp", "globalconstintrestrictp", "constantintp", "constantintrestrictp", "globalconstvolatileintp", "globalconstvolatileintrestrictp", "globalvolatileintp", "globalvolatileintrestrictp", "localintp", "localintrestrictp", "localconstintp", "localconstintrestrictp", "localconstvolatileintp", "localconstvolatileintrestrictp", "localvolatileintp", "localvolatileintrestrictp", "X", "constint", "constvolatileint", "volatileint"]>
+// CIR-ARGINFO-DAG: cir.func{{.*}} @foo({{.+}}) extra(#fn_attr[[KERNEL0]])
+
+// LLVM-DAG: define{{.*}} void @foo{{.+}} !kernel_arg_addr_space ![[MD11:[0-9]+]] !kernel_arg_access_qual ![[MD12:[0-9]+]] !kernel_arg_type ![[MD13:[0-9]+]] !kernel_arg_base_type ![[MD13]] !kernel_arg_type_qual ![[MD14:[0-9]+]] {
+// LLVM-ARGINFO-DAG: define{{.*}} void @foo{{.+}} !kernel_arg_addr_space ![[MD11:[0-9]+]] !kernel_arg_access_qual ![[MD12:[0-9]+]] !kernel_arg_type ![[MD13:[0-9]+]] !kernel_arg_base_type ![[MD13]] !kernel_arg_type_qual ![[MD14:[0-9]+]] !kernel_arg_name ![[MD15:[0-9]+]] {
+
+// LLVM-DAG: ![[MD11]] = !{i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 1, i32 1, i32 1, i32 1, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 0, i32 0, i32 0, i32 0}
+// LLVM-DAG: ![[MD12]] = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none"}
+// LLVM-DAG: ![[MD13]] = !{!"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int*", !"int", !"int", !"int", !"int"}
+// LLVM-DAG: ![[MD14]] = !{!"", !"restrict", !"const", !"restrict const", !"const", !"restrict const", !"const volatile", !"restrict const volatile", !"volatile", !"restrict volatile", !"", !"restrict", !"const", !"restrict const", !"const volatile", !"restrict const volatile", !"volatile", !"restrict volatile", !"", !"", !"", !""}
+// LLVM-ARGINFO-DAG: ![[MD15]] = !{!"globalintp", !"globalintrestrictp", !"globalconstintp", !"globalconstintrestrictp", !"constantintp", !"constantintrestrictp", !"globalconstvolatileintp", !"globalconstvolatileintrestrictp", !"globalvolatileintp", !"globalvolatileintrestrictp", !"localintp", !"localintrestrictp", !"localconstintp", !"localconstintrestrictp", !"localconstvolatileintp", !"localconstvolatileintrestrictp", !"localvolatileintp", !"localvolatileintrestrictp", !"X", !"constint", !"constvolatileint", !"volatileint"}
+
+typedef unsigned int myunsignedint;
+kernel void foo4(__global unsigned int * X, __global myunsignedint * Y) {
+}
+
+// CIR-DAG: #fn_attr[[KERNEL4:[0-9]*]] = {{.+}}cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata<addr_space = [1 : i32, 1 : i32], access_qual = ["none", "none"], type = ["uint*", "myunsignedint*"], base_type = ["uint*", "uint*"], type_qual = ["", ""]>
+// CIR-DAG: cir.func{{.*}} @foo4({{.+}}) extra(#fn_attr[[KERNEL4]])
+// CIR-ARGINFO-DAG: #fn_attr[[KERNEL4:[0-9]*]] = {{.+}}cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata<addr_space = [1 : i32, 1 : i32], access_qual = ["none", "none"], type = ["uint*", "myunsignedint*"], base_type = ["uint*", "uint*"], type_qual = ["", ""], name = ["X", "Y"]>
+// CIR-ARGINFO-DAG: cir.func{{.*}} @foo4({{.+}}) extra(#fn_attr[[KERNEL4]])
+
+// LLVM-DAG: define{{.*}} void @foo4{{.+}} !kernel_arg_addr_space ![[MD41:[0-9]+]] !kernel_arg_access_qual ![[MD42:[0-9]+]] !kernel_arg_type ![[MD43:[0-9]+]] !kernel_arg_base_type ![[MD44:[0-9]+]] !kernel_arg_type_qual ![[MD45:[0-9]+]] {
+// LLVM-ARGINFO-DAG: define{{.*}} void @foo4{{.+}} !kernel_arg_addr_space ![[MD41:[0-9]+]] !kernel_arg_access_qual ![[MD42:[0-9]+]] !kernel_arg_type ![[MD43:[0-9]+]] !kernel_arg_base_type ![[MD44:[0-9]+]] !kernel_arg_type_qual ![[MD45:[0-9]+]] !kernel_arg_name ![[MD46:[0-9]+]] {
+
+// LLVM-DAG: ![[MD41]] = !{i32 1, i32 1}
+// LLVM-DAG: ![[MD42]] = !{!"none", !"none"}
+// LLVM-DAG: ![[MD43]] = !{!"uint*", !"myunsignedint*"}
+// LLVM-DAG: ![[MD44]] = !{!"uint*", !"uint*"}
+// LLVM-DAG: ![[MD45]] = !{!"", !""}
+// LLVM-ARGINFO-DAG: ![[MD46]] = !{!"X", !"Y"}
+
+typedef char char16 __attribute__((ext_vector_type(16)));
+__kernel void foo6(__global char16 arg[]) {}
+
+// CIR-DAG: #fn_attr[[KERNEL6:[0-9]*]] = {{.+}}cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata<addr_space = [1 : i32], access_qual = ["none"], type = ["char16*"], base_type = ["char __attribute__((ext_vector_type(16)))*"], type_qual = [""]>
+// CIR-DAG: cir.func{{.*}} @foo6({{.+}}) extra(#fn_attr[[KERNEL6]])
+// CIR-ARGINFO-DAG: #fn_attr[[KERNEL6:[0-9]*]] = {{.+}}cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata<addr_space = [1 : i32], access_qual = ["none"], type = ["char16*"], base_type = ["char __attribute__((ext_vector_type(16)))*"], type_qual = [""], name = ["arg"]>
+// CIR-ARGINFO-DAG: cir.func{{.*}} @foo6({{.+}}) extra(#fn_attr[[KERNEL6]])
+
+// LLVM-DAG: !kernel_arg_type ![[MD61:[0-9]+]]
+// LLVM-ARGINFO-DAG: !kernel_arg_name ![[MD62:[0-9]+]]
+// LLVM-DAG: ![[MD61]] = !{!"char16*"}
+// LLVM-ARGINFO-DAG: ![[MD62]] = !{!"arg"}
+
+kernel void foo9(signed char sc1,  global const signed char* sc2) {}
+
+// CIR-DAG: #fn_attr[[KERNEL9:[0-9]*]] = {{.+}}cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata<addr_space = [0 : i32, 1 : i32], access_qual = ["none", "none"], type = ["char", "char*"], base_type = ["char", "char*"], type_qual = ["", "const"]>
+// CIR-DAG: cir.func{{.*}} @foo9({{.+}}) extra(#fn_attr[[KERNEL9]])
+// CIR-ARGINFO-DAG: #fn_attr[[KERNEL9:[0-9]*]] = {{.+}}cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata<addr_space = [0 : i32, 1 : i32], access_qual = ["none", "none"], type = ["char", "char*"], base_type = ["char", "char*"], type_qual = ["", "const"], name = ["sc1", "sc2"]>
+// CIR-ARGINFO-DAG: cir.func{{.*}} @foo9({{.+}}) extra(#fn_attr[[KERNEL9]])
+
+// LLVM-DAG: define{{.*}} void @foo9{{.+}} !kernel_arg_addr_space ![[SCHAR_AS_QUAL:[0-9]+]] !kernel_arg_access_qual ![[MD42]] !kernel_arg_type ![[SCHAR_TY:[0-9]+]] !kernel_arg_base_type ![[SCHAR_TY]] !kernel_arg_type_qual ![[SCHAR_QUAL:[0-9]+]] {
+// LLVM-ARGINFO-DAG: define{{.*}} void @foo9{{.+}} !kernel_arg_addr_space ![[SCHAR_AS_QUAL:[0-9]+]] !kernel_arg_access_qual ![[MD42]] !kernel_arg_type ![[SCHAR_TY:[0-9]+]] !kernel_arg_base_type ![[SCHAR_TY]] !kernel_arg_type_qual ![[SCHAR_QUAL:[0-9]+]] !kernel_arg_name ![[SCHAR_ARG_NAMES:[0-9]+]] {
+
+// LLVM-DAG: ![[SCHAR_AS_QUAL]] = !{i32 0, i32 1}
+// LLVM-DAG: ![[SCHAR_TY]] = !{!"char", !"char*"}
+// LLVM-DAG: ![[SCHAR_QUAL]] = !{!"", !"const"}
+// LLVM-ARGINFO-DAG: ![[SCHAR_ARG_NAMES]] = !{!"sc1", !"sc2"}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-arg-metadata.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-arg-metadata.cl
new file mode 100644
index 0000000000000..2002c75936c32
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-arg-metadata.cl
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 %s -fclangir -triple spirv64-unknown-unknown -emit-cir -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir --check-prefix=CIR
+// RUN: %clang_cc1 %s -fclangir -triple spirv64-unknown-unknown -emit-llvm -fno-clangir-call-conv-lowering -o %t.ll
+// RUN: FileCheck %s --input-file=%t.ll --check-prefix=LLVM
+
+__kernel void kernel_function() {}
+
+// CIR: #fn_attr[[ATTR:[0-9]*]] = {{.+}}cl.kernel_arg_metadata = #cir.cl.kernel_arg_metadata<addr_space = [], access_qual = [], type = [], base_type = [], type_qual = []>{{.+}}
+// CIR: cir.func{{.*}} @kernel_function() cc(spir_kernel) extra(#fn_attr[[ATTR]])
+
+// LLVM: define {{.*}}spir_kernel void @kernel_function() {{[^{]+}} !kernel_arg_addr_space ![[MD:[0-9]+]] !kernel_arg_access_qual ![[MD]] !kernel_arg_type ![[MD]] !kernel_arg_base_type ![[MD]] !kernel_arg_type_qual ![[MD]] {
+// LLVM: ![[MD]] = !{}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-attributes.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-attributes.cl
new file mode 100644
index 0000000000000..9f1aa851e4e67
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-attributes.cl
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -fclangir -emit-cir -triple spirv64-unknown-unknown %s -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir --check-prefix=CIR
+// RUN: %clang_cc1 -fclangir -emit-llvm -fno-clangir-call-conv-lowering -triple spirv64-unknown-unknown %s -o %t.ll
+// RUN: FileCheck %s --input-file=%t.ll --check-prefix=LLVM
+
+typedef unsigned int uint4 __attribute__((ext_vector_type(4)));
+
+
+kernel  __attribute__((vec_type_hint(int))) __attribute__((reqd_work_group_size(1,2,4))) void kernel1(int a) {}
+
+// CIR-DAG: #fn_attr[[KERNEL1:[0-9]*]] = {{.+}}cl.kernel_metadata = #cir.cl.kernel_metadata<reqd_work_group_size = [1 : i32, 2 : i32, 4 : i32], vec_type_hint = !s32i, vec_type_hint_signedness = 1>{{.+}}
+// CIR-DAG: cir.func{{.*}} @kernel1{{.+}} extra(#fn_attr[[KERNEL1]])
+
+// LLVM-DAG: define {{(dso_local )?}}spir_kernel void @kernel1(i32 {{[^%]*}}%0) {{[^{]+}} !reqd_work_group_size ![[MD1_REQD_WG:[0-9]+]] !vec_type_hint ![[MD1_VEC_TYPE:[0-9]+]]
+// LLVM-DAG: ![[MD1_VEC_TYPE]] = !{i32 undef, i32 1}
+// LLVM-DAG: ![[MD1_REQD_WG]] = !{i32 1, i32 2, i32 4}
+
+
+kernel __attribute__((vec_type_hint(uint4))) __attribute__((work_group_size_hint(8,16,32))) void kernel2(int a) {}
+
+// CIR-DAG: #fn_attr[[KERNEL2:[0-9]*]] = {{.+}}cl.kernel_metadata = #cir.cl.kernel_metadata<work_group_size_hint = [8 : i32, 16 : i32, 32 : i32], vec_type_hint = !cir.vector<!u32i x 4>, vec_type_hint_signedness = 0>{{.+}}
+// CIR-DAG: cir.func{{.*}} @kernel2{{.+}} extra(#fn_attr[[KERNEL2]])
+
+// LLVM-DAG: define {{(dso_local )?}}spir_kernel void @kernel2(i32 {{[^%]*}}%0) {{[^{]+}} !vec_type_hint ![[MD2_VEC_TYPE:[0-9]+]] !work_group_size_hint ![[MD2_WG_SIZE:[0-9]+]]
+// LLVM-DAG: ![[MD2_VEC_TYPE]] = !{<4 x i32> undef, i32 0}
+// LLVM-DAG: ![[MD2_WG_SIZE]] = !{i32 8, i32 16, i32 32}
+
+
+kernel __attribute__((intel_reqd_sub_group_size(8))) void kernel3(int a) {}
+
+// CIR-DAG: #fn_attr[[KERNEL3:[0-9]*]] = {{.+}}cl.kernel_metadata = #cir.cl.kernel_metadata<intel_reqd_sub_group_size = 8 : i32>{{.+}}
+// CIR-DAG: cir.func{{.*}} @kernel3{{.+}} extra(#fn_attr[[KERNEL3]])
+
+// LLVM-DAG: define {{(dso_local )?}}spir_kernel void @kernel3(i32 {{[^%]*}}%0) {{[^{]+}} !intel_reqd_sub_group_size ![[MD3_INTEL:[0-9]+]]
+// LLVM-DAG: ![[MD3_INTEL]] = !{i32 8}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-unit-attr.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-unit-attr.cl
new file mode 100644
index 0000000000000..d17ee35a49d9d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/kernel-unit-attr.cl
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -fclangir -emit-cir -triple spirv64-unknown-unknown %s -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir --check-prefix=CIR
+
+
+// CIR: #fn_attr[[KERNEL1:[0-9]*]] = {{.+}}cl.kernel = #cir.cl.kernel
+// CIR-NEXT: #fn_attr[[FUNC1:[0-9]*]] =
+// CIR-NOT: cl.kernel = #cir.cl.kernel
+
+kernel void kernel1() {}
+// CIR: cir.func{{.*}} @kernel1{{.+}} extra(#fn_attr[[KERNEL1]])
+
+void func1() {}
+
+// CIR: cir.func{{.*}} @func1{{.+}} extra(#fn_attr[[FUNC1]])
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/nothrow.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/nothrow.cl
new file mode 100644
index 0000000000000..a45cf51ef649b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/nothrow.cl
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-cir -o %t.cir %s
+// RUN: FileCheck %s -input-file=%t.cir -check-prefixes CIR
+// RUN: %clang_cc1 -fclangir -triple=spirv64-unknown-unknown -emit-llvm -fno-clangir-call-conv-lowering -o %t.ll %s
+// RUN: FileCheck %s -input-file=%t.ll -check-prefixes LLVM
+
+// CIR-LABEL: #fn_attr =
+// CIR: cl.kernel = #cir.cl.kernel
+// CIR: nothrow = #cir.nothrow
+
+// CIR-LABEL: #fn_attr1 =
+// CIR-NOT: cl.kernel = #cir.cl.kernel
+// CIR: nothrow = #cir.nothrow
+
+kernel void ker() {};
+// CIR: cir.func @ker{{.*}} extra(#fn_attr) {
+// LLVM: define{{.*}}@ker(){{.*}} #0
+
+void foo() {};
+// CIR: cir.func @foo{{.*}} extra(#fn_attr1) {
+// LLVM: define{{.*}}@foo(){{.*}} #1
+
+// LLVM-LABEL: attributes #0
+// LLVM: nounwind
+
+// LLVM-LABEL: attributes #1
+// LLVM: nounwind
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/null-vec.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/null-vec.cl
new file mode 100644
index 0000000000000..31868590c374d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/null-vec.cl
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -emit-cir -o - %s -fclangir | FileCheck %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -emit-llvm -o - %s -fclangir | FileCheck %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -emit-llvm -o - %s | FileCheck %s --check-prefix=OG-LLVM
+
+typedef __attribute__(( ext_vector_type(2) )) unsigned int uint2;
+
+kernel void test_null_vec(uint2 in1, uint2 in2, local uint2 *out)
+{
+    uint2 tmp[2] = {0, 0};  // Vector of NULL vals
+
+    if (in1.s0 != 1)
+        tmp[0] = in1;
+    if (in2.s1 != 2)
+        tmp[1] = in2;
+    *out = tmp[0] + tmp[1];
+}
+
+// CIR: cir.const #cir.zero : !cir.array<!cir.vector<!u32i x 2> x 2>
+// CIR: cir.binop(add, %{{.*}}, %{{.*}}) : !cir.vector<!u32i x 2>
+// LLVM: [[S1:%.*]] = select i1 %{{.*}}, <2 x i32> zeroinitializer, <2 x i32>
+// LLVM: [[S2:%.*]] = select i1 %{{.*}}, <2 x i32> zeroinitializer, <2 x i32>
+// LLVM: add <2 x i32> [[S2]], [[S1]]
+// OG-LLVM: [[S1:%.*]] = select i1 %{{.*}}, <2 x i32> zeroinitializer, <2 x i32>
+// OG-LLVM: [[S2:%.*]] = select i1 %{{.*}}, <2 x i32> zeroinitializer, <2 x i32>
+// OG-LLVM: add <2 x i32> [[S2]], [[S1]]
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/opencl-c-lang.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/opencl-c-lang.cl
new file mode 100644
index 0000000000000..67aeda32c2a18
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/opencl-c-lang.cl
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-cir -triple spirv64-unknown-unknown %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+
+// CIR: module{{.*}} attributes {{{.*}}cir.lang = #cir.lang<opencl_c>
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/opencl-version.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/opencl-version.cl
new file mode 100644
index 0000000000000..f64cdb917ed0e
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/opencl-version.cl
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-cir -triple spirv64-unknown-unknown %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR-CL30
+// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-llvm -fno-clangir-call-conv-lowering -triple spirv64-unknown-unknown %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM-CL30
+// RUN: %clang_cc1 -cl-std=CL1.2 -O0 -fclangir -emit-cir -triple spirv64-unknown-unknown %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR-CL12
+// RUN: %clang_cc1 -cl-std=CL1.2 -O0 -fclangir -emit-llvm -fno-clangir-call-conv-lowering -triple spirv64-unknown-unknown %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM-CL12
+
+// CIR-CL30: module {{.*}} attributes {{{.*}}cir.cl.version = #cir.cl.version<3, 0>
+// LLVM-CL30: !opencl.ocl.version = !{![[MDCL30:[0-9]+]]}
+// LLVM-CL30: ![[MDCL30]] = !{i32 3, i32 0}
+
+// CIR-CL12: module {{.*}} attributes {{{.*}}cir.cl.version = #cir.cl.version<1, 2>
+// LLVM-CL12: !opencl.ocl.version = !{![[MDCL12:[0-9]+]]}
+// LLVM-CL12: ![[MDCL12]] = !{i32 1, i32 2}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/printf.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/printf.cl
new file mode 100644
index 0000000000000..5e803b64d2fc6
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/printf.cl
@@ -0,0 +1,55 @@
+// RUN: %clang_cc1 -fclangir -no-enable-noundef-analysis -cl-std=CL1.2 -cl-ext=-+cl_khr_fp64 -triple spirv64-unknown-unknown -disable-llvm-passes -emit-cir -fno-clangir-call-conv-lowering -o %t.12fp64.cir %s
+// RUN: FileCheck -input-file=%t.12fp64.cir -check-prefixes=CIR-FP64,CIR-ALL %s
+// RUN: %clang_cc1 -fclangir -no-enable-noundef-analysis -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -triple spirv64-unknown-unknown -disable-llvm-passes -emit-cir -fno-clangir-call-conv-lowering -o %t.12nofp64.cir %s
+// RUN: FileCheck -input-file=%t.12nofp64.cir -check-prefixes=CIR-NOFP64,CIR-ALL %s
+// RUN: %clang_cc1 -fclangir -no-enable-noundef-analysis -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64,+cl_khr_fp64 -triple spirv64-unknown-unknown -disable-llvm-passes -emit-cir -fno-clangir-call-conv-lowering -o %t.30fp64.cir %s
+// RUN: FileCheck -input-file=%t.30fp64.cir -check-prefixes=CIR-FP64,CIR-ALL %s
+// RUN: %clang_cc1 -fclangir -no-enable-noundef-analysis -cl-std=CL3.0 -cl-ext=-__opencl_c_fp64,-cl_khr_fp64 -triple spirv64-unknown-unknown -disable-llvm-passes -emit-cir -fno-clangir-call-conv-lowering -o %t.30nofp64.cir %s
+// RUN: FileCheck -input-file=%t.30nofp64.cir -check-prefixes=CIR-NOFP64,CIR-ALL %s
+// RUN: %clang_cc1 -fclangir -no-enable-noundef-analysis -cl-std=CL1.2 -cl-ext=-+cl_khr_fp64 -triple spirv64-unknown-unknown -disable-llvm-passes -emit-llvm -fno-clangir-call-conv-lowering -o %t.12fp64.ll %s
+// RUN: FileCheck -input-file=%t.12fp64.ll -check-prefixes=LLVM-FP64,LLVM-ALL %s
+// RUN: %clang_cc1 -fclangir -no-enable-noundef-analysis -cl-std=CL1.2 -cl-ext=-cl_khr_fp64 -triple spirv64-unknown-unknown -disable-llvm-passes -emit-llvm -fno-clangir-call-conv-lowering -o %t.12nofp64.ll %s
+// RUN: FileCheck -input-file=%t.12nofp64.ll -check-prefixes=LLVM-NOFP64,LLVM-ALL %s
+// RUN: %clang_cc1 -fclangir -no-enable-noundef-analysis -cl-std=CL3.0 -cl-ext=+__opencl_c_fp64,+cl_khr_fp64 -triple spirv64-unknown-unknown -disable-llvm-passes -emit-llvm -fno-clangir-call-conv-lowering -o %t.30fp64.ll %s
+// RUN: FileCheck -input-file=%t.30fp64.ll -check-prefixes=LLVM-FP64,LLVM-ALL %s
+// RUN: %clang_cc1 -fclangir -no-enable-noundef-analysis -cl-std=CL3.0 -cl-ext=-__opencl_c_fp64,-cl_khr_fp64 -triple spirv64-unknown-unknown -disable-llvm-passes -emit-llvm -fno-clangir-call-conv-lowering -o %t.30nofp64.ll %s
+// RUN: FileCheck -input-file=%t.30nofp64.ll -check-prefixes=LLVM-NOFP64,LLVM-ALL %s
+
+typedef __attribute__((ext_vector_type(2))) float float2;
+typedef __attribute__((ext_vector_type(2))) half half2;
+
+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
+typedef __attribute__((ext_vector_type(2))) double double2;
+#endif
+
+int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)));
+
+kernel void test_printf_float2(float2 arg) {
+  printf("%v2hlf", arg);
+}
+// CIR-ALL-LABEL: @test_printf_float2(
+// CIR-FP64: %{{.+}} = cir.call @printf(%{{.+}}, %{{.+}}) : (!cir.ptr<!s8i, lang_address_space(offload_constant)>, !cir.vector<!cir.float x 2>) -> !s32i cc(spir_function)
+// CIR-NOFP64: %{{.+}} = cir.call @printf(%{{.+}}, %{{.+}}) : (!cir.ptr<!s8i, lang_address_space(offload_constant)>, !cir.vector<!cir.float x 2>) -> !s32i cc(spir_function)
+// LLVM-ALL-LABEL: @test_printf_float2(
+// LLVM-FP64: %{{.+}} = call spir_func i32 (ptr addrspace(2), ...) @{{.*}}printf{{.*}}(ptr addrspace(2) @.str, <2 x float> %{{.*}})
+// LLVM-NOFP64:  call spir_func i32 (ptr addrspace(2), ...) @{{.*}}printf{{.*}}(ptr addrspace(2) @.str, <2 x float> %{{.*}})
+
+kernel void test_printf_half2(half2 arg) {
+  printf("%v2hf", arg);
+}
+// CIR-ALL-LABEL: @test_printf_half2(
+// CIR-FP64: %{{.+}} = cir.call @printf(%{{.+}}, %{{.+}}) : (!cir.ptr<!s8i, lang_address_space(offload_constant)>, !cir.vector<!cir.f16 x 2>) -> !s32i cc(spir_function)
+// CIR-NOFP64: %{{.+}} = cir.call @printf(%{{.+}}, %{{.+}}) : (!cir.ptr<!s8i, lang_address_space(offload_constant)>, !cir.vector<!cir.f16 x 2>) -> !s32i cc(spir_function)
+// LLVM-ALL-LABEL: @test_printf_half2(
+// LLVM-FP64:  %{{.+}} = call spir_func i32 (ptr addrspace(2), ...) @{{.*}}printf{{.*}}(ptr addrspace(2) @.str.1, <2 x half> %{{.*}})
+// LLVM-NOFP64:  %{{.+}} = call spir_func i32 (ptr addrspace(2), ...) @{{.*}}printf{{.*}}(ptr addrspace(2) @.str.1, <2 x half> %{{.*}})
+
+#if defined(cl_khr_fp64) || defined(__opencl_c_fp64)
+kernel void test_printf_double2(double2 arg) {
+  printf("%v2lf", arg);
+}
+// CIR-FP64-LABEL: @test_printf_double2(
+// CIR-FP64: %{{.+}} = cir.call @printf(%{{.+}}, %{{.+}}) : (!cir.ptr<!s8i, lang_address_space(offload_constant)>, !cir.vector<!cir.double x 2>) -> !s32i cc(spir_function)
+// LLVM-FP64-LABEL: @test_printf_double2(
+// LLVM-FP64: call spir_func i32 (ptr addrspace(2), ...) @{{.*}}printf{{.*}}(ptr addrspace(2) @.str.2, <2 x double> %{{.*}})
+#endif
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/spir-calling-conv.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/spir-calling-conv.cl
new file mode 100644
index 0000000000000..3008f68d7cb17
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/spir-calling-conv.cl
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -fclangir %s -O0 -triple "spirv64-unknown-unknown" -emit-cir -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir --check-prefix=CIR
+// RUN: %clang_cc1 -fclangir %s -O0 -triple "spirv64-unknown-unknown" -emit-llvm -fno-clangir-call-conv-lowering -o %t.ll
+// RUN: FileCheck %s --input-file=%t.ll --check-prefix=LLVM
+
+// CIR: cir.func{{.*}} @get_dummy_id{{.*}} cc(spir_function)
+// LLVM-DAG: declare{{.*}} spir_func i32 @get_dummy_id(
+int get_dummy_id(int D);
+
+// CIR: cir.func{{.*}} @foo{{.*}} cc(spir_kernel)
+// LLVM-DAG: define{{.*}} spir_kernel void @foo(
+kernel void foo(global int *A) {
+  int id = get_dummy_id(0);
+  // CIR: %{{[0-9]+}} = cir.call @get_dummy_id(%2) : (!s32i) -> !s32i cc(spir_function)
+  // LLVM: %{{[a-z0-9_]+}} = call spir_func i32 @get_dummy_id(
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/spirv-target.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/spirv-target.cl
new file mode 100644
index 0000000000000..ea9b7a1479846
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/spirv-target.cl
@@ -0,0 +1,30 @@
+// See also: clang/test/CodeGenOpenCL/spirv_target.cl
+// RUN: %clang_cc1 -cl-std=CL3.0 -fclangir -emit-cir -triple spirv64-unknown-unknown %s -o %t_64.cir
+// RUN: FileCheck --input-file=%t_64.cir %s --check-prefix=CIR-SPIRV64
+// RUN: %clang_cc1 -cl-std=CL3.0 -fclangir -emit-llvm -fno-clangir-call-conv-lowering -triple spirv64-unknown-unknown %s -o %t_64.ll
+// RUN: FileCheck --input-file=%t_64.ll %s --check-prefix=LLVM-SPIRV64
+
+
+// CIR-SPIRV64: cir.triple = "spirv64-unknown-unknown"
+// LLVM-SPIRV64: target triple = "spirv64-unknown-unknown"
+
+typedef struct {
+  char c;
+  void *v;
+  void *v2;
+} my_st;
+
+// CIR-SPIRV64: cir.func{{.*}} @func
+// LLVM-SPIRV64: define spir_kernel void @func
+kernel void func(global long *arg) {
+  int res1[sizeof(my_st)  == 24 ? 1 : -1]; // expected-no-diagnostics
+  int res2[sizeof(void *) ==  8 ? 1 : -1]; // expected-no-diagnostics
+  int res3[sizeof(arg)    ==  8 ? 1 : -1]; // expected-no-diagnostics
+
+  my_st *tmp = 0;
+
+  // LLVM-SPIRV64: store i64 8, ptr addrspace(1)
+  arg[0] = (long)(&tmp->v);
+  // LLVM-SPIRV64: store i64 16, ptr addrspace(1)
+  arg[1] = (long)(&tmp->v2);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/static-vardecl.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/static-vardecl.cl
new file mode 100644
index 0000000000000..0e3885dc138fa
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/static-vardecl.cl
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-cir -triple spirv64-unknown-unknown %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -cl-std=CL3.0 -O0 -fclangir -emit-llvm -fno-clangir-call-conv-lowering -triple spirv64-unknown-unknown %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+
+kernel void test_static(int i) {
+  static global int b = 15;
+  // CIR-DAG: cir.global "private" internal dso_local lang_address_space(offload_global) @test_static.b = #cir.int<15> : !s32i {alignment = 4 : i64}
+  // LLVM-DAG: @test_static.b = internal addrspace(1) global i32 15
+
+  local int c;
+  // CIR-DAG: cir.global "private" internal dso_local lang_address_space(offload_local) @test_static.c : !s32i {alignment = 4 : i64}
+  // LLVM-DAG: @test_static.c = internal addrspace(3) global i32 undef
+
+  // CIR-DAG: %[[#ADDRB:]] = cir.get_global @test_static.b : !cir.ptr<!s32i, lang_address_space(offload_global)>
+  // CIR-DAG: %[[#ADDRC:]] = cir.get_global @test_static.c : !cir.ptr<!s32i, lang_address_space(offload_local)>
+
+  c = b;
+  // CIR:      %[[#LOADB:]] = cir.load{{.*}} %[[#ADDRB]] : !cir.ptr<!s32i, lang_address_space(offload_global)>, !s32i
+  // CIR-NEXT: cir.store{{.*}} %[[#LOADB]], %[[#ADDRC]] : !s32i, !cir.ptr<!s32i, lang_address_space(offload_local)>
+
+  // LLVM:     %[[#LOADB:]] = load i32, ptr addrspace(1) @test_static.b, align 4
+  // LLVM-NEXT: store i32 %[[#LOADB]], ptr addrspace(3) @test_static.c, align 4
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/str_literals.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/str_literals.cl
new file mode 100644
index 0000000000000..81001b155d791
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/str_literals.cl
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 %s -fclangir -triple=spirv64-unknown-unknown -cl-opt-disable -emit-cir -o %t.cir -ffake-address-space-map
+// RUN: FileCheck -input-file=%t.cir -check-prefix=CIR %s
+// RUN: %clang_cc1 %s -fclangir -triple=spirv64-unknown-unknown -cl-opt-disable -emit-llvm -o %t.ll -ffake-address-space-map
+// RUN: FileCheck -input-file=%t.ll -check-prefix=LLVM %s
+
+__constant char *__constant x = "hello world";
+__constant char *__constant y = "hello world";
+
+// CIR: cir.global{{.*}} constant {{.*}}lang_address_space(offload_constant) @".str" = #cir.const_array<"hello world\00" : !cir.array<!s8i x 12>> : !cir.array<!s8i x 12>
+// CIR: cir.global{{.*}} constant {{.*}}lang_address_space(offload_constant) @x = #cir.global_view<@".str"> : !cir.ptr<!s8i, lang_address_space(offload_constant)>
+// CIR: cir.global{{.*}} constant {{.*}}lang_address_space(offload_constant) @y = #cir.global_view<@".str"> : !cir.ptr<!s8i, lang_address_space(offload_constant)>
+// CIR: cir.global{{.*}} constant {{.*}}lang_address_space(offload_constant) @".str.1" = #cir.const_array<"f\00" : !cir.array<!s8i x 2>> : !cir.array<!s8i x 2>
+// LLVM: addrspace(2) constant{{.*}}"hello world\00"
+// LLVM-NOT: addrspace(2) constant
+// LLVM: @x = {{(dso_local )?}}addrspace(2) constant ptr addrspace(2)
+// LLVM: @y = {{(dso_local )?}}addrspace(2) constant ptr addrspace(2)
+// LLVM: addrspace(2) constant{{.*}}"f\00"
+
+void f() {
+  // CIR: cir.store{{.*}} %{{.*}}, %{{.*}} : !cir.ptr<!s8i, lang_address_space(offload_constant)>, !cir.ptr<!cir.ptr<!s8i, lang_address_space(offload_constant)>, lang_address_space(offload_private)>
+  // LLVM: store ptr addrspace(2) {{.*}}, ptr
+  constant const char *f3 = __func__;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/vec_initializer.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/vec_initializer.cl
new file mode 100644
index 0000000000000..e230b9dce91a3
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/vec_initializer.cl
@@ -0,0 +1,41 @@
+// RUN: %clang -cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -O0 -emit-cir -fclangir -o - %s | FileCheck %s --check-prefix=CIR
+// RUN: %clang -cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -O2 -emit-llvm -fclangir -o - %s | FileCheck %s --check-prefix=LLVM
+// RUN: %clang -cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -O2 -emit-llvm -o - %s | FileCheck %s --check-prefix=OG-LLVM
+
+int test_scalar(int val, char n) {
+  return val >> (n & 0x1f);
+}
+
+int2 test_vec2(int2 val, char2 n) {
+  return (int2)(test_scalar(val.x, n.x), test_scalar(val.y, n.y));
+}
+
+int3 test_vec3(int3 val, char3 n) {
+  return (int3)(test_vec2(val.xy, n.xy), test_scalar(val.z, n.z));
+}
+
+// CIR-LABEL: cir.func no_inline optnone @test_vec3
+// CIR: %[[IDX0:.*]] = cir.const #cir.int<0> : !u32i
+// CIR: %[[E0:.*]] = cir.vec.extract %{{.*}}[%[[IDX0]] : !u32i] : !cir.vector<!s32i x 2>
+// CIR: %[[IDX1:.*]] = cir.const #cir.int<1> : !u32i
+// CIR: %[[E1:.*]] = cir.vec.extract %{{.*}}[%[[IDX1]] : !u32i] : !cir.vector<!s32i x 2>
+// CIR: %[[V3:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<!s32i x 3>{{.*}}>, !cir.vector<!s32i x 3>
+// CIR: %[[IDX2:.*]] = cir.const #cir.int<2> : !s64i
+// CIR: %[[VAL2:.*]] = cir.vec.extract %[[V3]]
+// CIR: %[[N3:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<!s8i x 3>{{.*}}>, !cir.vector<!s8i x 3>
+// CIR: %[[NIDX2:.*]] = cir.const #cir.int<2> : !s64i
+// CIR: %[[NVAL2:.*]] = cir.vec.extract %[[N3]]
+// CIR: %[[SCALAR:.*]] = cir.call @test_scalar(%[[VAL2]], %[[NVAL2]])
+// CIR: cir.vec.create(%[[E0]], %[[E1]], %[[SCALAR]] : !s32i, !s32i, !s32i) : !cir.vector<!s32i x 3>
+
+// LLVM-LABEL: define spir_func <3 x i32> @test_vec3
+// LLVM: %[[V0:.*]] = insertelement <3 x i32> poison, i32 %{{.*}}, i64 0
+// LLVM: %[[V1:.*]] = insertelement <3 x i32> %[[V0]], i32 %{{.*}}, i64 1
+// LLVM: %[[V2:.*]] = insertelement <3 x i32> %[[V1]], i32 %{{.*}}, i64 2
+// LLVM: ret <3 x i32> %[[V2]]
+
+// OG-LLVM-LABEL: define spir_func <3 x i32> @test_vec3
+// OG-LLVM: %[[V0:.*]] = insertelement <3 x i32> poison, i32 %{{.*}}, i64 0
+// OG-LLVM: %[[V1:.*]] = insertelement <3 x i32> %[[V0]], i32 %{{.*}}, i64 1
+// OG-LLVM: %[[V2:.*]] = insertelement <3 x i32> %[[V1]], i32 %{{.*}}, i64 2
+// OG-LLVM: ret <3 x i32> %[[V2]]
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/vec_logic.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/vec_logic.cl
new file mode 100644
index 0000000000000..01c7b47aeeac9
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/vec_logic.cl
@@ -0,0 +1,39 @@
+// RUN: %clang -cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -O0 -emit-cir -fclangir -o - %s | FileCheck %s --check-prefix=CIR
+// RUN: %clang -cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -O0 -emit-llvm -fclangir -o - %s | FileCheck %s --check-prefix=LLVM
+// RUN: %clang -cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -O0 -emit-llvm -o - %s | FileCheck %s --check-prefix=OG-LLVM
+
+kernel void test(char4 in1, char4 in2, local char4 *out)
+{
+    *out = (in1 == (char4)3 && (in1 == (char4)5 || in2 == (char4)7))
+            ? in1 : in2;
+}
+
+
+// CIR: [[ZERO:%.*]] = cir.const #cir.zero : !cir.vector<!s8i x 4>
+// CIR: [[CMP1:%.*]] = cir.vec.cmp(ne, %{{.*}}, [[ZERO]]) : !cir.vector<!s8i x 4>, !cir.vector<!cir.bool x 4>
+// CIR: [[CMP2:%.*]] = cir.vec.cmp(ne, %{{.*}}, [[ZERO]]) : !cir.vector<!s8i x 4>, !cir.vector<!cir.bool x 4>
+// CIR: [[OR:%.*]] = cir.binop(or, [[CMP1]], [[CMP2]]) : !cir.vector<!cir.bool x 4>
+// CIR: [[CAST1:%.*]] = cir.cast bool_to_int [[OR]] : !cir.vector<!cir.bool x 4> -> !cir.vector<!s8i x 4>
+// CIR: [[ZERO2:%.*]] = cir.const #cir.zero : !cir.vector<!s8i x 4>
+// CIR: [[CMP3:%.*]] = cir.vec.cmp(ne, %{{.*}}, [[ZERO2]]) : !cir.vector<!s8i x 4>, !cir.vector<!cir.bool x 4>
+// CIR: [[CMP4:%.*]] = cir.vec.cmp(ne, [[CAST1]], [[ZERO2]]) : !cir.vector<!s8i x 4>, !cir.vector<!cir.bool x 4>
+// CIR: [[AND:%.*]] = cir.binop(and, [[CMP3]], [[CMP4]]) : !cir.vector<!cir.bool x 4>
+// CIR: cir.cast bool_to_int [[AND]] : !cir.vector<!cir.bool x 4> -> !cir.vector<!s8i x 4>
+
+// LLVM: [[CMP1:%.*]] = icmp ne <4 x i8> %{{.*}}, zeroinitializer
+// LLVM: [[CMP2:%.*]] = icmp ne <4 x i8> %{{.*}}, zeroinitializer
+// LLVM: [[OR:%.*]] = or <4 x i1> [[CMP1]], [[CMP2]]
+// LLVM: [[SEXT:%.*]] = sext <4 x i1> [[OR]] to <4 x i8>
+// LLVM: [[CMP3:%.*]] = icmp ne <4 x i8> %{{.*}}, zeroinitializer
+// LLVM: [[CMP4:%.*]] = icmp ne <4 x i8> [[SEXT]], zeroinitializer
+// LLVM: [[AND:%.*]] = and <4 x i1> [[CMP3]], [[CMP4]]
+// LLVM: sext <4 x i1> [[AND]] to <4 x i8>
+
+// OG-LLVM: [[CMP1:%.*]] = icmp ne <4 x i8> %{{.*}}, zeroinitializer
+// OG-LLVM: [[CMP2:%.*]] = icmp ne <4 x i8> %{{.*}}, zeroinitializer
+// OG-LLVM: [[OR:%.*]] = or <4 x i1> [[CMP1]], [[CMP2]]
+// OG-LLVM: [[SEXT:%.*]] = sext <4 x i1> [[OR]] to <4 x i8>
+// OG-LLVM: [[CMP3:%.*]] = icmp ne <4 x i8> %{{.*}}, zeroinitializer
+// OG-LLVM: [[CMP4:%.*]] = icmp ne <4 x i8> [[SEXT]], zeroinitializer
+// OG-LLVM: [[AND:%.*]] = and <4 x i1> [[CMP3]], [[CMP4]]
+// OG-LLVM: sext <4 x i1> [[AND]] to <4 x i8>
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenCL/vec_widening.cl b/clang/test/CIR/Incubator/CodeGen/OpenCL/vec_widening.cl
new file mode 100644
index 0000000000000..75ee6e8f7ca55
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenCL/vec_widening.cl
@@ -0,0 +1,24 @@
+// RUN: %clang -cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -O0 -emit-cir -fclangir -o - %s | FileCheck %s --check-prefix=CIR
+// RUN: %clang -cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -O0 -emit-llvm -fclangir -o - %s | FileCheck %s --check-prefix=LLVM
+// RUN: %clang -cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -O0 -emit-llvm -o - %s | FileCheck %s --check-prefix=OG-LLVM
+
+kernel void vec_widening(local const short3 *l_in, local short3 *l_out)
+{
+    *l_out = *l_in + (short3)1;
+}
+
+// CIR: [[PTR:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!cir.vector<!s16i x 4>, lang_address_space(offload_local)>, !cir.vector<!s16i x 4>
+// CIR: [[POISON:%.*]] = cir.const #cir.poison : !cir.vector<!s16i x 4>
+// CIR: [[SHUFFLE:%.*]] = cir.vec.shuffle([[PTR]], [[POISON]] : !cir.vector<!s16i x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!s16i x 3>
+// CIR: [[ONE_I32:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR: [[ONE_I16:%.*]] = cir.cast integral [[ONE_I32]] : !s32i -> !s16i
+// CIR: [[SPLAT:%.*]] = cir.vec.splat [[ONE_I16]] : !s16i, !cir.vector<!s16i x 3>
+// CIR: cir.binop(add, [[SHUFFLE]], [[SPLAT]]) : !cir.vector<!s16i x 3>
+
+// LLVM: [[LOAD:%.*]] = load <4 x i16>, ptr addrspace(3) %{{.*}}, align 8
+// LLVM: [[SHUF:%.*]] = shufflevector <4 x i16> [[LOAD]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// LLVM: add <3 x i16> [[SHUF]], splat (i16 1)
+
+// OG-LLVM: [[LOAD:%.*]] = load <4 x i16>, ptr addrspace(3) %{{.*}}, align 8
+// OG-LLVM: [[SHUF:%.*]] = shufflevector <4 x i16> [[LOAD]], <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
+// OG-LLVM: add <3 x i16> [[SHUF]], splat (i16 1)
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenMP/barrier.cpp b/clang/test/CIR/Incubator/CodeGen/OpenMP/barrier.cpp
new file mode 100644
index 0000000000000..b93016a3f1e41
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenMP/barrier.cpp
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fopenmp-enable-irbuilder -fopenmp -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// CHECK: cir.func
+void omp_barrier_1(){
+// CHECK: omp.barrier
+  #pragma omp barrier
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenMP/parallel.cpp b/clang/test/CIR/Incubator/CodeGen/OpenMP/parallel.cpp
new file mode 100644
index 0000000000000..7b5d603cf979c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenMP/parallel.cpp
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fopenmp -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// CHECK: cir.func
+void omp_parallel_1() {
+// CHECK: omp.parallel {
+// CHECK-NEXT: omp.terminator
+// CHECK-NEXT: }
+#pragma omp parallel
+{
+}
+}
+// CHECK: cir.func
+void omp_parallel_2() {
+// CHECK: %[[YVarDecl:.+]] = {{.*}} ["y", init]
+// CHECK: omp.parallel {
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[XVarDecl:.+]] = {{.*}} ["x", init]
+// CHECK-NEXT: %[[C1:.+]] = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT: cir.store{{.*}} %[[C1]], %[[XVarDecl]]
+// CHECK-NEXT: %[[XVal:.+]] = cir.load {{.*}} %[[XVarDecl]]
+// CHECK-NEXT: %[[COne:.+]] = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT: %[[BinOpVal:.+]] = cir.binop(add, %[[XVal]], %[[COne]])
+// CHECK-NEXT: cir.store{{.*}} %[[BinOpVal]], %[[YVarDecl]]
+// CHECK-NEXT: }
+// CHECK-NEXT: omp.terminator
+// CHECK-NEXT: }
+  int y = 0;
+#pragma omp parallel
+{
+  int x = 1;
+  y = x + 1;
+}
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenMP/taskwait.cpp b/clang/test/CIR/Incubator/CodeGen/OpenMP/taskwait.cpp
new file mode 100644
index 0000000000000..3b2059a8b9655
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenMP/taskwait.cpp
@@ -0,0 +1,9 @@
+// TODO: fix crash in emitTaskWaitCall
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fopenmp-enable-irbuilder -fopenmp -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// CHECK: cir.func
+void omp_taskwait_1(){
+// CHECK-DISABLE: omp.taskwait
+//  #pragma omp taskwait
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/OpenMP/taskyield.cpp b/clang/test/CIR/Incubator/CodeGen/OpenMP/taskyield.cpp
new file mode 100644
index 0000000000000..aa2903c07f740
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/OpenMP/taskyield.cpp
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fopenmp-enable-irbuilder -fopenmp -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// CHECK: cir.func
+void omp_taskyield_1(){
+// CHECK: omp.taskyield
+  #pragma omp taskyield
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/String.cpp b/clang/test/CIR/Incubator/CodeGen/String.cpp
new file mode 100644
index 0000000000000..dd24291e36227
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/String.cpp
@@ -0,0 +1,73 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s
+
+class String {
+  char *storage{nullptr};
+  long size;
+  long capacity;
+
+public:
+  String() : size{0} {}
+  String(int size) : size{size} {}
+  String(const char *s) {}
+};
+
+void test() {
+  String s1{};
+  String s2{1};
+  String s3{"abcdefghijklmnop"};
+}
+
+//      CHECK: cir.func {{.*}} @_ZN6StringC2Ev
+// CHECK-NEXT:   %0 = cir.alloca !cir.ptr<!rec_String>
+// CHECK-NEXT:   cir.store{{.*}} %arg0, %0
+// CHECK-NEXT:   %1 = cir.load{{.*}} %0
+// CHECK-NEXT:   %2 = cir.get_member %1[0] {name = "storage"}
+// CHECK-NEXT:   %3 = cir.const #cir.ptr<null> : !cir.ptr<!s8i>
+// CHECK-NEXT:   cir.store{{.*}} %3, %2 : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+// CHECK-NEXT:   %4 = cir.get_member %1[1] {name = "size"} : !cir.ptr<!rec_String> -> !cir.ptr<!s64i>
+// CHECK-NEXT:   %5 = cir.const #cir.int<0> : !s32i
+// CHECK-NEXT:   %6 = cir.cast integral %5 : !s32i -> !s64i
+// CHECK-NEXT:   cir.store{{.*}} %6, %4 : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT:   cir.return
+// CHECK-NEXT: }
+//      CHECK: cir.func {{.*}} @_ZN6StringC2Ei
+// CHECK-NEXT:   %0 = cir.alloca !cir.ptr<!rec_String>
+// CHECK-NEXT:   %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["size", init]
+// CHECK-NEXT:   cir.store{{.*}} %arg0, %0
+// CHECK-NEXT:   cir.store{{.*}} %arg1, %1
+// CHECK-NEXT:   %2 = cir.load{{.*}} %0
+// CHECK-NEXT:   %3 = cir.get_member %2[0] {name = "storage"}
+// CHECK-NEXT:   %4 = cir.const #cir.ptr<null> : !cir.ptr<!s8i>
+// CHECK-NEXT:   cir.store{{.*}} %4, %3
+// CHECK-NEXT:   %5 = cir.get_member %2[1] {name = "size"} : !cir.ptr<!rec_String> -> !cir.ptr<!s64i>
+// CHECK-NEXT:   %6 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:   %7 = cir.cast integral %6 : !s32i -> !s64i
+// CHECK-NEXT:   cir.store{{.*}} %7, %5 : !s64i, !cir.ptr<!s64i>
+// CHECK-NEXT:   cir.return
+// CHECK-NEXT: }
+
+//      CHECK: cir.func {{.*}} @_ZN6StringC2EPKc
+// CHECK-NEXT:   %0 = cir.alloca !cir.ptr<!rec_String>, !cir.ptr<!cir.ptr<!rec_String>>, ["this", init] {alignment = 8 : i64}
+// CHECK-NEXT:   %1 = cir.alloca !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>, ["s", init] {alignment = 8 : i64}
+// CHECK-NEXT:   cir.store{{.*}} %arg0, %0 : !cir.ptr<!rec_String>, !cir.ptr<!cir.ptr<!rec_String>>
+// CHECK-NEXT:   cir.store{{.*}} %arg1, %1 : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+// CHECK-NEXT:   %2 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!rec_String>>, !cir.ptr<!rec_String>
+// CHECK-NEXT:   %3 = cir.get_member %2[0] {name = "storage"} : !cir.ptr<!rec_String> -> !cir.ptr<!cir.ptr<!s8i>>
+// CHECK-NEXT:   %4 = cir.const #cir.ptr<null> : !cir.ptr<!s8i>
+// CHECK-NEXT:   cir.store{{.*}} %4, %3 : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+// CHECK-NEXT:   cir.return
+
+//      CHECK: cir.func {{.*}} @_ZN6StringC1EPKc
+// CHECK-NEXT:   %0 = cir.alloca !cir.ptr<!rec_String>, !cir.ptr<!cir.ptr<!rec_String>>, ["this", init] {alignment = 8 : i64}
+// CHECK-NEXT:   %1 = cir.alloca !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>, ["s", init] {alignment = 8 : i64}
+// CHECK-NEXT:   cir.store{{.*}} %arg0, %0 : !cir.ptr<!rec_String>, !cir.ptr<!cir.ptr<!rec_String>>
+// CHECK-NEXT:   cir.store{{.*}} %arg1, %1 : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+// CHECK-NEXT:   %2 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!rec_String>>, !cir.ptr<!rec_String>
+// CHECK-NEXT:   %3 = cir.load{{.*}} %1 : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
+// CHECK-NEXT:   cir.call @_ZN6StringC2EPKc(%2, %3) : (!cir.ptr<!rec_String>, !cir.ptr<!s8i>) -> ()
+// CHECK-NEXT:   cir.return
+
+// CHECK: cir.func {{.*}} @_Z4testv()
+// CHECK:   cir.call @_ZN6StringC1Ev(%0) : (!cir.ptr<!rec_String>) -> ()
+// CHECK:   cir.call @_ZN6StringC1Ei(%1, %3) : (!cir.ptr<!rec_String>, !s32i) -> ()
+// CHECK:   cir.call @_ZN6StringC1EPKc(%2, %5) : (!cir.ptr<!rec_String>, !cir.ptr<!s8i>) -> ()
diff --git a/clang/test/CIR/Incubator/CodeGen/StringExample.cpp b/clang/test/CIR/Incubator/CodeGen/StringExample.cpp
new file mode 100644
index 0000000000000..a2c0ef374f1ca
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/StringExample.cpp
@@ -0,0 +1,34 @@
+// RUN: true
+
+int strlen(char const *);
+void puts(char const *);
+
+struct String {
+  long size;
+  long capacity;
+  char *storage;
+
+  String() : size{0}, capacity{0}, storage{nullptr} {}
+  String(char const *s) : size{strlen(s)}, capacity{size},
+                          storage{new char[capacity]} {}
+};
+
+struct StringView {
+  long size;
+  char *storage;
+
+  StringView(const String &s) : size{s.size}, storage{s.storage} {}
+  StringView() : size{0}, storage{nullptr} {}
+};
+
+int main() {
+  StringView sv;
+  {
+    String s = "Hi";
+    sv = s;
+
+    puts(sv.storage);
+  }
+
+  puts(sv.storage);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/avx-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/avx-builtins.c
new file mode 100644
index 0000000000000..48202c7185df1
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/avx-builtins.c
@@ -0,0 +1,232 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR-CHECK,CIR-X64 --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR-CHECK,CIR-X64 --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM-CHECK,LLVM-X64 --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM-CHECK,LLVM-X64 --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR-CHECK,CIR-X64 --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR-CHECK,CIR-X64 --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM-CHECK,LLVM-X64 --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM-CHECK,LLVM-X64 --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+
+// This test mimics clang/test/CodeGen/X86/avx-builtins.c, which eventually
+// CIR shall be able to support fully.
+
+#include <immintrin.h>
+
+__m256 test_mm256_undefined_ps(void) {
+  // CIR-X64-LABEL: _mm256_undefined_ps
+  // CIR-X64: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 4>
+  // CIR-X64: %{{.*}} = cir.cast bitcast %[[A]] : !cir.vector<!cir.double x 4> -> !cir.vector<!cir.float x 8>
+  // CIR-X64: cir.return %{{.*}} : !cir.vector<!cir.float x 8>
+
+  // LLVM-X64-LABEL: test_mm256_undefined_ps
+  // LLVM-X64: store <8 x float> zeroinitializer, ptr %[[A:.*]], align 32
+  // LLVM-X64: %{{.*}} = load <8 x float>, ptr %[[A]], align 32
+  // LLVM-X64: ret <8 x float> %{{.*}}
+
+  return _mm256_undefined_ps();
+}
+
+__m256d test_mm256_undefined_pd(void) {
+  // CIR-X64-LABEL: _mm256_undefined_pd
+  // CIR-X64: %{{.*}} = cir.const #cir.zero : !cir.vector<!cir.double x 4>
+  // CIR-X64: cir.return %{{.*}} : !cir.vector<!cir.double x 4>
+
+  // LLVM-X64-LABEL: test_mm256_undefined_pd
+  // LLVM-X64: store <4 x double> zeroinitializer, ptr %[[A:.*]], align 32
+  // LLVM-X64: %{{.*}} = load <4 x double>, ptr %[[A]], align 32
+  // LLVM-X64: ret <4 x double> %{{.*}}
+
+  return _mm256_undefined_pd();
+}
+
+__m256i test_mm256_undefined_si256(void) {
+  // CIR-X64-LABEL: _mm256_undefined_si256
+  // CIR-X64: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 4>
+  // CIR-X64: %{{.*}} = cir.cast bitcast %[[A]] : !cir.vector<!cir.double x 4> -> !cir.vector<!s64i x 4>
+  // CIR-X64: cir.return %{{.*}} : !cir.vector<!s64i x 4>
+  
+  // LLVM-X64-LABEL: test_mm256_undefined_si256
+  // LLVM-X64: store <4 x i64> zeroinitializer, ptr %[[A:.*]], align 32
+  // LLVM-X64: %{{.*}} = load <4 x i64>, ptr %[[A]], align 32
+  // LLVM-X64: ret <4 x i64> %{{.*}}
+  return _mm256_undefined_si256();
+}
+
+int test_mm256_extract_epi8(__m256i A) {
+  // CIR-CHECK-LABEL: test_mm256_extract_epi8
+  // CIR-CHECK: %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<{{!s8i|!u8i}} x 32>
+  // CIR-CHECK: %{{.*}} = cir.cast integral %{{.*}} : !u8i -> !s32i
+
+  // LLVM-CHECK-LABEL: test_mm256_extract_epi8
+  // LLVM-CHECK: extractelement <32 x i8> %{{.*}}, {{i32|i64}} 31
+  // LLVM-CHECK: zext i8 %{{.*}} to i32
+  return _mm256_extract_epi8(A, 31);
+}
+
+int test_mm256_extract_epi16(__m256i A) {
+  // CIR-CHECK-LABEL: test_mm256_extract_epi16
+  // CIR-CHECK: %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s16i x 16>
+  // CIR-CHECK: %{{.*}} = cir.cast integral %{{.*}} : !u16i -> !s32i
+
+  // LLVM-CHECK-LABEL: test_mm256_extract_epi16
+  // LLVM-CHECK: extractelement <16 x i16> %{{.*}}, {{i32|i64}} 15
+  // LLVM-CHECK: zext i16 %{{.*}} to i32
+  return _mm256_extract_epi16(A, 15);
+}
+
+int test_mm256_extract_epi32(__m256i A) {
+  // CIR-CHECK-LABEL: test_mm256_extract_epi32
+  // CIR-CHECK: %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s32i x 8>
+
+  // LLVM-CHECK-LABEL: test_mm256_extract_epi32
+  // LLVM-CHECK: extractelement <8 x i32> %{{.*}}, {{i32|i64}} 7
+  return _mm256_extract_epi32(A, 7);
+}
+
+#if __x86_64__
+long long test_mm256_extract_epi64(__m256i A) {
+  // CIR-X64-LABEL: test_mm256_extract_epi64
+  // LLVM-X64-LABEL: test_mm256_extract_epi64
+  return _mm256_extract_epi64(A, 3);
+}
+#endif
+
+__m256i test_mm256_insert_epi8(__m256i x, char b) {
+
+  // CIR-CHECK-LABEL: test_mm256_insert_epi8
+  // CIR-CHECK: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[{{%.*}} : {{!u32i|!u64i}}] : !cir.vector<{{!s8i|!u8i}} x 32>
+
+  // LLVM-CHECK-LABEL: test_mm256_insert_epi8
+  // LLVM-CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, {{i32|i64}} 14
+  return _mm256_insert_epi8(x, b, 14);
+}
+
+__m256i test_mm256_insert_epi16(__m256i x, int b) {
+
+  // CIR-CHECK-LABEL: test_mm256_insert_epi16
+  // CIR-CHECK: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[{{%.*}} : {{!u32i|!u64i}}] : !cir.vector<!s16i x 16>
+
+  // LLVM-CHECK-LABEL: test_mm256_insert_epi16
+  // LLVM-CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, {{i32|i64}} 4
+  return _mm256_insert_epi16(x, b, 4);
+}
+
+__m256i test_mm256_insert_epi32(__m256i x, int b) {
+
+  // CIR-CHECK-LABEL: test_mm256_insert_epi32
+  // CIR-CHECK: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[{{%.*}} : {{!u32i|!u64i}}] : !cir.vector<!s32i x 8>
+
+  // LLVM-CHECK-LABEL: test_mm256_insert_epi32
+  // LLVM-CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, {{i32|i64}} 5
+  return _mm256_insert_epi32(x, b, 5);
+}
+
+#ifdef __x86_64__
+__m256i test_mm256_insert_epi64(__m256i x, long long b) {
+
+  // CIR-X64-LABEL: test_mm256_insert_epi64
+  // CIR-X64: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[{{%.*}} : {{!u32i|!u64i}}] : !cir.vector<!s64i x 4>
+
+  // LLVM-X64-LABEL: test_mm256_insert_epi64
+  // LLVM-X64: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, {{i32|i64}} 2
+  return _mm256_insert_epi64(x, b, 2);
+}
+#endif
+
+__m256d test_mm256_blend_pd(__m256d A, __m256d B) {
+  // CIR-LABEL: test_mm256_blend_pd
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 4>) [#cir.int<4> : !s32i, #cir.int<1> : !s32i, #cir.int<6> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.double x 4>
+
+  // LLVM-LABEL: test_mm256_blend_pd
+  // LLVM: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+
+  // OGCG-LABEL: test_mm256_blend_pd
+  // OGCG: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  return _mm256_blend_pd(A, B, 0x05);
+}
+
+__m256 test_mm256_blend_ps(__m256 A, __m256 B) {
+  // CIR-LABEL: test_mm256_blend_ps
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 8>) [#cir.int<8> : !s32i, #cir.int<1> : !s32i, #cir.int<10> : !s32i, #cir.int<3> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!cir.float x 8>
+
+  // LLVM-LABEL: test_mm256_blend_ps
+  // LLVM: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
+
+  // OGCG-LABEL: test_mm256_blend_ps
+  // OGCG: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
+  return _mm256_blend_ps(A, B, 0x35);
+}
+
+__m256d test_mm256_insertf128_pd(__m256d A, __m128d B) {
+  // CIR-LABEL: test_mm256_insertf128_pd
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.double x 4>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 4>) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.double x 4>
+
+  // LLVM-LABEL: test_mm256_insertf128_pd
+  // LLVM: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  return _mm256_insertf128_pd(A, B, 0);
+}
+
+__m256 test_mm256_insertf128_ps(__m256 A, __m128 B) {
+  // CIR-LABEL: test_mm256_insertf128_ps
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.float x 8>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i] : !cir.vector<!cir.float x 8>
+
+  // LLVM-LABEL: test_mm256_insertf128_ps
+  // LLVM: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // LLVM: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  return _mm256_insertf128_ps(A, B, 1);
+}
+
+__m256i test_mm256_insertf128_si256(__m256i A, __m128i B) {
+  // CIR-LABEL: test_mm256_insertf128_si256
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s32i x 8>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 8>) [#cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i]
+
+  // LLVM-LABEL: test_mm256_insertf128_si256
+  // LLVM: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // LLVM: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+  return _mm256_insertf128_si256(A, B, 0);
+}
+
+__m256d test_mm256_shuffle_pd(__m256d A, __m256d B) {
+  // CIR-LABEL: test_mm256_shuffle_pd
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 4>) [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, #cir.int<6> : !s32i] : !cir.vector<!cir.double x 4>
+
+  // LLVM-LABEL: test_mm256_shuffle_pd
+  // LLVM: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+
+  // OGCG-LABEL: test_mm256_shuffle_pd
+  // OGCG: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  return _mm256_shuffle_pd(A, B, 0);
+}
+
+__m256 test_mm256_shuffle_ps(__m256 A, __m256 B) {
+  // CIR-LABEL: test_mm256_shuffle_ps
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 8>) [#cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<8> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<12> : !s32i] : !cir.vector<!cir.float x 8>
+
+  // LLVM-LABEL: test_mm256_shuffle_ps
+  // LLVM: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 8, i32 8, i32 4, i32 4, i32 12, i32 12>
+
+  // OGCG-LABEL: test_mm256_shuffle_ps
+  // OGCG: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 8, i32 8, i32 4, i32 4, i32 12, i32 12>
+  return _mm256_shuffle_ps(A, B, 0);
+}
+
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/avx-shuffle-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/avx-shuffle-builtins.c
new file mode 100644
index 0000000000000..0848a8bce7ff8
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/avx-shuffle-builtins.c
@@ -0,0 +1,95 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: x86-registered-target
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -disable-O0-optnone -fclangir -emit-cir -o %t.cir
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -disable-O0-optnone -fclangir -emit-llvm -o %t.ll
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+// CIR-LABEL: @test_mm256_insertf128_pd_0(
+// CIR: [[A:%.*]] = cir.load align(32) %0 : !cir.ptr<!cir.vector<!cir.double x 4>>, !cir.vector<!cir.double x 4>
+// CIR: [[B:%.*]] = cir.load align(16) %1 : !cir.ptr<!cir.vector<!cir.double x 2>>, !cir.vector<!cir.double x 2>
+// CIR: %{{.*}} = cir.vec.shuffle([[B]], %{{.*}} : !cir.vector<!cir.double x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.double x 4>
+// CIR-NEXT: %{{.*}} = cir.vec.shuffle([[A]], %{{.*}} : !cir.vector<!cir.double x 4>) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.double x 4>
+// CIR: cir.return %{{.*}} : !cir.vector<!cir.double x 4>
+
+
+// LLVM-LABEL: @test_mm256_insertf128_pd_0
+// LLVM:    [[A:%.*]] = load <4 x double>, ptr %{{.*}}, align 32
+// LLVM:    [[B:%.*]] = load <2 x double>, ptr %{{.*}}, align 16
+// LLVM-NEXT:    [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// LLVM-NEXT:    [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+// LLVM:    ret <4 x double>
+__m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) {
+  return _mm256_insertf128_pd(a, b, 0);
+}
+
+// CIR-LABEL: @test_mm256_insertf128_ps_0(
+// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!cir.float x 8>
+// CIR-NEXT: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 8>) [#cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!cir.float x 8>
+// CIR: cir.return %{{.*}} : !cir.vector<!cir.float x 8>
+
+// LLVM-LABEL: @test_mm256_insertf128_ps_0(
+// LLVM:    %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// LLVM-NEXT:    %{{.*}} = shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+// LLVM:    ret <8 x float> %{{.*}}
+//
+__m256 test_mm256_insertf128_ps_0(__m256 a, __m128 b) {
+  return _mm256_insertf128_ps(a, b, 0);
+}
+
+// CIR-LABEL: @test_mm256_insertf128_ps_1(
+// CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!cir.float x 8>
+// CIR-NEXT: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i] : !cir.vector<!cir.float x 8>
+// CIR: cir.return %{{.*}} : !cir.vector<!cir.float x 8>
+
+// LLVM-LABEL: define dso_local <8 x float> @test_mm256_insertf128_ps_1(
+// LLVM:    %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// LLVM-NEXT:    %{{.*}} = shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+// LLVM:    ret <8 x float> %{{.*}}
+//
+__m256 test_mm256_insertf128_ps_1(__m256 a, __m128 b) {
+  return _mm256_insertf128_ps(a, b, 1);
+}
+
+// CIR-LABEL: @test_mm256_insertf128_si256_0(
+// CIR: [[TMP0:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<!s64i x 4> -> !cir.vector<!s32i x 8>
+// CIR: [[TMP1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<!s64i x 2> -> !cir.vector<!s32i x 4>
+// CIR: %{{.*}} = cir.vec.shuffle([[TMP1]], %{{.*}} : !cir.vector<!s32i x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s32i x 8>
+// CIR-NEXT: %{{.*}} = cir.vec.shuffle([[TMP0]], %{{.*}} : !cir.vector<!s32i x 8>) [#cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s32i x 8>
+// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<!s32i x 8> -> !cir.vector<!s64i x 4>
+// CIR: cir.return %{{.*}} : !cir.vector<!s64i x 4>
+
+// LLVM-LABEL: @test_mm256_insertf128_si256_0
+// LLVM:    [[TMP0:%.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32>
+// LLVM:    [[TMP1:%.*]] = bitcast <2 x i64> %{{.*}} to <4 x i32>
+// LLVM:    [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// LLVM-NEXT:    [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+// LLVM:    [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64>
+// LLVM:    ret <4 x i64> %{{.*}}
+//
+__m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) {
+  return _mm256_insertf128_si256(a, b, 0);
+}
+
+// CIR-LABEL: @test_mm256_insertf128_si256_1(
+// CIR: [[TMP0:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<!s64i x 4> -> !cir.vector<!s32i x 8>
+// CIR: [[TMP1:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<!s64i x 2> -> !cir.vector<!s32i x 4>
+// CIR: %{{.*}} = cir.vec.shuffle([[TMP1]], %{{.*}} : !cir.vector<!s32i x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s32i x 8>
+// CIR-NEXT: %{{.*}} = cir.vec.shuffle([[TMP0]], %{{.*}} : !cir.vector<!s32i x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i] : !cir.vector<!s32i x 8>
+// CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<!s32i x 8> -> !cir.vector<!s64i x 4>
+// CIR: cir.return %{{.*}} : !cir.vector<!s64i x 4>
+
+// LLVM-LABEL: @test_mm256_insertf128_si256_1
+// LLVM:    [[TMP0:%.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32>
+// LLVM:    [[TMP1:%.*]] = bitcast <2 x i64> %{{.*}} to <4 x i32>
+// LLVM:    [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// LLVM-NEXT:    [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+// LLVM:    [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64>
+// LLVM:    ret <4 x i64> %{{.*}}
+//
+__m256i test_mm256_insertf128_si256_1(__m256i a, __m128i b) {
+  return _mm256_insertf128_si256(a, b, 1);
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/avx10_2_512bf16-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/avx10_2_512bf16-builtins.c
new file mode 100644
index 0000000000000..87cdcd9b6b7c7
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/avx10_2_512bf16-builtins.c
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.2-512 -fclangir -emit-cir -o %t.cir -Wno-invalid-feature-combination -Wall -Werror -Wsign-conversion 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.2-512 -fclangir -emit-llvm -o %t.ll -Wno-invalid-feature-combination -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+__m512bh test_mm512_undefined_pbh(void) {
+
+  // CIR-LABEL: _mm512_undefined_pbh
+  // CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 8>
+  // CIR: %{{.*}} = cir.cast bitcast %[[A]] : !cir.vector<!cir.double x 8> -> !cir.vector<!cir.bf16 x 32>
+  // CIR: cir.return %{{.*}} : !cir.vector<!cir.bf16 x 32>
+
+  // LLVM-LABEL: test_mm512_undefined_pbh
+  // LLVM: store <32 x bfloat> zeroinitializer, ptr %[[A:.*]], align 64
+  // LLVM: %{{.*}} = load <32 x bfloat>, ptr %[[A]], align 64
+  // LLVM: ret <32 x bfloat> %{{.*}}
+  return _mm512_undefined_pbh();
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/avx10_2bf16-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/avx10_2bf16-builtins.c
new file mode 100644
index 0000000000000..3ea389245b024
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/avx10_2bf16-builtins.c
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.2 -target-feature +avx10.2-256 -fclangir -emit-cir -o %t.cir -Wno-invalid-feature-combination -Wall -Werror -Wsign-conversion 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.2 -target-feature +avx10.2-256 -fclangir -emit-llvm -o %t.ll -Wno-invalid-feature-combination -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+__m128bh test_mm_undefined_pbh(void) {
+  // CIR-LABEL: _mm_undefined_pbh
+  // CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 2>
+  // CIR: %{{.*}} = cir.cast bitcast %[[A]] : !cir.vector<!cir.double x 2> -> !cir.vector<!cir.bf16 x 8>
+  // CIR: cir.return %{{.*}} : !cir.vector<!cir.bf16 x 8>
+
+  // LLVM-LABEL: @test_mm_undefined_pbh
+  // LLVM: store <8 x bfloat> zeroinitializer, ptr %[[A:.*]], align 16
+  // LLVM: %{{.*}} = load <8 x bfloat>, ptr %[[A]], align 16
+  // LLVM: ret <8 x bfloat> %{{.*}}
+  return _mm_undefined_pbh();
+}
+
+__m256bh test_mm256_undefined_pbh(void) {
+  // CIR-LABEL: _mm256_undefined_pbh
+  // CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 4>
+  // CIR: %{{.*}} = cir.cast bitcast %[[A]] : !cir.vector<!cir.double x 4> -> !cir.vector<!cir.bf16 x 16>
+  // CIR: cir.return %{{.*}} : !cir.vector<!cir.bf16 x 16>
+
+  // LLVM-LABEL: @test_mm256_undefined_pbh
+  // LLVM: store <16 x bfloat> zeroinitializer, ptr %[[A:.*]], align 32
+  // LLVM: %{{.*}} = load <16 x bfloat>, ptr %[[A]], align 32
+  // LLVM: ret <16 x bfloat> %{{.*}}
+  return _mm256_undefined_pbh();
+}
+
+void test_mm_mask_store_sbh(void *__P, __mmask8 __U, __m128bh __A) {
+  // CIR-LABEL: _mm_mask_store_sbh
+  // CIR: cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.bf16 x 8>, !cir.ptr<!cir.vector<!cir.bf16 x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>) -> !void
+
+  // LLVM-LABEL: @test_mm_mask_store_sbh
+  // LLVM: call void @llvm.masked.store.v8bf16.p0(<8 x bfloat> %{{.*}}, ptr elementtype(<8 x bfloat>) align 1 %{{.*}}, <8 x i1> %{{.*}})
+  _mm_mask_store_sbh(__P, __U, __A);
+}
+
+__m128bh test_mm_load_sbh(void const *A) {
+  // CIR-LABEL: _mm_load_sbh
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.bf16 x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!cir.bf16 x 8>) -> !cir.vector<!cir.bf16 x 8> 
+
+  // LLVM-LABEL: @test_mm_load_sbh
+  // NOTE: OG represents the mask using a bitcast from splat (i8 1), see IR-differences #1767
+  // LLVM: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr elementtype(<8 x bfloat>) align 1 %{{.*}}, <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x bfloat> %{{.*}})
+  return _mm_load_sbh(A);
+}
+
+__m128bh test_mm_mask_load_sbh(__m128bh __A, __mmask8 __U, const void *__W) {
+  // CIR-LABEL: _mm_mask_load_sbh
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.bf16 x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!cir.bf16 x 8>) -> !cir.vector<!cir.bf16 x 8>
+
+  // LLVM-LABEL: @test_mm_mask_load_sbh
+  // LLVM: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr elementtype(<8 x bfloat>) align 1 %{{.*}}, <8 x i1> %{{.*}}, <8 x bfloat> %{{.*}})
+  return _mm_mask_load_sbh(__A, __U, __W);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/avx2-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/avx2-builtins.c
new file mode 100644
index 0000000000000..ebb83016ad0fa
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/avx2-builtins.c
@@ -0,0 +1,145 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+
+// This test mimics clang/test/CodeGen/X86/avx2-builtins.c, which eventually
+// CIR shall be able to support fully.
+
+#include <immintrin.h>
+
+// FIXME: We should also lower the __builtin_ia32_pblendw128 (and similar)
+// functions to this IR. In the future we could delete the corresponding
+// intrinsic in LLVM if it's not being used anymore.
+__m256i test_mm256_blend_epi16(__m256i a, __m256i b) {
+  // CIR-LABEL: _mm256_blend_epi16
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s16i x 16>) [#cir.int<0> : !s32i, #cir.int<17> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i, #cir.int<25> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i] : !cir.vector<!s16i x 16>
+
+  // LLVM-LABEL: test_mm256_blend_epi16
+  // LLVM-NOT: @llvm.x86.avx2.pblendw
+  // LLVM: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 25, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+
+  // OGCG-LABEL: test_mm256_blend_epi16
+  // OGCG-NOT: @llvm.x86.avx2.pblendw
+  // OGCG: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 25, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  return _mm256_blend_epi16(a, b, 2);
+}
+
+__m128i test_mm_blend_epi32(__m128i a, __m128i b) {
+  // CIR-LABEL: _mm_blend_epi32
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 4>) [#cir.int<4> : !s32i, #cir.int<1> : !s32i, #cir.int<6> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s32i x 4>
+
+  // LLVM-LABEL: test_mm_blend_epi32
+  // LLVM-NOT: @llvm.x86.avx2.pblendd.128
+  // LLVM: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+
+  // OGCG-LABEL: test_mm_blend_epi32
+  // OGCG-NOT: @llvm.x86.avx2.pblendd.128
+  // OGCG: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  return _mm_blend_epi32(a, b, 0x05);
+}
+
+__m256i test_mm256_blend_epi32(__m256i a, __m256i b) {
+  // CIR-LABEL: _mm256_blend_epi32
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 8>) [#cir.int<8> : !s32i, #cir.int<1> : !s32i, #cir.int<10> : !s32i, #cir.int<3> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s32i x 8>
+
+  // LLVM-LABEL: test_mm256_blend_epi32
+  // LLVM-NOT: @llvm.x86.avx2.pblendd.256
+  // LLVM: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
+
+  // OGCG-LABEL: test_mm256_blend_epi32
+  // OGCG-NOT: @llvm.x86.avx2.pblendd.256
+  // OGCG: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
+  return _mm256_blend_epi32(a, b, 0x35);
+}
+
+__m256i test0_mm256_inserti128_si256(__m256i a, __m128i b) {
+
+  // CIR-LABEL: test0_mm256_inserti128_si256
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s64i x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s64i x 4>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s64i x 4>) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s64i x 4>
+
+  // LLVM-LABEL: test0_mm256_inserti128_si256
+  // LLVM: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  return _mm256_inserti128_si256(a, b, 0);
+}
+
+__m256i test1_mm256_inserti128_si256(__m256i a, __m128i b) {
+  // CIR-LABEL: test1_mm256_inserti128_si256
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s64i x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s64i x 4>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s64i x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i] : !cir.vector<!s64i x 4>
+
+  // LLVM-LABEL: test1_mm256_inserti128_si256
+  // LLVM: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  return _mm256_inserti128_si256(a, b, 1);
+}
+
+// Immediate should be truncated to one bit.
+__m256i test2_mm256_inserti128_si256(__m256i a, __m128i b) {
+  // CIR-LABEL: test2_mm256_inserti128_si256
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s64i x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s64i x 4>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s64i x 4>) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s64i x 4>
+
+  // LLVM-LABEL: test2_mm256_inserti128_si256
+  // LLVM: shufflevector <2 x i64> %{{.*}}, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  return _mm256_inserti128_si256(a, b, 0);
+}
+
+__m256i test_mm256_shufflelo_epi16(__m256i a) {
+  // CIR-LABEL: _mm256_shufflelo_epi16
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s16i x 16>) [#cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<11> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<9> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i] : !cir.vector<!s16i x 16> 
+
+  // LLVM-LABEL: test_mm256_shufflelo_epi16
+  // LLVM: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, i32 9, i32 12, i32 13, i32 14, i32 15>
+
+  // OGCG-LABEL: test_mm256_shufflelo_epi16
+  // OGCG: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, i32 9, i32 12, i32 13, i32 14, i32 15>
+  return _mm256_shufflelo_epi16(a, 83);
+}
+
+__m256i test_mm256_shufflehi_epi16(__m256i a) {
+  // CIR-LABEL: _mm256_shufflehi_epi16
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s16i x 16>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<7> : !s32i, #cir.int<6> : !s32i, #cir.int<6> : !s32i, #cir.int<5> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<15> : !s32i, #cir.int<14> : !s32i, #cir.int<14> : !s32i, #cir.int<13> : !s32i] : !cir.vector<!s16i x 16>
+
+  // LLVM-LABEL: test_mm256_shufflehi_epi16
+  // LLVM: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 14, i32 13>
+
+  // OGCG-LABEL: test_mm256_shufflehi_epi16
+  // OGCG: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 14, i32 13>
+  return _mm256_shufflehi_epi16(a, 107);
+}
+
+__m256i test_mm256_alignr_epi8(__m256i a, __m256i b) {
+  // CIR-LABEL: test_mm256_alignr_epi8
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<{{!s8i|!u8i}} x 32>) [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<32> : !s32i, #cir.int<33> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<20> : !s32i, #cir.int<21> : !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i, #cir.int<24> : !s32i, #cir.int<25> : !s32i, #cir.int<26> : !s32i, #cir.int<27> : !s32i, #cir.int<28> : !s32i, #cir.int<29> : !s32i, #cir.int<30> : !s32i, #cir.int<31> : !s32i, #cir.int<48> : !s32i, #cir.int<49> : !s32i] : !cir.vector<{{!s8i|!u8i}} x 32>
+
+  // LLVM-LABEL: test_mm256_alignr_epi8
+  // LLVM: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49>
+
+  // OGCG-LABEL: test_mm256_alignr_epi8
+  // OGCG: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49>
+  return _mm256_alignr_epi8(a, b, 2);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/avx512bw-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/avx512bw-builtins.c
new file mode 100644
index 0000000000000..47b0c7ecabeb5
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/avx512bw-builtins.c
@@ -0,0 +1,145 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw  -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char  -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux  -target-feature +avx512bw -fno-signed-char  -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM-UNSIGNED-CHAR --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefix=OGCG
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefix=OGCG
+
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+
+#include <immintrin.h>
+
+void test_mm512_mask_storeu_epi16(void *__P, __mmask32 __U, __m512i __A) {
+  // CIR-LABEL: _mm512_mask_storeu_epi16
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s16i x 32>, !cir.ptr<!cir.vector<!s16i x 32>>, !u32i, !cir.vector<!cir.int<s, 1> x 32>) -> !void
+
+  // LLVM-LABEL: @test_mm512_mask_storeu_epi16
+  // LLVM: call void @llvm.masked.store.v32i16.p0(<32 x i16> %{{.*}}, ptr elementtype(<32 x i16>) align 1 %{{.*}}, <32 x i1> %{{.*}})
+  return _mm512_mask_storeu_epi16(__P, __U, __A);
+}
+
+void test_mm512_mask_storeu_epi8(void *__P, __mmask64 __U, __m512i __A) {
+  // CIR-LABEL: _mm512_mask_storeu_epi8
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<{{!s8i|!u8i}} x 64>, !cir.ptr<!cir.vector<{{!s8i|!u8i}} x 64>>, !u32i, !cir.vector<!cir.int<s, 1> x 64>) -> !void
+
+  // LLVM-LABEL: @test_mm512_mask_storeu_epi8
+  // LLVM: call void @llvm.masked.store.v64i8.p0(<64 x i8> %{{.*}}, ptr elementtype(<64 x i8>) align 1 %{{.*}}, <64 x i1> %{{.*}})
+  return _mm512_mask_storeu_epi8(__P, __U, __A); 
+}
+
+__m512i test_mm512_movm_epi16(__mmask32 __A) {
+  // CIR-LABEL: _mm512_movm_epi16
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u32i -> !cir.vector<!cir.int<s, 1> x 32>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 32> -> !cir.vector<!s16i x 32>
+  // LLVM-LABEL: @test_mm512_movm_epi16
+  // LLVM:  %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM:  %{{.*}} = sext <32 x i1> %{{.*}} to <32 x i16>
+  return _mm512_movm_epi16(__A); 
+}
+
+__m512i test_mm512_mask_loadu_epi8(__m512i __W, __mmask64 __U, void const *__P) {
+  // CIR-LABEL: _mm512_mask_loadu_epi8
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<{{!s8i|!u8i}} x 64>>, !u32i, !cir.vector<!cir.int<s, 1> x 64>, !cir.vector<{{!s8i|!u8i}} x 64>) -> !cir.vector<{{!s8i|!u8i}} x 64>
+
+  // LLVM-LABEL: @test_mm512_mask_loadu_epi8
+  // LLVM: @llvm.masked.load.v64i8.p0(ptr elementtype(<64 x i8>) align 1 %{{.*}}, <64 x i1> %{{.*}}, <64 x i8> %{{.*}})
+  return _mm512_mask_loadu_epi8(__W, __U, __P); 
+}
+
+__m512i test_mm512_mask_loadu_epi16(__m512i __W, __mmask32 __U, void const *__P) {
+  // CIR-LABEL: _mm512_mask_loadu_epi16
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s16i x 32>>, !u32i, !cir.vector<!cir.int<s, 1> x 32>, !cir.vector<!s16i x 32>) -> !cir.vector<!s16i x 32>
+
+  // LLVM-LABEL: @test_mm512_mask_loadu_epi16
+  // LLVM: @llvm.masked.load.v32i16.p0(ptr elementtype(<32 x i16>) align 1 %{{.*}}, <32 x i1> %{{.*}}, <32 x i16> %{{.*}})
+  return _mm512_mask_loadu_epi16(__W, __U, __P); 
+}
+
+__m512i test_mm512_maskz_loadu_epi16(__mmask32 __U, void const *__P) {
+  // CIR-LABEL: _mm512_maskz_loadu_epi16
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s16i x 32>>, !u32i, !cir.vector<!cir.int<s, 1> x 32>, !cir.vector<!s16i x 32>) -> !cir.vector<!s16i x 32>
+
+  // LLVM-LABEL: @test_mm512_maskz_loadu_epi16
+  // LLVM: @llvm.masked.load.v32i16.p0(ptr elementtype(<32 x i16>) align 1 %{{.*}}, <32 x i1> %{{.*}}, <32 x i16> %{{.*}})
+  return _mm512_maskz_loadu_epi16(__U, __P); 
+}
+
+__m512i test_mm512_maskz_loadu_epi8(__mmask64 __U, void const *__P) {
+  // CIR-LABEL: _mm512_maskz_loadu_epi8
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<{{!s8i|!u8i}} x 64>>, !u32i, !cir.vector<!cir.int<s, 1> x 64>, !cir.vector<{{!s8i|!u8i}} x 64>) -> !cir.vector<{{!s8i|!u8i}} x 64>
+
+  // LLVM-LABEL: @test_mm512_maskz_loadu_epi8
+  // LLVM: @llvm.masked.load.v64i8.p0(ptr elementtype(<64 x i8>) align 1 %{{.*}}, <64 x i1> %{{.*}}, <64 x i8> %{{.*}})
+  return _mm512_maskz_loadu_epi8(__U, __P); 
+}
+
+__mmask64 test_mm512_movepi8_mask(__m512i __A) {
+  // CIR-LABEL: @_mm512_movepi8_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<{{!s8i|!u8i}} x 64>, !cir.vector<!cir.int<u, 1> x 64>
+
+  // LLVM-LABEL: @test_mm512_movepi8_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <64 x i8> %{{.*}}, zeroinitializer
+
+  // In the unsigned case below, the canonicalizer proves the comparison is
+  // always false (no i8 unsigned value can be < 0) and folds it away.
+  // LLVM-UNSIGNED-CHAR: store i64 0, ptr %{{.*}}, align 8
+  
+  // OGCG-LABEL: @test_mm512_movepi8_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <64 x i8> %{{.*}}, zeroinitializer
+  return _mm512_movepi8_mask(__A); 
+}
+
+__mmask32 test_mm512_movepi16_mask(__m512i __A) {
+  // CIR-LABEL: @_mm512_movepi16_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s16i x 32>, !cir.vector<!cir.int<u, 1> x 32>
+
+  // LLVM-LABEL: @test_mm512_movepi16_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer
+
+  // OGCG-LABEL: @test_mm512_movepi16_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer
+  return _mm512_movepi16_mask(__A); 
+}
+
+__m512i test_mm512_shufflelo_epi16(__m512i __A) {
+  // CIR-LABEL: _mm512_shufflelo_epi16
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s16i x 32>) [#cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<9> : !s32i, #cir.int<9> : !s32i, #cir.int<8> : !s32i, #cir.int<8> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<17> : !s32i, #cir.int<17> : !s32i, #cir.int<16> : !s32i, #cir.int<16> : !s32i, #cir.int<20> : !s32i, #cir.int<21> : !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i, #cir.int<25> : !s32i, #cir.int<25> : !s32i, #cir.int<24> : !s32i, #cir.int<24> : !s32i, #cir.int<28> : !s32i, #cir.int<29> : !s32i, #cir.int<30> : !s32i, #cir.int<31> : !s32i] : !cir.vector<!s16i x 32>
+
+  // LLVM-LABEL: @test_mm512_shufflelo_epi16
+  // LLVM: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
+
+  // OGCG-LABEL: @test_mm512_shufflelo_epi16
+  // OGCG: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
+  return _mm512_shufflelo_epi16(__A, 5); 
+}
+
+__m512i test_mm512_shufflehi_epi16(__m512i __A) {
+  // CIR-LABEL: _mm512_shufflehi_epi16
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s16i x 32>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<5> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<13> : !s32i, #cir.int<13> : !s32i, #cir.int<12> : !s32i, #cir.int<12> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<21> : !s32i, #cir.int<21> : !s32i, #cir.int<20> : !s32i, #cir.int<20> : !s32i, #cir.int<24> : !s32i, #cir.int<25> : !s32i, #cir.int<26> : !s32i, #cir.int<27> : !s32i, #cir.int<29> : !s32i, #cir.int<29> : !s32i, #cir.int<28> : !s32i, #cir.int<28> : !s32i] : !cir.vector<!s16i x 32>
+
+  // LLVM-LABEL: @test_mm512_shufflehi_epi16
+  // LLVM: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
+
+  // OGCG-LABEL: @test_mm512_shufflehi_epi16
+  // OGCG: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
+  return _mm512_shufflehi_epi16(__A, 5); 
+}
+
+__m512i test_mm512_alignr_epi8(__m512i __A,__m512i __B){
+  // CIR-LABEL: _mm512_alignr_epi8
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<{{!s8i|!u8i}} x 64>) [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<64> : !s32i, #cir.int<65> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<20> : !s32i, #cir.int<21> : !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i, #cir.int<24> : !s32i, #cir.int<25> : !s32i, #cir.int<26> : !s32i, #cir.int<27> : !s32i, #cir.int<28> : !s32i, #cir.int<29> : !s32i, #cir.int<30> : !s32i, #cir.int<31> : !s32i, #cir.int<80> : !s32i, #cir.int<81> : !s32i, #cir.int<34> : !s32i, #cir.int<35> : !s32i, #cir.int<36> : !s32i, #cir.int<37> : !s32i, #cir.int<38> : !s32i, #cir.int<39> : !s32i, #cir.int<40> : !s32i, #cir.int<41> : !s32i, #cir.int<42> : !s32i, #cir.int<43> : !s32i, #cir.int<44> : !s32i, #cir.int<45> : !s32i, #cir.int<46> : !s32i, #cir.int<47> : !s32i, #cir.int<96> : !s32i, #cir.int<97> : !s32i, #cir.int<50> : !s32i, #cir.int<51> : !s32i, #cir.int<52> : !s32i, #cir.int<53> : !s32i, #cir.int<54> : !s32i, #cir.int<55> : !s32i, #cir.int<56> : !s32i, #cir.int<57> : !s32i, #cir.int<58> : !s32i, #cir.int<59> : !s32i, #cir.int<60> : !s32i, #cir.int<61> : !s32i, #cir.int<62> : !s32i, #cir.int<63> : !s32i, #cir.int<112> : !s32i, #cir.int<113> : !s32i] : !cir.vector<{{!s8i|!u8i}} x 64>
+
+  // LLVM-LABEL: @test_mm512_alignr_epi8
+  // LLVM: shufflevector <64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113>
+
+  // OGCG-LABEL: @test_mm512_alignr_epi8
+  // OGCG: shufflevector <64 x i8> %{{.*}}, <64 x i8> %{{.*}}, <64 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113>
+  return _mm512_alignr_epi8(__A, __B, 2); 
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/avx512dq-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/avx512dq-builtins.c
new file mode 100644
index 0000000000000..db12a3cef2911
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/avx512dq-builtins.c
@@ -0,0 +1,77 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512dq -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512dq -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefix=OGCG
+
+#include <immintrin.h>
+
+__m512i test_mm512_movm_epi64(__mmask8 __A) {
+  // CIR-LABEL: _mm512_movm_epi64
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 1> x 8>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 8> -> !cir.vector<!s64i x 8>
+  // LLVM-LABEL: @test_mm512_movm_epi64
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i64>
+  return _mm512_movm_epi64(__A); 
+}
+
+__m512 test_mm512_insertf32x8(__m512 __A, __m256 __B) {
+  // CIR-LABEL: test_mm512_insertf32x8
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 16>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<20> : !s32i, #cir.int<21> : !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i] : !cir.vector<!cir.float x 16>
+
+  // LLVM-LABEL: @test_mm512_insertf32x8
+  // LLVM: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+  return _mm512_insertf32x8(__A, __B, 1); 
+}
+
+__m512i test_mm512_inserti32x8(__m512i __A, __m256i __B) {
+  // CIR-LABEL: test_mm512_inserti32x8
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 16>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<20> : !s32i, #cir.int<21> : !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i] : !cir.vector<!s32i x 16>
+
+  // LLVM-LABEL: @test_mm512_inserti32x8
+  // LLVM: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+  return _mm512_inserti32x8(__A, __B, 1); 
+}
+
+__m512d test_mm512_insertf64x2(__m512d __A, __m128d __B) {
+  // CIR-LABEL: test_mm512_insertf64x2
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i] : !cir.vector<!cir.double x 8>
+
+  // LLVM-LABEL: @test_mm512_insertf64x2
+  // LLVM: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+  return _mm512_insertf64x2(__A, __B, 3); 
+}
+
+__m512i test_mm512_inserti64x2(__m512i __A, __m128i __B) {
+  // CIR-LABEL: test_mm512_inserti64x2
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s64i x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s64i x 8>
+
+  // LLVM-LABEL: @test_mm512_inserti64x2
+  // LLVM: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+  return _mm512_inserti64x2(__A, __B, 1); 
+}
+
+__mmask16 test_mm512_movepi32_mask(__m512i __A) {
+  // CIR-LABEL: _mm512_movepi32_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s32i x 16>, !cir.vector<!cir.int<u, 1> x 16>
+
+  // LLVM-LABEL: @test_mm512_movepi32_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <16 x i32> %{{.*}}, zeroinitializer
+
+  // OGCG-LABEL: @test_mm512_movepi32_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <16 x i32> %{{.*}}, zeroinitializer
+  return _mm512_movepi32_mask(__A); 
+}
+
+__mmask8 test_mm512_movepi64_mask(__m512i __A) {
+  // CIR-LABEL: @_mm512_movepi64_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s64i x 8>, !cir.vector<!cir.int<u, 1> x 8>
+
+  // LLVM-LABEL: @test_mm512_movepi64_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <8 x i64> %{{.*}}, zeroinitializer
+
+  // OGCG-LABEL: @test_mm512_movepi64_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <8 x i64> %{{.*}}, zeroinitializer
+  return _mm512_movepi64_mask(__A); 
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/avx512f-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/avx512f-builtins.c
new file mode 100644
index 0000000000000..150f1acc85690
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/avx512f-builtins.c
@@ -0,0 +1,702 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+
+#include <immintrin.h>
+
+__m512 test_mm512_undefined(void) {
+  // CIR-LABEL: _mm512_undefined
+  // CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 8>
+  // CIR: %{{.*}} = cir.cast bitcast %[[A]] : !cir.vector<!cir.double x 8> -> !cir.vector<!cir.float x 16>
+  // CIR: cir.return %{{.*}} : !cir.vector<!cir.float x 16>
+
+  // LLVM-LABEL: test_mm512_undefined
+  // LLVM: store <16 x float> zeroinitializer, ptr %[[A:.*]], align 64
+  // LLVM: %{{.*}} = load <16 x float>, ptr %[[A]], align 64
+  // LLVM: ret <16 x float> %{{.*}}
+  return _mm512_undefined();
+}
+
+__m512 test_mm512_undefined_ps(void) {
+  // CIR-LABEL: _mm512_undefined_ps
+  // CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 8>
+  // CIR: %{{.*}} = cir.cast bitcast %[[A]] : !cir.vector<!cir.double x 8> -> !cir.vector<!cir.float x 16>
+  // CIR: cir.return %{{.*}} : !cir.vector<!cir.float x 16>
+
+  // LLVM-LABEL: test_mm512_undefined_ps
+  // LLVM: store <16 x float> zeroinitializer, ptr %[[A:.*]], align 64
+  // LLVM: %{{.*}} = load <16 x float>, ptr %[[A]], align 64
+  // LLVM: ret <16 x float> %{{.*}}
+  return _mm512_undefined_ps();
+}
+
+__m512d test_mm512_undefined_pd(void) {
+  // CIR-LABEL: _mm512_undefined_pd
+  // CIR: %{{.*}} = cir.const #cir.zero : !cir.vector<!cir.double x 8>
+  // CIR: cir.return %{{.*}} : !cir.vector<!cir.double x 8>
+
+  // LLVM-LABEL: test_mm512_undefined_pd
+  // LLVM: store <8 x double> zeroinitializer, ptr %[[A:.*]], align 64
+  // LLVM: %{{.*}} = load <8 x double>, ptr %[[A]], align 64
+  // LLVM: ret <8 x double> %{{.*}}
+  return _mm512_undefined_pd();
+}
+
+__m512i test_mm512_undefined_epi32(void) {
+  // CIR-LABEL: _mm512_undefined_epi32
+  // CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 8>
+  // CIR: %{{.*}} = cir.cast bitcast %[[A]] : !cir.vector<!cir.double x 8> -> !cir.vector<!s64i x 8>
+  // CIR: cir.return %{{.*}} : !cir.vector<!s64i x 8>
+
+  // LLVM-LABEL: test_mm512_undefined_epi32
+  // LLVM: store <8 x i64> zeroinitializer, ptr %[[A:.*]], align 64
+  // LLVM: %{{.*}} = load <8 x i64>, ptr %[[A]], align 64
+  // LLVM: ret <8 x i64> %{{.*}}
+  return _mm512_undefined_epi32();
+}
+
+void test_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) {
+  // CIR-LABEL: _mm512_mask_storeu_epi64
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s64i x 8>, !cir.ptr<!s64i>, !u32i, !cir.vector<!cir.int<s, 1> x 8>) -> !void
+
+  // LLVM-LABEL: test_mm512_mask_storeu_epi64
+  // LLVM: call void @llvm.masked.store.v8i64.p0(<8 x i64> %{{.*}}, ptr elementtype(<8 x i64>) align 1 %{{.*}}, <8 x i1> %{{.*}})
+  return _mm512_mask_storeu_epi64(__P, __U, __A); 
+}
+
+void test_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) {
+  // CIR-LABEL: _mm512_mask_storeu_epi32
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s32i x 16>, !cir.ptr<!s32i>, !u32i, !cir.vector<!cir.int<s, 1> x 16>) -> !void
+
+  // LLVM-LABEL: test_mm512_mask_storeu_epi32
+  // LLVM: call void @llvm.masked.store.v16i32.p0(<16 x i32> %{{.*}}, ptr elementtype(<16 x i32>) align 1 %{{.*}}, <16 x i1> %{{.*}})
+  return _mm512_mask_storeu_epi32(__P, __U, __A); 
+}
+
+void test_mm_mask_store_ss(float * __P, __mmask8 __U, __m128 __A){
+  // CIR-LABEL: _mm_mask_store_ss
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>) -> !void
+
+  // LLVM-LABEL: test_mm_mask_store_ss
+  // LLVM: call void @llvm.masked.store.v4f32.p0(<4 x float> %{{.*}}, ptr elementtype(<4 x float>) align 1 %{{.*}}, <4 x i1> %{{.*}})
+
+  _mm_mask_store_ss(__P, __U, __A);
+}
+
+void test_mm_mask_store_sd(double * __P, __mmask8 __U, __m128d __A){
+  // CIR-LABEL: _mm_mask_store_sd
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.double x 2>, !cir.ptr<!cir.vector<!cir.double x 2>>, !u32i, !cir.vector<!cir.int<s, 1> x 2>) -> !void
+
+  // LLVM-LABEL: test_mm_mask_store_sd
+  // LLVM: call void @llvm.masked.store.v2f64.p0(<2 x double> %{{.*}}, ptr elementtype(<2 x double>) align 1 %{{.*}}, <2 x i1> %{{.*}})
+  _mm_mask_store_sd(__P, __U, __A);
+}
+
+void test_mm512_mask_store_pd(void *p, __m512d a, __mmask8 m){
+  // CIR-LABEL: _mm512_mask_store_pd
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.double x 8>, !cir.ptr<!cir.vector<!cir.double x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>) -> !void
+
+  // LLVM-LABEL: test_mm512_mask_store_pd
+  // LLVM: call void @llvm.masked.store.v8f64.p0(<8 x double> %{{.*}}, ptr elementtype(<8 x double>) align 64 %{{.*}}, <8 x i1> %{{.*}})
+  _mm512_mask_store_pd(p, m, a);
+}
+
+void test_mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A) {
+  // CIR-LABEL: _mm512_mask_store_epi32
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s32i x 16>, !cir.ptr<!cir.vector<!s32i x 16>>, !u32i, !cir.vector<!cir.int<s, 1> x 16>) -> !void
+
+  // LLVM-LABEL: test_mm512_mask_store_epi32
+  // LLVM: call void @llvm.masked.store.v16i32.p0(<16 x i32> %{{.*}}, ptr elementtype(<16 x i32>) align 64 %{{.*}}, <16 x i1> %{{.*}})
+  return _mm512_mask_store_epi32(__P, __U, __A); 
+}
+
+void test_mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A) {
+  // CIR-LABEL: _mm512_mask_store_epi64
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s64i x 8>, !cir.ptr<!cir.vector<!s64i x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>) -> !void
+
+  // LLVM-LABEL: test_mm512_mask_store_epi64
+  // LLVM: call void @llvm.masked.store.v8i64.p0(<8 x i64> %{{.*}}, ptr elementtype(<8 x i64>) align 64 %{{.*}}, <8 x i1> %{{.*}})
+  return _mm512_mask_store_epi64(__P, __U, __A); 
+}
+
+void test_mm512_mask_store_ps(void *p, __m512 a, __mmask16 m){
+  // CIR-LABEL: _mm512_mask_store_ps
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.float x 16>, !cir.ptr<!cir.vector<!cir.float x 16>>, !u32i, !cir.vector<!cir.int<s, 1> x 16>) -> !void
+
+  // LLVM-LABEL: test_mm512_mask_store_ps
+  // LLVM: call void @llvm.masked.store.v16f32.p0(<16 x float> %{{.*}}, ptr elementtype(<16 x float>) align 64 %{{.*}}, <16 x i1> %{{.*}})
+  _mm512_mask_store_ps(p, m, a);
+}
+
+__m512 test_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void *__P)
+{
+  // CIR-LABEL: _mm512_mask_loadu_ps
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.float>, !u32i, !cir.vector<!cir.int<s, 1> x 16>, !cir.vector<!cir.float x 16>) -> !cir.vector<!cir.float x 16>
+
+  // LLVM-LABEL: test_mm512_mask_loadu_ps
+  // LLVM: @llvm.masked.load.v16f32.p0(ptr elementtype(<16 x float>) align 1 %{{.*}}, <16 x i1> %{{.*}}, <16 x float> %{{.*}})
+  return _mm512_mask_loadu_ps (__W,__U, __P);
+}
+
+__m512 test_mm512_maskz_load_ps(__mmask16 __U, void *__P)
+{
+
+  // CIR-LABEL: _mm512_maskz_load_ps
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.float x 16>>, !u32i, !cir.vector<!cir.int<s, 1> x 16>, !cir.vector<!cir.float x 16>) -> !cir.vector<!cir.float x 16>
+
+  // LLVM-LABEL: test_mm512_maskz_load_ps
+  // LLVM: @llvm.masked.load.v16f32.p0(ptr elementtype(<16 x float>) align 64 %{{.*}}, <16 x i1> %{{.*}}, <16 x float> %{{.*}})
+  return _mm512_maskz_load_ps(__U, __P);
+}
+
+__m512d test_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void *__P)
+{
+  // CIR-LABEL: _mm512_mask_loadu_pd
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.double>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!cir.double x 8>) -> !cir.vector<!cir.double x 8>
+
+  // LLVM-LABEL: test_mm512_mask_loadu_pd
+  // LLVM: @llvm.masked.load.v8f64.p0(ptr elementtype(<8 x double>) align 1 %{{.*}}, <8 x i1> %{{.*}}, <8 x double> %{{.*}})
+  return _mm512_mask_loadu_pd (__W,__U, __P);
+}
+
+__m512d test_mm512_maskz_load_pd(__mmask8 __U, void *__P)
+{
+  // CIR-LABEL: _mm512_maskz_load_pd
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.double x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!cir.double x 8>) -> !cir.vector<!cir.double x 8>
+
+  // LLVM-LABEL: test_mm512_maskz_load_pd
+  // LLVM: @llvm.masked.load.v8f64.p0(ptr elementtype(<8 x double>) align 64 %{{.*}}, <8 x i1> %{{.*}}, <8 x double> %{{.*}})
+  return _mm512_maskz_load_pd(__U, __P);
+}
+
+__m512i test_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void *__P)
+{
+  // CIR-LABEL: _mm512_mask_loadu_epi32
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!s32i>, !u32i, !cir.vector<!cir.int<s, 1> x 16>, !cir.vector<!s32i x 16>) -> !cir.vector<!s32i x 16>
+
+  // LLVM-LABEL: test_mm512_mask_loadu_epi32
+  // LLVM: @llvm.masked.load.v16i32.p0(ptr elementtype(<16 x i32>) align 1 %{{.*}}, <16 x i1> %{{.*}}, <16 x i32> %{{.*}})
+  return _mm512_mask_loadu_epi32 (__W,__U, __P);
+}
+
+__m512i test_mm512_maskz_loadu_epi32 (__mmask16 __U, void *__P)
+{
+  // CIR-LABEL: _mm512_maskz_loadu_epi32
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!s32i>, !u32i, !cir.vector<!cir.int<s, 1> x 16>, !cir.vector<!s32i x 16>) -> !cir.vector<!s32i x 16>
+
+  // LLVM-LABEL: test_mm512_maskz_loadu_epi32
+  // LLVM: @llvm.masked.load.v16i32.p0(ptr elementtype(<16 x i32>) align 1 %{{.*}}, <16 x i1> %{{.*}}, <16 x i32> %{{.*}})
+  return _mm512_maskz_loadu_epi32 (__U, __P);
+}
+
+__m512i test_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void *__P)
+{
+  // CIR-LABEL: _mm512_mask_loadu_epi64
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!s64i>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!s64i x 8>) -> !cir.vector<!s64i x 8>
+
+  // LLVM-LABEL: test_mm512_mask_loadu_epi64
+  // LLVM: @llvm.masked.load.v8i64.p0(ptr elementtype(<8 x i64>) align 1 %{{.*}}, <8 x i1> %{{.*}}, <8 x i64> %{{.*}})
+  return _mm512_mask_loadu_epi64 (__W,__U, __P);
+}
+
+__m512i test_mm512_maskz_loadu_epi64 (__mmask16 __U, void *__P)
+{
+  // CIR-LABEL: _mm512_maskz_loadu_epi64
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!s64i>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!s64i x 8>) -> !cir.vector<!s64i x 8>
+
+  // LLVM-LABEL: test_mm512_maskz_loadu_epi64
+  // LLVM: @llvm.masked.load.v8i64.p0(ptr elementtype(<8 x i64>) align 1 %{{.*}}, <8 x i1> %{{.*}}, <8 x i64> %{{.*}})
+  return _mm512_maskz_loadu_epi64 (__U, __P);
+}
+
+__m128 test_mm_mask_load_ss(__m128 __A, __mmask8 __U, const float* __W)
+{
+  // CIR-LABEL: _mm_mask_load_ss
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.float x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: test_mm_mask_load_ss
+  // LLVM: call {{.*}}<4 x float> @llvm.masked.load.v4f32.p0(ptr elementtype(<4 x float>) align 1 %{{.*}}, <4 x i1> %{{.*}}, <4 x float> %{{.*}})
+  return _mm_mask_load_ss(__A, __U, __W);
+}
+
+__m128 test_mm_maskz_load_ss (__mmask8 __U, const float * __W)
+{
+  // CIR-LABEL: _mm_maskz_load_ss
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.float x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: test_mm_maskz_load_ss
+  // LLVM: call {{.*}}<4 x float> @llvm.masked.load.v4f32.p0(ptr elementtype(<4 x float>) align 1 %{{.*}}, <4 x i1> %{{.*}}, <4 x float> %{{.*}})
+  return _mm_maskz_load_ss (__U, __W);
+}
+
+__m128d test_mm_mask_load_sd (__m128d __A, __mmask8 __U, const double * __W)
+{
+  // CIR-LABEL: _mm_mask_load_sd
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.double x 2>>, !u32i, !cir.vector<!cir.int<s, 1> x 2>, !cir.vector<!cir.double x 2>) -> !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: test_mm_mask_load_sd
+  // LLVM: call {{.*}}<2 x double> @llvm.masked.load.v2f64.p0(ptr elementtype(<2 x double>) align 1 %{{.*}}, <2 x i1> %{{.*}}, <2 x double> %{{.*}})
+  return _mm_mask_load_sd (__A, __U, __W);
+}
+
+__m128d test_mm_maskz_load_sd (__mmask8 __U, const double * __W)
+{
+  // CIR-LABEL: _mm_maskz_load_sd
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.double x 2>>, !u32i, !cir.vector<!cir.int<s, 1> x 2>, !cir.vector<!cir.double x 2>) -> !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: test_mm_maskz_load_sd
+  // LLVM: call {{.*}}<2 x double> @llvm.masked.load.v2f64.p0(ptr elementtype(<2 x double>) align 1 %{{.*}}, <2 x i1> %{{.*}}, <2 x double> %{{.*}})
+  return _mm_maskz_load_sd (__U, __W);
+}
+
+__m512 test_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void *__P)
+{
+  // CIR-LABEL: _mm512_mask_load_ps
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.float x 16>>, !u32i, !cir.vector<!cir.int<s, 1> x 16>, !cir.vector<!cir.float x 16>) -> !cir.vector<!cir.float x 16>
+
+  // LLVM-LABEL: test_mm512_mask_load_ps
+  // LLVM: @llvm.masked.load.v16f32.p0(ptr elementtype(<16 x float>) align 64 %{{.*}}, <16 x i1> %{{.*}}, <16 x float> %{{.*}})
+  return _mm512_mask_load_ps (__W,__U, __P);
+}
+
+__m512d test_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void *__P)
+{
+  // CIR-LABEL: _mm512_mask_load_pd
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.double x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!cir.double x 8>) -> !cir.vector<!cir.double x 8>
+
+  // LLVM-LABEL: test_mm512_mask_load_pd
+  // LLVM: @llvm.masked.load.v8f64.p0(ptr elementtype(<8 x double>) align 64 %{{.*}}, <8 x i1> %{{.*}}, <8 x double> %{{.*}})
+  return _mm512_mask_load_pd (__W,__U, __P);
+}
+
+__m512i test_mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P) {
+  // CIR-LABEL: _mm512_mask_load_epi32
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s32i x 16>>, !u32i, !cir.vector<!cir.int<s, 1> x 16>, !cir.vector<!s32i x 16>) -> !cir.vector<!s32i x 16>
+
+  // LLVM-LABEL: test_mm512_mask_load_epi32
+  // LLVM: @llvm.masked.load.v16i32.p0(ptr elementtype(<16 x i32>) align 64 %{{.*}}, <16 x i1> %{{.*}}, <16 x i32> %{{.*}})
+  return _mm512_mask_load_epi32(__W, __U, __P); 
+}
+
+__m512i test_mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm512_mask_load_epi64
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s64i x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!s64i x 8>) -> !cir.vector<!s64i x 8>
+
+  // LLVM-LABEL: test_mm512_mask_load_epi64
+  // LLVM: @llvm.masked.load.v8i64.p0(ptr elementtype(<8 x i64>) align 64 %{{.*}}, <8 x i1> %{{.*}}, <8 x i64> %{{.*}})
+  return _mm512_mask_load_epi64(__W, __U, __P); 
+}
+
+__m512i test_mm512_maskz_load_epi64(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm512_maskz_load_epi64
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s64i x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!s64i x 8>) -> !cir.vector<!s64i x 8>
+
+  // LLVM-LABEL: test_mm512_maskz_load_epi64
+  // LLVM: @llvm.masked.load.v8i64.p0(ptr elementtype(<8 x i64>) align 64 %{{.*}}, <8 x i1> %{{.*}}, <8 x i64> %{{.*}})
+  return _mm512_maskz_load_epi64(__U, __P); 
+}
+
+__m512i test_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm512_mask_expandloadu_epi64
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s64i x 8>>, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!s64i x 8>) -> !cir.vector<!s64i x 8>
+
+  // LLVM-LABEL: test_mm512_mask_expandloadu_epi64
+  // LLVM: @llvm.masked.expandload.v8i64(ptr %{{.*}}, <8 x i1> %{{.*}}, <8 x i64> %{{.*}})
+  return _mm512_mask_expandloadu_epi64(__W, __U, __P); 
+}
+
+__m512i test_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm512_maskz_expandloadu_epi64
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s64i x 8>>, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!s64i x 8>) -> !cir.vector<!s64i x 8>
+
+  // LLVM-LABEL: test_mm512_maskz_expandloadu_epi64
+  // LLVM: @llvm.masked.expandload.v8i64(ptr %{{.*}}, <8 x i1> %{{.*}}, <8 x i64> %{{.*}})
+  return _mm512_maskz_expandloadu_epi64(__U, __P); 
+}
+
+__m512i test_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P) {
+  // CIR-LABEL: _mm512_mask_expandloadu_epi32
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s32i x 16>>, !cir.vector<!cir.int<s, 1> x 16>, !cir.vector<!s32i x 16>) -> !cir.vector<!s32i x 16>
+
+  // LLVM-LABEL: test_mm512_mask_expandloadu_epi32
+  // LLVM: @llvm.masked.expandload.v16i32(ptr %{{.*}}, <16 x i1> %{{.*}}, <16 x i32> %{{.*}})
+  return _mm512_mask_expandloadu_epi32(__W, __U, __P); 
+}
+
+__m512i test_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P) {
+  // CIR-LABEL: _mm512_maskz_expandloadu_epi32
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s32i x 16>>, !cir.vector<!cir.int<s, 1> x 16>, !cir.vector<!s32i x 16>) -> !cir.vector<!s32i x 16>
+
+  // LLVM-LABEL: test_mm512_maskz_expandloadu_epi32
+  // LLVM: @llvm.masked.expandload.v16i32(ptr %{{.*}}, <16 x i1> %{{.*}}, <16 x i32> %{{.*}})
+  return _mm512_maskz_expandloadu_epi32(__U, __P); 
+}
+
+void test_mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A) {
+  // CIR-LABEL: _mm512_mask_compressstoreu_pd
+  // CIR: cir.llvm.intrinsic "masked.compressstore" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.double x 8>, !cir.ptr<!cir.vector<!cir.double x 8>>, !cir.vector<!cir.int<s, 1> x 8>) -> !void
+
+  // LLVM-LABEL: test_mm512_mask_compressstoreu_pd
+  // LLVM: @llvm.masked.compressstore.v8f64(<8 x double> %{{.*}}, ptr %{{.*}}, <8 x i1> %{{.*}})
+  return _mm512_mask_compressstoreu_pd(__P, __U, __A); 
+}
+
+void test_mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A) {
+  // CIR-LABEL: _mm512_mask_compressstoreu_ps
+  // CIR: cir.llvm.intrinsic "masked.compressstore" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.float x 16>, !cir.ptr<!cir.vector<!cir.float x 16>>, !cir.vector<!cir.int<s, 1> x 16>) -> !void
+
+  // LLVM-LABEL: test_mm512_mask_compressstoreu_ps
+  // LLVM: @llvm.masked.compressstore.v16f32(<16 x float> %{{.*}}, ptr %{{.*}}, <16 x i1> %{{.*}})
+  return _mm512_mask_compressstoreu_ps(__P, __U, __A); 
+}
+
+void test_mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A) {
+  // CIR-LABEL: _mm512_mask_compressstoreu_epi64
+  // CIR: cir.llvm.intrinsic "masked.compressstore" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s64i x 8>, !cir.ptr<!cir.vector<!s64i x 8>>, !cir.vector<!cir.int<s, 1> x 8>) -> !void
+
+  // LLVM-LABEL: test_mm512_mask_compressstoreu_epi64
+  // LLVM: @llvm.masked.compressstore.v8i64(<8 x i64> %{{.*}}, ptr %{{.*}}, <8 x i1> %{{.*}})
+  return _mm512_mask_compressstoreu_epi64(__P, __U, __A); 
+}
+
+void test_mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A) {
+  // CIR-LABEL: _mm512_mask_compressstoreu_epi32
+  // CIR: cir.llvm.intrinsic "masked.compressstore" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s32i x 16>, !cir.ptr<!cir.vector<!s32i x 16>>, !cir.vector<!cir.int<s, 1> x 16>) -> !void
+
+  // LLVM-LABEL: test_mm512_mask_compressstoreu_epi32
+  // LLVM: @llvm.masked.compressstore.v16i32(<16 x i32> %{{.*}}, ptr %{{.*}}, <16 x i1> %{{.*}})
+  return _mm512_mask_compressstoreu_epi32(__P, __U, __A); 
+}
+__m512d test_mm512_i32gather_pd(__m256i __index, void const *__addr) {
+  // CIR-LABEL: _mm512_i32gather_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dpd.512"
+
+  // LLVM-LABEL: test_mm512_i32gather_pd
+  // LLVM: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_i32gather_pd(__index, __addr, 2); 
+}
+
+__m512d test_mm512_mask_i32gather_pd(__m512d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
+  // CIR-LABEL: _mm512_mask_i32gather_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dpd.512"
+
+  // LLVM-LABEL: test_mm512_mask_i32gather_pd
+  // LLVM: @llvm.x86.avx512.mask.gather.dpd.512
+  return _mm512_mask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m512 test_mm512_i32gather_ps(__m512i __index, void const *__addr) {
+  // CIR-LABEL: _mm512_i32gather_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dps.512"
+
+  // LLVM-LABEL: test_mm512_i32gather_ps
+  // LLVM: @llvm.x86.avx512.mask.gather.dps.512
+  return _mm512_i32gather_ps(__index, __addr, 2); 
+}
+
+__m512d test_mm512_i64gather_pd(__m512i __index, void const *__addr) {
+  // CIR-LABEL: _mm512_i64gather_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qpd.512"
+
+  // LLVM-LABEL: test_mm512_i64gather_pd
+  // LLVM: @llvm.x86.avx512.mask.gather.qpd.512
+  return _mm512_i64gather_pd(__index, __addr, 2); 
+}
+
+__m512d test_mm512_mask_i64gather_pd(__m512d __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CIR-LABEL: _mm512_mask_i64gather_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qpd.512"
+
+  // LLVM-LABEL: test_mm512_mask_i64gather_pd
+  // LLVM: @llvm.x86.avx512.mask.gather.qpd.512
+  return _mm512_mask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m256 test_mm512_i64gather_ps(__m512i __index, void const *__addr) {
+  // CIR-LABEL: _mm512_i64gather_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qps.512"
+
+  // LLVM-LABEL: test_mm512_i64gather_ps
+  // LLVM: @llvm.x86.avx512.mask.gather.qps.512
+  return _mm512_i64gather_ps(__index, __addr, 2); 
+}
+
+__m256 test_mm512_mask_i64gather_ps(__m256 __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CIR-LABEL: _mm512_mask_i64gather_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qps.512"
+
+  // LLVM-LABEL: test_mm512_mask_i64gather_ps
+  // LLVM: @llvm.x86.avx512.mask.gather.qps.512
+  return _mm512_mask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m512i test_mm512_i32gather_epi64(__m256i __index, void const *__addr) {
+  // CIR-LABEL: _mm512_i32gather_epi64
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dpq.512"
+
+  // LLVM-LABEL: test_mm512_i32gather_epi64
+  // LLVM: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_i32gather_epi64(__index, __addr, 2); 
+}
+
+__m512i test_mm512_mask_i32gather_epi64(__m512i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
+  // CIR-LABEL: _mm512_mask_i32gather_epi64
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dpq.512"
+
+  // LLVM-LABEL: test_mm512_mask_i32gather_epi64
+  // LLVM: @llvm.x86.avx512.mask.gather.dpq.512
+  return _mm512_mask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m512i test_mm512_i32gather_epi32(__m512i __index, void const *__addr) {
+  // CIR-LABEL: _mm512_i32gather_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dpi.512"
+
+  // LLVM-LABEL: test_mm512_i32gather_epi32
+  // LLVM: @llvm.x86.avx512.mask.gather.dpi.512
+  return _mm512_i32gather_epi32(__index, __addr, 2); 
+}
+
+__m512i test_mm512_mask_i32gather_epi32(__m512i __v1_old, __mmask16 __mask, __m512i __index, void const *__addr) {
+  // CIR-LABEL: _mm512_mask_i32gather_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.dpi.512"
+
+  // LLVM-LABEL: test_mm512_mask_i32gather_epi32
+  // LLVM: @llvm.x86.avx512.mask.gather.dpi.512
+  return _mm512_mask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m512i test_mm512_i64gather_epi64(__m512i __index, void const *__addr) {
+  // CIR-LABEL: _mm512_i64gather_epi64
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qpq.512"
+
+  // LLVM-LABEL: test_mm512_i64gather_epi64
+  // LLVM: @llvm.x86.avx512.mask.gather.qpq.512
+  return _mm512_i64gather_epi64(__index, __addr, 2); 
+}
+
+__m512i test_mm512_mask_i64gather_epi64(__m512i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CIR-LABEL: _mm512_mask_i64gather_epi64
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qpq.512"
+
+  // LLVM-LABEL: test_mm512_mask_i64gather_epi64
+  // LLVM: @llvm.x86.avx512.mask.gather.qpq.512
+  return _mm512_mask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m256i test_mm512_i64gather_epi32(__m512i __index, void const *__addr) {
+  // CIR-LABEL: _mm512_i64gather_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qpi.512"
+
+  // LLVM-LABEL: test_mm512_i64gather_epi32
+  // LLVM: @llvm.x86.avx512.mask.gather.qpi.512
+  return _mm512_i64gather_epi32(__index, __addr, 2); 
+}
+
+__m256i test_mm512_mask_i64gather_epi32(__m256i __v1_old, __mmask8 __mask, __m512i __index, void const *__addr) {
+  // CIR-LABEL: _mm512_mask_i64gather_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather.qpi.512"
+
+  // LLVM-LABEL: test_mm512_mask_i64gather_epi32
+  // LLVM: @llvm.x86.avx512.mask.gather.qpi.512
+  return _mm512_mask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); 
+}
+
+
+void test_mm512_i32scatter_pd(void *__addr, __m256i __index, __m512d __v1) {
+  // CIR-LABEL: test_mm512_i32scatter_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpd.512"
+
+  // LLVM-LABEL: test_mm512_i32scatter_pd
+  // LLVM: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_i32scatter_pd(__addr, __index, __v1, 2); 
+}
+
+void test_mm512_mask_i32scatter_pd(void *__addr, __mmask8 __mask, __m256i __index, __m512d __v1) {
+  // CIR-LABEL: test_mm512_mask_i32scatter_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpd.512"
+
+  // LLVM-LABEL: test_mm512_mask_i32scatter_pd
+  // LLVM: @llvm.x86.avx512.mask.scatter.dpd.512
+  return _mm512_mask_i32scatter_pd(__addr, __mask, __index, __v1, 2); 
+}
+
+void test_mm512_i32scatter_ps(void *__addr, __m512i __index, __m512 __v1) {
+  // CIR-LABEL: test_mm512_i32scatter_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dps.512"
+
+  // LLVM-LABEL: test_mm512_i32scatter_ps
+  // LLVM: @llvm.x86.avx512.mask.scatter.dps.512
+  return _mm512_i32scatter_ps(__addr, __index, __v1, 2); 
+}
+
+void test_mm512_mask_i32scatter_ps(void *__addr, __mmask16 __mask, __m512i __index, __m512 __v1) {
+  // CIR-LABEL: test_mm512_mask_i32scatter_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dps.512"
+
+  // LLVM-LABEL: test_mm512_mask_i32scatter_ps
+  // LLVM: @llvm.x86.avx512.mask.scatter.dps.512
+  return _mm512_mask_i32scatter_ps(__addr, __mask, __index, __v1, 2); 
+}
+
+void test_mm512_i64scatter_pd(void *__addr, __m512i __index, __m512d __v1) {
+  // CIR-LABEL: test_mm512_i64scatter_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpd.512"
+
+  // LLVM-LABEL: test_mm512_i64scatter_pd
+  // LLVM: @llvm.x86.avx512.mask.scatter.qpd.512
+  return _mm512_i64scatter_pd(__addr, __index, __v1, 2); 
+}
+
+void test_mm512_mask_i64scatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) {
+  // CIR-LABEL: test_mm512_mask_i64scatter_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpd.512"
+
+  // LLVM-LABEL: test_mm512_mask_i64scatter_pd
+  // LLVM: @llvm.x86.avx512.mask.scatter.qpd.512
+  return _mm512_mask_i64scatter_pd(__addr, __mask, __index, __v1, 2); 
+}
+
+void test_mm512_i64scatter_ps(void *__addr, __m512i __index, __m256 __v1) {
+  // CIR-LABEL: test_mm512_i64scatter_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qps.512"
+
+  // LLVM-LABEL: test_mm512_i64scatter_ps
+  // LLVM: @llvm.x86.avx512.mask.scatter.qps.512
+  return _mm512_i64scatter_ps(__addr, __index, __v1, 2); 
+}
+
+void test_mm512_mask_i64scatter_ps(void *__addr, __mmask8 __mask, __m512i __index, __m256 __v1) {
+  // CIR-LABEL: test_mm512_mask_i64scatter_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qps.512"
+
+  // LLVM-LABEL: test_mm512_mask_i64scatter_ps
+  // LLVM: @llvm.x86.avx512.mask.scatter.qps.512
+  return _mm512_mask_i64scatter_ps(__addr, __mask, __index, __v1, 2); 
+}
+
+void test_mm512_i32scatter_epi32(void *__addr, __m512i __index, __m512i __v1) {
+  // CIR-LABEL: test_mm512_i32scatter_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpi.512"
+
+  // LLVM-LABEL: test_mm512_i32scatter_epi32
+  // LLVM: @llvm.x86.avx512.mask.scatter.dpi.512
+  return _mm512_i32scatter_epi32(__addr, __index, __v1, 2); 
+}
+
+void test_mm512_mask_i32scatter_epi32(void *__addr, __mmask16 __mask, __m512i __index, __m512i __v1) {
+  // CIR-LABEL: test_mm512_mask_i32scatter_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpi.512"
+
+  // LLVM-LABEL: test_mm512_mask_i32scatter_epi32
+  // LLVM: @llvm.x86.avx512.mask.scatter.dpi.512
+  return _mm512_mask_i32scatter_epi32(__addr, __mask, __index, __v1, 2); 
+}
+
+void test_mm512_i64scatter_epi64(void *__addr, __m512i __index, __m512i __v1) {
+  // CIR-LABEL: test_mm512_i64scatter_epi64
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpq.512"
+
+  // LLVM-LABEL: test_mm512_i64scatter_epi64
+  // LLVM: @llvm.x86.avx512.mask.scatter.qpq.512
+  return _mm512_i64scatter_epi64(__addr, __index, __v1, 2); 
+}
+
+void test_mm512_mask_i64scatter_epi64(void *__addr, __mmask8 __mask, __m512i __index, __m512i __v1) {
+  // CIR-LABEL: test_mm512_mask_i64scatter_epi64
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpq.512"
+
+  // LLVM-LABEL: test_mm512_mask_i64scatter_epi64
+  // LLVM: @llvm.x86.avx512.mask.scatter.qpq.512
+  return _mm512_mask_i64scatter_epi64(__addr, __mask, __index, __v1, 2); 
+}
+
+void test_mm512_i64scatter_epi32(void *__addr, __m512i __index, __m256i __v1) {
+  // CIR-LABEL: test_mm512_i64scatter_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpi.512"
+
+  // LLVM-LABEL: test_mm512_i64scatter_epi32
+  // LLVM: @llvm.x86.avx512.mask.scatter.qpi.512
+  return _mm512_i64scatter_epi32(__addr, __index, __v1, 2); 
+}
+
+void test_mm512_mask_i64scatter_epi32(void *__addr, __mmask8 __mask, __m512i __index, __m256i __v1) {
+  // CIR-LABEL: test_mm512_mask_i64scatter_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpi.512"
+
+  // LLVM-LABEL: test_mm512_mask_i64scatter_epi32
+  // LLVM: @llvm.x86.avx512.mask.scatter.qpi.512
+  return _mm512_mask_i64scatter_epi32(__addr, __mask, __index, __v1, 2); 
+}
+
+__m512d test_mm512_insertf64x4(__m512d __A, __m256d __B) {
+  // CIR-LABEL: test_mm512_insertf64x4
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i] : !cir.vector<!cir.double x 8>
+
+  // LLVM-LABEL: test_mm512_insertf64x4
+  // LLVM: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  return _mm512_insertf64x4(__A, __B, 1);
+}
+
+__m512 test_mm512_insertf32x4(__m512 __A, __m128 __B) {
+  // CIR-LABEL: test_mm512_insertf32x4
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 16>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i] : !cir.vector<!cir.float x 16>
+
+  // LLVM-LABEL: test_mm512_insertf32x4
+  // LLVM: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  return _mm512_insertf32x4(__A, __B, 1);
+}
+
+__m512i test_mm512_inserti64x4(__m512i __A, __m256i __B) {
+  // CIR-LABEL: test_mm512_inserti64x4
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s64i x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i] : !cir.vector<!s64i x 8>
+
+  // LLVM-LABEL: test_mm512_inserti64x4
+  // LLVM: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  return _mm512_inserti64x4(__A, __B, 1); 
+}
+
+__m512i test_mm512_inserti32x4(__m512i __A, __m128i __B) {
+  // CIR-LABEL: test_mm512_inserti32x4
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 16>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i] : !cir.vector<!s32i x 16>
+
+  // LLVM-LABEL: test_mm512_inserti32x4
+  // LLVM: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  return _mm512_inserti32x4(__A, __B, 1); 
+}
+
+__m512d test_mm512_shuffle_pd(__m512d __M, __m512d __V) {
+  // CIR-LABEL: test_mm512_shuffle_pd
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 8>) [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<3> : !s32i, #cir.int<10> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : !s32i, #cir.int<14> : !s32i] : !cir.vector<!cir.double x 8>
+
+  // LLVM-LABEL: test_mm512_shuffle_pd
+  // LLVM: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
+
+  // OGCG-LABEL: test_mm512_shuffle_pd
+  // OGCG: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
+  return _mm512_shuffle_pd(__M, __V, 4); 
+}
+
+__m512 test_mm512_shuffle_ps(__m512 __M, __m512 __V) {
+  // CIR-LABEL: test_mm512_shuffle_ps
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 16>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<16> : !s32i, #cir.int<16> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<20> : !s32i, #cir.int<20> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<24> : !s32i, #cir.int<24> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<28> : !s32i, #cir.int<28> : !s32i] : !cir.vector<!cir.float x 16>
+
+  // LLVM-LABEL: test_mm512_shuffle_ps
+  // LLVM: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
+
+  // OGCG-LABEL: test_mm512_shuffle_ps
+  // OGCG: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
+  return _mm512_shuffle_ps(__M, __V, 4);
+}
+
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/avx512fp16-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/avx512fp16-builtins.c
new file mode 100644
index 0000000000000..01936adb3bbd5
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/avx512fp16-builtins.c
@@ -0,0 +1,73 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -fclangir -emit-llvm -o %t.ll  -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+
+#include <immintrin.h>
+
+__m128h test_mm_undefined_ph(void) {
+  // CIR-LABEL: _mm_undefined_ph
+  // CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 2>
+  // CIR: %{{.*}} = cir.cast bitcast %[[A]] : !cir.vector<!cir.double x 2> -> !cir.vector<!cir.f16 x 8>
+  // CIR: cir.return %{{.*}} : !cir.vector<!cir.f16 x 8>
+
+  // LLVM-LABEL: @test_mm_undefined_ph
+  // LLVM: store <8 x half> zeroinitializer, ptr %[[A:.*]], align 16
+  // LLVM: %{{.*}} = load <8 x half>, ptr %[[A]], align 16
+  // LLVM: ret <8 x half> %{{.*}}
+    return _mm_undefined_ph();
+}
+
+__m256h test_mm256_undefined_ph(void) {
+  // CIR-LABEL: _mm256_undefined_ph
+  // CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 4>
+  // CIR: %{{.*}} = cir.cast bitcast %[[A]] : !cir.vector<!cir.double x 4> -> !cir.vector<!cir.f16 x 16>
+  // CIR: cir.return %{{.*}} : !cir.vector<!cir.f16 x 16>
+
+  // LLVM-LABEL: @test_mm256_undefined_ph
+  // LLVM: store <16 x half> zeroinitializer, ptr %[[A:.*]], align 32
+  // LLVM: %{{.*}} = load <16 x half>, ptr %[[A]], align 32
+  // LLVM: ret <16 x half> %{{.*}}
+  return _mm256_undefined_ph();
+}
+
+__m512h test_mm512_undefined_ph(void) {
+  // CIR-LABEL: _mm512_undefined_ph
+  // CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 8>
+  // CIR: %{{.*}} = cir.cast bitcast %[[A]] : !cir.vector<!cir.double x 8> -> !cir.vector<!cir.f16 x 32>
+  // CIR: cir.return %{{.*}} : !cir.vector<!cir.f16 x 32>
+
+  // LLVM-LABEL: @test_mm512_undefined_ph
+  // LLVM: store <32 x half> zeroinitializer, ptr %[[A:.*]], align 64
+  // LLVM: %{{.*}} = load <32 x half>, ptr %[[A]], align 64
+  // LLVM: ret <32 x half> %{{.*}}
+  return _mm512_undefined_ph();
+}
+
+void test_mm_mask_store_sh(void *__P, __mmask8 __U, __m128h __A) {
+  // CIR-LABEL: _mm_mask_store_sh
+  // CIR: cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.f16 x 8>, !cir.ptr<!cir.vector<!cir.f16 x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>) -> !void
+
+  // LLVM-LABEL: @test_mm_mask_store_sh
+  // LLVM: call void @llvm.masked.store.v8f16.p0(<8 x half> %{{.*}}, ptr elementtype(<8 x half>) align 1 %{{.*}}, <8 x i1> %{{.*}})
+  _mm_mask_store_sh(__P, __U, __A);
+}
+
+__m128h test_mm_mask_load_sh(__m128h __A, __mmask8 __U, const void *__W) {
+  // CIR-LABEL: _mm_mask_load_sh
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.f16 x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!cir.f16 x 8>) -> !cir.vector<!cir.f16 x 8>
+
+  // LLVM-LABEL: @test_mm_mask_load_sh
+  // LLVM: %{{.*}} = call <8 x half> @llvm.masked.load.v8f16.p0(ptr elementtype(<8 x half>) align 1 %{{.*}}, <8 x i1> %{{.*}}, <8 x half> %{{.*}})
+  return _mm_mask_load_sh(__A, __U, __W);
+}
+
+__m128h test_mm_maskz_load_sh(__mmask8 __U, const void *__W) {
+  // CIR-LABEL: _mm_maskz_load_sh
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.f16 x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!cir.f16 x 8>) -> !cir.vector<!cir.f16 x 8>
+
+  // LLVM-LABEL: @test_mm_maskz_load_sh
+  // LLVM: %{{.*}} = call <8 x half> @llvm.masked.load.v8f16.p0(ptr elementtype(<8 x half>) align 1 %{{.*}}, <8 x i1> %{{.*}}, <8 x half> %{{.*}})
+  return _mm_maskz_load_sh(__U, __W);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/avx512vbmi2-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/avx512vbmi2-builtins.c
new file mode 100644
index 0000000000000..bb9aa5d1f1c1b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/avx512vbmi2-builtins.c
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vbmi2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vbmi2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+__m512i test_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const* __P) {
+  // CIR-LABEL: _mm512_mask_expandloadu_epi16
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s16i x 32>>, !cir.vector<!cir.int<s, 1> x 32>, !cir.vector<!s16i x 32>) -> !cir.vector<!s16i x 32>
+
+  // LLVM-LABEL: @test_mm512_mask_expandloadu_epi16
+  // LLVM: @llvm.masked.expandload.v32i16(ptr %{{.*}}, <32 x i1> %{{.*}}, <32 x i16> %{{.*}})
+  return _mm512_mask_expandloadu_epi16(__S, __U, __P);
+}
+
+__m512i test_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const* __P) {
+  // CIR-LABEL: _mm512_maskz_expandloadu_epi16
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s16i x 32>>, !cir.vector<!cir.int<s, 1> x 32>, !cir.vector<!s16i x 32>) -> !cir.vector<!s16i x 32>
+
+  // LLVM-LABEL: @test_mm512_maskz_expandloadu_epi16
+  // LLVM: @llvm.masked.expandload.v32i16(ptr %{{.*}}, <32 x i1> %{{.*}}, <32 x i16> %{{.*}})
+  return _mm512_maskz_expandloadu_epi16(__U, __P);
+}
+
+__m512i test_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const* __P) {
+  // CIR-LABEL: _mm512_mask_expandloadu_epi8
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s8i x 64>>, !cir.vector<!cir.int<s, 1> x 64>, !cir.vector<!s8i x 64>) -> !cir.vector<!s8i x 64>
+
+  // LLVM-LABEL: @test_mm512_mask_expandloadu_epi8
+  // LLVM: @llvm.masked.expandload.v64i8(ptr %{{.*}}, <64 x i1> %{{.*}}, <64 x i8> %{{.*}})
+  return _mm512_mask_expandloadu_epi8(__S, __U, __P);
+}
+
+__m512i test_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const* __P) {
+  // CIR-LABEL: _mm512_maskz_expandloadu_epi8
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s8i x 64>>, !cir.vector<!cir.int<s, 1> x 64>, !cir.vector<!s8i x 64>) -> !cir.vector<!s8i x 64>
+
+  // LLVM-LABEL: @test_mm512_maskz_expandloadu_epi8
+  // LLVM: @llvm.masked.expandload.v64i8(ptr %{{.*}}, <64 x i1> %{{.*}}, <64 x i8> %{{.*}})
+  return _mm512_maskz_expandloadu_epi8(__U, __P);
+}
+
+void test_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) {
+  // CIR-LABEL: _mm512_mask_compressstoreu_epi16
+  // CIR: cir.llvm.intrinsic "masked.compressstore" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s16i x 32>, !cir.ptr<!cir.vector<!s16i x 32>>, !cir.vector<!cir.int<s, 1> x 32>) -> !void
+
+  // LLVM-LABEL: @test_mm512_mask_compressstoreu_epi16
+  // LLVM: @llvm.masked.compressstore.v32i16(<32 x i16> %{{.*}}, ptr %{{.*}}, <32 x i1> %{{.*}})
+  _mm512_mask_compressstoreu_epi16(__P, __U, __D);
+}
+
+void test_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D) {
+  // CIR-LABEL: _mm512_mask_compressstoreu_epi8
+  // CIR: cir.llvm.intrinsic "masked.compressstore" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s8i x 64>, !cir.ptr<!cir.vector<!s8i x 64>>, !cir.vector<!cir.int<s, 1> x 64>) -> !void
+
+  // LLVM-LABEL: @test_mm512_mask_compressstoreu_epi8
+  // LLVM: @llvm.masked.compressstore.v64i8(<64 x i8> %{{.*}}, ptr %{{.*}}, <64 x i1> %{{.*}})
+  _mm512_mask_compressstoreu_epi8(__P, __U, __D);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/avx512vl-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/avx512vl-builtins.c
new file mode 100644
index 0000000000000..aad5de06e9968
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/avx512vl-builtins.c
@@ -0,0 +1,1049 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+
+#include <immintrin.h>
+
+void test_mm_mask_storeu_epi64(void *__P, __mmask8 __U, __m128i __A) {
+  // CIR-LABEL: _mm_mask_storeu_epi64
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s64i x 2>, !cir.ptr<!cir.vector<!s64i x 2>>, !u32i, !cir.vector<!cir.int<s, 1> x 2>) -> !void
+
+  // LLVM-LABEL: @test_mm_mask_storeu_epi64
+  // LLVM: call void @llvm.masked.store.v2i64.p0(<2 x i64> %{{.*}}, ptr elementtype(<2 x i64>) align 1 %{{.*}}, <2 x i1> %{{.*}})
+  return _mm_mask_storeu_epi64(__P, __U, __A); 
+}
+
+void test_mm_mask_storeu_epi32(void *__P, __mmask8 __U, __m128i __A) {
+  // CIR-LABEL: _mm_mask_storeu_epi32
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>) -> !void
+
+  // LLVM-LABEL: @test_mm_mask_storeu_epi32
+  // LLVM: call void @llvm.masked.store.v4i32.p0(<4 x i32> %{{.*}}, ptr elementtype(<4 x i32>) align 1 %{{.*}}, <4 x i1> %{{.*}})
+  return _mm_mask_storeu_epi32(__P, __U, __A); 
+}
+
+void test_mm_mask_storeu_pd(void *__P, __mmask8 __U, __m128d __A) {
+  // CIR-LABEL: _mm_mask_storeu_pd
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.double x 2>, !cir.ptr<!cir.vector<!cir.double x 2>>, !u32i, !cir.vector<!cir.int<s, 1> x 2>) -> !void
+
+  // LLVM-LABEL: @test_mm_mask_storeu_pd
+  // LLVM: call void @llvm.masked.store.v2f64.p0(<2 x double> %{{.*}}, ptr elementtype(<2 x double>) align 1 %{{.*}}, <2 x i1> %{{.*}})
+  return _mm_mask_storeu_pd(__P, __U, __A); 
+}
+
+void test_mm_mask_storeu_ps(void *__P, __mmask8 __U, __m128 __A) {
+  // CIR-LABEL: _mm_mask_storeu_ps
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>) -> !void
+
+  // LLVM-LABEL: @test_mm_mask_storeu_ps
+  // LLVM: call void @llvm.masked.store.v4f32.p0(<4 x float> %{{.*}}, ptr elementtype(<4 x float>) align 1 %{{.*}}, <4 x i1> %{{.*}})
+  return _mm_mask_storeu_ps(__P, __U, __A); 
+}
+
+void test_mm256_mask_storeu_epi32(void *__P, __mmask8 __U, __m256i __A) {
+  // CIR-LABEL: _mm256_mask_storeu_epi32
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s32i x 8>, !cir.ptr<!cir.vector<!s32i x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>) -> !void
+
+  // LLVM-LABEL: @test_mm256_mask_storeu_epi32
+  // LLVM: call void @llvm.masked.store.v8i32.p0(<8 x i32> %{{.*}}, ptr elementtype(<8 x i32>) align 1 %{{.*}}, <8 x i1> %{{.*}})
+  return _mm256_mask_storeu_epi32(__P, __U, __A); 
+}
+
+void test_mm256_mask_storeu_epi64(void *__P, __mmask8 __U, __m256i __A) {
+  // CIR-LABEL: _mm256_mask_storeu_epi64
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s64i x 4>, !cir.ptr<!cir.vector<!s64i x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>) -> !void
+
+  // LLVM-LABEL: @test_mm256_mask_storeu_epi64
+  // LLVM: call void @llvm.masked.store.v4i64.p0(<4 x i64> %{{.*}}, ptr elementtype(<4 x i64>) align 1 %{{.*}}, <4 x i1> %{{.*}})
+  return _mm256_mask_storeu_epi64(__P, __U, __A); 
+}
+
+void test_mm256_mask_storeu_ps(void *__P, __mmask8 __U, __m256 __A) {
+  // CIR-LABEL: _mm256_mask_storeu_ps
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.float x 8>, !cir.ptr<!cir.vector<!cir.float x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>) -> !void
+
+  // LLVM-LABEL: @test_mm256_mask_storeu_ps
+  // LLVM: call void @llvm.masked.store.v8f32.p0(<8 x float> %{{.*}}, ptr elementtype(<8 x float>) align 1 %{{.*}}, <8 x i1> %{{.*}})
+  return _mm256_mask_storeu_ps(__P, __U, __A); 
+}
+
+void test_mm_mask_store_epi64(void *__P, __mmask8 __U, __m128i __A) {
+  // CIR-LABEL: _mm_mask_store_epi64
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s64i x 2>, !cir.ptr<!cir.vector<!s64i x 2>>, !u32i, !cir.vector<!cir.int<s, 1> x 2>) -> !void
+
+  // LLVM-LABEL: @test_mm_mask_store_epi64
+  // LLVM: call void @llvm.masked.store.v2i64.p0(<2 x i64> %{{.*}}, ptr elementtype(<2 x i64>) align 16 %{{.*}}, <2 x i1> %{{.*}})
+  return _mm_mask_store_epi64(__P, __U, __A); 
+}
+
+void test_mm_mask_store_ps(void *__P, __mmask8 __U, __m128 __A) {
+  // CIR-LABEL: _mm_mask_store_ps
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>) -> !void
+
+  // LLVM-LABEL: @test_mm_mask_store_ps
+  // LLVM: call void @llvm.masked.store.v4f32.p0(<4 x float> %{{.*}}, ptr elementtype(<4 x float>) align 16 %{{.*}}, <4 x i1> %{{.*}})
+  return _mm_mask_store_ps(__P, __U, __A); 
+}
+
+void test_mm_mask_store_pd(void *__P, __mmask8 __U, __m128d __A) {
+  // CIR-LABEL: _mm_mask_store_pd
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.double x 2>, !cir.ptr<!cir.vector<!cir.double x 2>>, !u32i, !cir.vector<!cir.int<s, 1> x 2>) -> !void
+
+  // LLVM-LABEL: @test_mm_mask_store_pd
+  // LLVM: call void @llvm.masked.store.v2f64.p0(<2 x double> %{{.*}}, ptr elementtype(<2 x double>) align 16 %{{.*}}, <2 x i1> %{{.*}})
+  return _mm_mask_store_pd(__P, __U, __A); 
+}
+
+void test_mm256_mask_store_epi32(void *__P, __mmask8 __U, __m256i __A) {
+  // CIR-LABEL: _mm256_mask_store_epi32
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s32i x 8>, !cir.ptr<!cir.vector<!s32i x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>) -> !void
+
+  // LLVM-LABEL: @test_mm256_mask_store_epi32
+  // LLVM: call void @llvm.masked.store.v8i32.p0(<8 x i32> %{{.*}}, ptr elementtype(<8 x i32>) align 32 %{{.*}}, <8 x i1> %{{.*}})
+  return _mm256_mask_store_epi32(__P, __U, __A); 
+}
+
+void test_mm256_mask_store_epi64(void *__P, __mmask8 __U, __m256i __A) {
+  // CIR-LABEL: _mm256_mask_store_epi64
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s64i x 4>, !cir.ptr<!cir.vector<!s64i x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>) -> !void
+
+  // LLVM-LABEL: @test_mm256_mask_store_epi64
+  // LLVM: call void @llvm.masked.store.v4i64.p0(<4 x i64> %{{.*}}, ptr elementtype(<4 x i64>) align 32 %{{.*}}, <4 x i1> %{{.*}})
+  return _mm256_mask_store_epi64(__P, __U, __A); 
+}
+
+void test_mm256_mask_store_ps(void *__P, __mmask8 __U, __m256 __A) {
+  // CIR-LABEL: _mm256_mask_store_ps
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.float x 8>, !cir.ptr<!cir.vector<!cir.float x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>) -> !void
+
+  // LLVM-LABEL: @test_mm256_mask_store_ps
+  // LLVM: call void @llvm.masked.store.v8f32.p0(<8 x float> %{{.*}}, ptr elementtype(<8 x float>) align 32 %{{.*}}, <8 x i1> %{{.*}})
+  return _mm256_mask_store_ps(__P, __U, __A); 
+}
+
+void test_mm256_mask_store_pd(void *__P, __mmask8 __U, __m256d __A) {
+  // CIR-LABEL: _mm256_mask_store_pd
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.double x 4>, !cir.ptr<!cir.vector<!cir.double x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>) -> !void
+
+  // LLVM-LABEL: @test_mm256_mask_store_pd
+  // LLVM: call void @llvm.masked.store.v4f64.p0(<4 x double> %{{.*}}, ptr elementtype(<4 x double>) align 32 %{{.*}}, <4 x i1> %{{.*}})
+  return _mm256_mask_store_pd(__P, __U, __A); 
+}
+
+__m128 test_mm_mask_loadu_ps(__m128 __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_mask_loadu_ps
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.float x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: @test_mm_mask_loadu_ps
+  // LLVM: @llvm.masked.load.v4f32.p0(ptr elementtype(<4 x float>) align 1 %{{.*}}, <4 x i1> %{{.*}}, <4 x float> %{{.*}})
+  return _mm_mask_loadu_ps(__W, __U, __P); 
+}
+
+__m128 test_mm_maskz_loadu_ps(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_maskz_loadu_ps
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.float x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: @test_mm_maskz_loadu_ps
+  // LLVM: @llvm.masked.load.v4f32.p0(ptr elementtype(<4 x float>) align 1 %{{.*}}, <4 x i1> %{{.*}}, <4 x float> %{{.*}})
+  return _mm_maskz_loadu_ps(__U, __P); 
+}
+
+__m256 test_mm256_mask_loadu_ps(__m256 __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_mask_loadu_ps
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.float x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!cir.float x 8>) -> !cir.vector<!cir.float x 8>
+
+  // LLVM-LABEL: @test_mm256_mask_loadu_ps
+  // LLVM: @llvm.masked.load.v8f32.p0(ptr elementtype(<8 x float>) align 1 %{{.*}}, <8 x i1> %{{.*}}, <8 x float> %{{.*}})
+  return _mm256_mask_loadu_ps(__W, __U, __P); 
+}
+
+__m256 test_mm256_maskz_loadu_ps(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_maskz_loadu_ps
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.float x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!cir.float x 8>) -> !cir.vector<!cir.float x 8>
+
+  // LLVM-LABEL: @test_mm256_maskz_loadu_ps
+  // LLVM: @llvm.masked.load.v8f32.p0(ptr elementtype(<8 x float>) align 1 %{{.*}}, <8 x i1> %{{.*}}, <8 x float> %{{.*}})
+  return _mm256_maskz_loadu_ps(__U, __P); 
+}
+
+__m256d test_mm256_mask_loadu_pd(__m256d __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_mask_loadu_pd
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.double x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!cir.double x 4>) -> !cir.vector<!cir.double x 4>
+
+  // LLVM-LABEL: @test_mm256_mask_loadu_pd
+  // LLVM: @llvm.masked.load.v4f64.p0(ptr elementtype(<4 x double>) align 1 %{{.*}}, <4 x i1> %{{.*}}, <4 x double> %{{.*}})
+  return _mm256_mask_loadu_pd(__W, __U, __P); 
+}
+
+__m128i test_mm_mask_loadu_epi32(__m128i __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_mask_loadu_epi32
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s32i x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM-LABEL: @test_mm_mask_loadu_epi32
+  // LLVM: @llvm.masked.load.v4i32.p0(ptr elementtype(<4 x i32>) align 1 %{{.*}}, <4 x i1> %{{.*}}, <4 x i32> %{{.*}})
+  return _mm_mask_loadu_epi32(__W, __U, __P); 
+}
+
+__m256i test_mm256_mask_loadu_epi32(__m256i __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_mask_loadu_epi32
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s32i x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!s32i x 8>) -> !cir.vector<!s32i x 8>
+
+  // LLVM-LABEL: @test_mm256_mask_loadu_epi32
+  // LLVM: @llvm.masked.load.v8i32.p0(ptr elementtype(<8 x i32>) align 1 %{{.*}}, <8 x i1> %{{.*}}, <8 x i32> %{{.*}})
+  return _mm256_mask_loadu_epi32(__W, __U, __P); 
+}
+
+__m128i test_mm_mask_loadu_epi64(__m128i __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_mask_loadu_epi64
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s64i x 2>>, !u32i, !cir.vector<!cir.int<s, 1> x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+
+  // LLVM-LABEL: @test_mm_mask_loadu_epi64
+  // LLVM: @llvm.masked.load.v2i64.p0(ptr elementtype(<2 x i64>) align 1 %{{.*}}, <2 x i1> %{{.*}}, <2 x i64> %{{.*}})
+  return _mm_mask_loadu_epi64(__W, __U, __P); 
+}
+
+__m256i test_mm256_mask_loadu_epi64(__m256i __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_mask_loadu_epi64
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s64i x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!s64i x 4>) -> !cir.vector<!s64i x 4>
+
+  // LLVM-LABEL: @test_mm256_mask_loadu_epi64
+  // LLVM: @llvm.masked.load.v4i64.p0(ptr elementtype(<4 x i64>) align 1 %{{.*}}, <4 x i1> %{{.*}}, <4 x i64> %{{.*}})
+  return _mm256_mask_loadu_epi64(__W, __U, __P); 
+}
+
+__m256i test_mm256_maskz_loadu_epi64(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_maskz_loadu_epi64
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s64i x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!s64i x 4>) -> !cir.vector<!s64i x 4>
+
+  // LLVM-LABEL: @test_mm256_maskz_loadu_epi64
+  // LLVM: @llvm.masked.load.v4i64.p0(ptr elementtype(<4 x i64>) align 1 %{{.*}}, <4 x i1> %{{.*}}, <4 x i64> %{{.*}})
+  return _mm256_maskz_loadu_epi64(__U, __P); 
+}
+
+__m128 test_mm_mask_load_ps(__m128 __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_mask_load_ps
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.float x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: @test_mm_mask_load_ps
+  // LLVM: @llvm.masked.load.v4f32.p0(ptr elementtype(<4 x float>) align 16 %{{.*}}, <4 x i1> %{{.*}}, <4 x float> %{{.*}})
+  return _mm_mask_load_ps(__W, __U, __P); 
+}
+
+__m128 test_mm_maskz_load_ps(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_maskz_load_ps
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.float x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: @test_mm_maskz_load_ps
+  // LLVM: @llvm.masked.load.v4f32.p0(ptr elementtype(<4 x float>) align 16 %{{.*}}, <4 x i1> %{{.*}}, <4 x float> %{{.*}})
+  return _mm_maskz_load_ps(__U, __P); 
+}
+
+__m256 test_mm256_mask_load_ps(__m256 __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_mask_load_ps
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.float x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!cir.float x 8>) -> !cir.vector<!cir.float x 8>
+
+  // LLVM-LABEL: @test_mm256_mask_load_ps
+  // LLVM: @llvm.masked.load.v8f32.p0(ptr elementtype(<8 x float>) align 32 %{{.*}}, <8 x i1> %{{.*}}, <8 x float> %{{.*}})
+  return _mm256_mask_load_ps(__W, __U, __P); 
+}
+
+__m256 test_mm256_maskz_load_ps(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_maskz_load_ps
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.float x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!cir.float x 8>) -> !cir.vector<!cir.float x 8>
+
+  // LLVM-LABEL: @test_mm256_maskz_load_ps
+  // LLVM: @llvm.masked.load.v8f32.p0(ptr elementtype(<8 x float>) align 32 %{{.*}}, <8 x i1> %{{.*}}, <8 x float> %{{.*}})
+  return _mm256_maskz_load_ps(__U, __P); 
+}
+
+__m128d test_mm_mask_load_pd(__m128d __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_mask_load_pd
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.double x 2>>, !u32i, !cir.vector<!cir.int<s, 1> x 2>, !cir.vector<!cir.double x 2>) -> !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: @test_mm_mask_load_pd
+  // LLVM: @llvm.masked.load.v2f64.p0(ptr elementtype(<2 x double>) align 16 %{{.*}}, <2 x i1> %{{.*}}, <2 x double> %{{.*}})
+  return _mm_mask_load_pd(__W, __U, __P); 
+}
+
+__m128d test_mm_maskz_load_pd(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_maskz_load_pd
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.double x 2>>, !u32i, !cir.vector<!cir.int<s, 1> x 2>, !cir.vector<!cir.double x 2>) -> !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: @test_mm_maskz_load_pd
+  // LLVM: @llvm.masked.load.v2f64.p0(ptr elementtype(<2 x double>) align 16 %{{.*}}, <2 x i1> %{{.*}}, <2 x double> %{{.*}})
+  return _mm_maskz_load_pd(__U, __P); 
+}
+
+__m128d test_mm_maskz_loadu_pd(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_maskz_loadu_pd
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.double x 2>>, !u32i, !cir.vector<!cir.int<s, 1> x 2>, !cir.vector<!cir.double x 2>) -> !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: @test_mm_maskz_loadu_pd
+  // LLVM: @llvm.masked.load.v2f64.p0(ptr elementtype(<2 x double>) align 1 %{{.*}}, <2 x i1> %{{.*}}, <2 x double> %{{.*}})
+  return _mm_maskz_loadu_pd(__U, __P); 
+}
+
+__m256d test_mm256_mask_load_pd(__m256d __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_mask_load_pd
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.double x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!cir.double x 4>) -> !cir.vector<!cir.double x 4>
+
+  // LLVM-LABEL: @test_mm256_mask_load_pd
+  // LLVM: @llvm.masked.load.v4f64.p0(ptr elementtype(<4 x double>) align 32 %{{.*}}, <4 x i1> %{{.*}}, <4 x double> %{{.*}})
+  return _mm256_mask_load_pd(__W, __U, __P); 
+}
+
+__m256d test_mm256_maskz_load_pd(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_maskz_load_pd
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.double x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!cir.double x 4>) -> !cir.vector<!cir.double x 4>
+
+  // LLVM-LABEL: @test_mm256_maskz_load_pd
+  // LLVM: @llvm.masked.load.v4f64.p0(ptr elementtype(<4 x double>) align 32 %{{.*}}, <4 x i1> %{{.*}}, <4 x double> %{{.*}})
+  return _mm256_maskz_load_pd(__U, __P); 
+}
+
+__m256d test_mm256_maskz_loadu_pd(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_maskz_loadu_pd
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.double x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!cir.double x 4>) -> !cir.vector<!cir.double x 4>
+
+  // LLVM-LABEL: @test_mm256_maskz_loadu_pd
+  // LLVM: @llvm.masked.load.v4f64.p0(ptr elementtype(<4 x double>) align 1 %{{.*}}, <4 x i1> %{{.*}}, <4 x double> %{{.*}})
+  return _mm256_maskz_loadu_pd(__U, __P); 
+}
+
+__m128i test_mm_mask_load_epi32(__m128i __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_mask_load_epi32
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s32i x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM-LABEL: @test_mm_mask_load_epi32
+  // LLVM: @llvm.masked.load.v4i32.p0(ptr elementtype(<4 x i32>) align 16 %{{.*}}, <4 x i1> %{{.*}}, <4 x i32> %{{.*}})
+  return _mm_mask_load_epi32(__W, __U, __P); 
+}
+
+__m128i test_mm_maskz_load_epi32(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_maskz_load_epi32
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s32i x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM-LABEL: @test_mm_maskz_load_epi32
+  // LLVM: @llvm.masked.load.v4i32.p0(ptr elementtype(<4 x i32>) align 16 %{{.*}}, <4 x i1> %{{.*}}, <4 x i32> %{{.*}})
+  return _mm_maskz_load_epi32(__U, __P); 
+}
+
+__m128i test_mm_maskz_loadu_epi32(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_maskz_loadu_epi32
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s32i x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM-LABEL: @test_mm_maskz_loadu_epi32
+  // LLVM: @llvm.masked.load.v4i32.p0(ptr elementtype(<4 x i32>) align 1 %{{.*}}, <4 x i1> %{{.*}}, <4 x i32> %{{.*}})
+  return _mm_maskz_loadu_epi32(__U, __P); 
+}
+
+__m256i test_mm256_maskz_load_epi32(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_maskz_load_epi32
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s32i x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!s32i x 8>) -> !cir.vector<!s32i x 8>
+
+  // LLVM-LABEL: @test_mm256_maskz_load_epi32
+  // LLVM: @llvm.masked.load.v8i32.p0(ptr elementtype(<8 x i32>) align 32 %{{.*}}, <8 x i1> %{{.*}}, <8 x i32> %{{.*}})
+  return _mm256_maskz_load_epi32(__U, __P); 
+}
+
+__m256i test_mm256_maskz_loadu_epi32(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_maskz_loadu_epi32
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s32i x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!s32i x 8>) -> !cir.vector<!s32i x 8>
+
+  // LLVM-LABEL: @test_mm256_maskz_loadu_epi32
+  // LLVM: @llvm.masked.load.v8i32.p0(ptr elementtype(<8 x i32>) align 1 %{{.*}}, <8 x i1> %{{.*}}, <8 x i32> %{{.*}})
+  return _mm256_maskz_loadu_epi32(__U, __P); 
+}
+
+__m128i test_mm_mask_load_epi64(__m128i __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_mask_load_epi64
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s64i x 2>>, !u32i, !cir.vector<!cir.int<s, 1> x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+
+  // LLVM-LABEL: @test_mm_mask_load_epi64
+  // LLVM: @llvm.masked.load.v2i64.p0(ptr elementtype(<2 x i64>) align 16 %{{.*}}, <2 x i1> %{{.*}}, <2 x i64> %{{.*}})
+  return _mm_mask_load_epi64(__W, __U, __P); 
+}
+
+__m128i test_mm_maskz_loadu_epi64(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_maskz_loadu_epi64
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s64i x 2>>, !u32i, !cir.vector<!cir.int<s, 1> x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+
+  // LLVM-LABEL: @test_mm_maskz_loadu_epi64
+  // LLVM: @llvm.masked.load.v2i64.p0(ptr elementtype(<2 x i64>) align 1 %{{.*}}, <2 x i1> %{{.*}}, <2 x i64> %{{.*}})
+  return _mm_maskz_loadu_epi64(__U, __P); 
+}
+
+__m128i test_mm_maskz_load_epi64(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_maskz_load_epi64
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s64i x 2>>, !u32i, !cir.vector<!cir.int<s, 1> x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+
+  // LLVM-LABEL: @test_mm_maskz_load_epi64
+  // LLVM: @llvm.masked.load.v2i64.p0(ptr elementtype(<2 x i64>) align 16 %{{.*}}, <2 x i1> %{{.*}}, <2 x i64> %{{.*}})
+  return _mm_maskz_load_epi64(__U, __P); 
+}
+
+__m256i test_mm256_mask_load_epi64(__m256i __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_mask_load_epi64
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s64i x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!s64i x 4>) -> !cir.vector<!s64i x 4>
+
+  // LLVM-LABEL: @test_mm256_mask_load_epi64
+  // LLVM: @llvm.masked.load.v4i64.p0(ptr elementtype(<4 x i64>) align 32 %{{.*}}, <4 x i1> %{{.*}}, <4 x i64> %{{.*}})
+  return _mm256_mask_load_epi64(__W, __U, __P); 
+}
+
+__m256i test_mm256_maskz_load_epi64(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_maskz_load_epi64
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s64i x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!s64i x 4>) -> !cir.vector<!s64i x 4>
+
+  // LLVM-LABEL: @test_mm256_maskz_load_epi64
+  // LLVM: @llvm.masked.load.v4i64.p0(ptr elementtype(<4 x i64>) align 32 %{{.*}}, <4 x i1> %{{.*}}, <4 x i64> %{{.*}})
+  return _mm256_maskz_load_epi64(__U, __P); 
+}
+
+__m128d test_mm_mask_expandloadu_pd(__m128d __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_mask_expandloadu_pd
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.double x 2>>, !cir.vector<!cir.int<s, 1> x 2>, !cir.vector<!cir.double x 2>) -> !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: @test_mm_mask_expandloadu_pd
+  // LLVM: @llvm.masked.expandload.v2f64(ptr %{{.*}}, <2 x i1> %{{.*}}, <2 x double> %{{.*}})
+  return _mm_mask_expandloadu_pd(__W,__U,__P); 
+}
+
+__m128d test_mm_maskz_expandloadu_pd(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_maskz_expandloadu_pd
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.double x 2>>, !cir.vector<!cir.int<s, 1> x 2>, !cir.vector<!cir.double x 2>) -> !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: @test_mm_maskz_expandloadu_pd
+  // LLVM: @llvm.masked.expandload.v2f64(ptr %{{.*}}, <2 x i1> %{{.*}}, <2 x double> %{{.*}})
+  return _mm_maskz_expandloadu_pd(__U,__P); 
+}
+
+__m256d test_mm256_mask_expandloadu_pd(__m256d __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_mask_expandloadu_pd
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.double x 4>>, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!cir.double x 4>) -> !cir.vector<!cir.double x 4>
+
+  // LLVM-LABEL: @test_mm256_mask_expandloadu_pd
+  // LLVM: @llvm.masked.expandload.v4f64(ptr %{{.*}}, <4 x i1> %{{.*}}, <4 x double> %{{.*}})
+  return _mm256_mask_expandloadu_pd(__W,__U,__P); 
+}
+
+__m256d test_mm256_maskz_expandloadu_pd(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_maskz_expandloadu_pd
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.double x 4>>, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!cir.double x 4>) -> !cir.vector<!cir.double x 4>
+
+  // LLVM-LABEL: @test_mm256_maskz_expandloadu_pd
+  // LLVM: @llvm.masked.expandload.v4f64(ptr %{{.*}}, <4 x i1> %{{.*}}, <4 x double> %{{.*}})
+  return _mm256_maskz_expandloadu_pd(__U,__P); 
+}
+
+__m128 test_mm_mask_expandloadu_ps(__m128 __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_mask_expandloadu_ps
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.float x 4>>, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: @test_mm_mask_expandloadu_ps
+  // LLVM: @llvm.masked.expandload.v4f32(ptr %{{.*}}, <4 x i1> %{{.*}}, <4 x float> %{{.*}})
+  return _mm_mask_expandloadu_ps(__W,__U,__P); 
+}
+
+__m128 test_mm_maskz_expandloadu_ps(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_maskz_expandloadu_ps
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.float x 4>>, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!cir.float x 4>) -> !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: @test_mm_maskz_expandloadu_ps
+  // LLVM: @llvm.masked.expandload.v4f32(ptr %{{.*}}, <4 x i1> %{{.*}}, <4 x float> %{{.*}})
+  return _mm_maskz_expandloadu_ps(__U,__P); 
+}
+
+__m256 test_mm256_mask_expandloadu_ps(__m256 __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_mask_expandloadu_ps
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.float x 8>>, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!cir.float x 8>) -> !cir.vector<!cir.float x 8>
+
+  // LLVM-LABEL: @test_mm256_mask_expandloadu_ps
+  // LLVM: @llvm.masked.expandload.v8f32(ptr %{{.*}}, <8 x i1> %{{.*}}, <8 x float> %{{.*}})
+  return _mm256_mask_expandloadu_ps(__W,__U,__P); 
+}
+
+__m256 test_mm256_maskz_expandloadu_ps(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_maskz_expandloadu_ps
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!cir.float x 8>>, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!cir.float x 8>) -> !cir.vector<!cir.float x 8>
+
+  // LLVM-LABEL: @test_mm256_maskz_expandloadu_ps
+  // LLVM: @llvm.masked.expandload.v8f32(ptr %{{.*}}, <8 x i1> %{{.*}}, <8 x float> %{{.*}})
+  return _mm256_maskz_expandloadu_ps(__U,__P); 
+}
+
+__m128i test_mm_mask_expandloadu_epi64(__m128i __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_mask_expandloadu_epi64
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s64i x 2>>, !cir.vector<!cir.int<s, 1> x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+
+  // LLVM-LABEL: @test_mm_mask_expandloadu_epi64
+  // LLVM: @llvm.masked.expandload.v2i64(ptr %{{.*}}, <2 x i1> %{{.*}}, <2 x i64> %{{.*}})
+  return _mm_mask_expandloadu_epi64(__W,__U,__P); 
+}
+
+__m128i test_mm_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_maskz_expandloadu_epi64
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s64i x 2>>, !cir.vector<!cir.int<s, 1> x 2>, !cir.vector<!s64i x 2>) -> !cir.vector<!s64i x 2>
+
+  // LLVM-LABEL: @test_mm_maskz_expandloadu_epi64
+  // LLVM: @llvm.masked.expandload.v2i64(ptr %{{.*}}, <2 x i1> %{{.*}}, <2 x i64> %{{.*}})
+  return _mm_maskz_expandloadu_epi64(__U,__P); 
+}
+
+__m128i test_mm_mask_expandloadu_epi32(__m128i __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_mask_expandloadu_epi32
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM-LABEL: @test_mm_mask_expandloadu_epi32
+  // LLVM: @llvm.masked.expandload.v4i32(ptr %{{.*}}, <4 x i1> %{{.*}}, <4 x i32> %{{.*}})
+  return _mm_mask_expandloadu_epi32(__W,__U,__P); 
+}
+
+__m128i test_mm_maskz_expandloadu_epi32(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_maskz_expandloadu_epi32
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!cir.int<s, 1> x 4>, !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  // LLVM-LABEL: @test_mm_maskz_expandloadu_epi32
+  // LLVM: @llvm.masked.expandload.v4i32(ptr %{{.*}}, <4 x i1> %{{.*}}, <4 x i32> %{{.*}})
+  return _mm_maskz_expandloadu_epi32(__U,__P); 
+}
+
+__m256i test_mm256_mask_expandloadu_epi32(__m256i __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_mask_expandloadu_epi32
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s32i x 8>>, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!s32i x 8>) -> !cir.vector<!s32i x 8>
+
+  // LLVM-LABEL: @test_mm256_mask_expandloadu_epi32
+  // LLVM: @llvm.masked.expandload.v8i32(ptr %{{.*}}, <8 x i1> %{{.*}}, <8 x i32> %{{.*}})
+  return _mm256_mask_expandloadu_epi32(__W,__U,__P); 
+}
+
+__m256i test_mm256_maskz_expandloadu_epi32(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm256_maskz_expandloadu_epi32
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s32i x 8>>, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!s32i x 8>) -> !cir.vector<!s32i x 8>
+
+  // LLVM-LABEL: @test_mm256_maskz_expandloadu_epi32
+  // LLVM: @llvm.masked.expandload.v8i32(ptr %{{.*}}, <8 x i1> %{{.*}}, <8 x i32> %{{.*}})
+  return _mm256_maskz_expandloadu_epi32(__U,__P);
+}
+
+void test_mm_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m128d __A) {
+  // CIR-LABEL: _mm_mask_compressstoreu_pd
+  // CIR: cir.llvm.intrinsic "masked.compressstore" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.double x 2>, !cir.ptr<!cir.vector<!cir.double x 2>>, !cir.vector<!cir.int<s, 1> x 2>) -> !void
+
+  // LLVM-LABEL: @test_mm_mask_compressstoreu_pd
+  // LLVM: @llvm.masked.compressstore.v2f64(<2 x double> %{{.*}}, ptr %{{.*}}, <2 x i1> %{{.*}})
+  return _mm_mask_compressstoreu_pd(__P,__U,__A); 
+}
+
+void test_mm256_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m256d __A) {
+  // CIR-LABEL: _mm256_mask_compressstoreu_pd
+  // CIR: cir.llvm.intrinsic "masked.compressstore" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.double x 4>, !cir.ptr<!cir.vector<!cir.double x 4>>, !cir.vector<!cir.int<s, 1> x 4>) -> !void
+
+  // LLVM-LABEL: @test_mm256_mask_compressstoreu_pd
+  // LLVM: @llvm.masked.compressstore.v4f64(<4 x double> %{{.*}}, ptr %{{.*}}, <4 x i1> %{{.*}})
+  return _mm256_mask_compressstoreu_pd(__P,__U,__A); 
+}
+void test_mm_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m128 __A) {
+  // CIR-LABEL: _mm_mask_compressstoreu_ps
+  // CIR: cir.llvm.intrinsic "masked.compressstore" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>, !cir.vector<!cir.int<s, 1> x 4>) -> !void
+
+  // LLVM-LABEL: @test_mm_mask_compressstoreu_ps
+  // LLVM: @llvm.masked.compressstore.v4f32(<4 x float> %{{.*}}, ptr %{{.*}}, <4 x i1> %{{.*}})
+  return _mm_mask_compressstoreu_ps(__P,__U,__A); 
+}
+
+void test_mm256_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m256 __A) {
+  // CIR-LABEL: _mm256_mask_compressstoreu_ps
+  // CIR: cir.llvm.intrinsic "masked.compressstore" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.float x 8>, !cir.ptr<!cir.vector<!cir.float x 8>>, !cir.vector<!cir.int<s, 1> x 8>) -> !void
+
+  // LLVM-LABEL: @test_mm256_mask_compressstoreu_ps
+  // LLVM: @llvm.masked.compressstore.v8f32(<8 x float> %{{.*}}, ptr %{{.*}}, <8 x i1> %{{.*}})
+  return _mm256_mask_compressstoreu_ps(__P,__U,__A); 
+}
+
+void test_mm_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m128i __A) {
+  // CIR-LABEL: _mm_mask_compressstoreu_epi64
+  // CIR: cir.llvm.intrinsic "masked.compressstore" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s64i x 2>, !cir.ptr<!cir.vector<!s64i x 2>>, !cir.vector<!cir.int<s, 1> x 2>) -> !void
+
+  // LLVM-LABEL: @test_mm_mask_compressstoreu_epi64
+  // LLVM: @llvm.masked.compressstore.v2i64(<2 x i64> %{{.*}}, ptr %{{.*}}, <2 x i1> %{{.*}})
+  return _mm_mask_compressstoreu_epi64(__P,__U,__A); 
+}
+
+void test_mm256_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m256i __A) {
+  // CIR-LABEL: _mm256_mask_compressstoreu_epi64
+  // CIR: cir.llvm.intrinsic "masked.compressstore" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s64i x 4>, !cir.ptr<!cir.vector<!s64i x 4>>, !cir.vector<!cir.int<s, 1> x 4>) -> !void
+
+  // LLVM-LABEL: @test_mm256_mask_compressstoreu_epi64
+  // LLVM: @llvm.masked.compressstore.v4i64(<4 x i64> %{{.*}}, ptr %{{.*}}, <4 x i1> %{{.*}})
+  return _mm256_mask_compressstoreu_epi64(__P,__U,__A); 
+}
+
+void test_mm_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m128i __A) {
+  // CIR-LABEL: _mm_mask_compressstoreu_epi32
+  // CIR: cir.llvm.intrinsic "masked.compressstore" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!cir.int<s, 1> x 4>) -> !void
+
+  // LLVM-LABEL: @test_mm_mask_compressstoreu_epi32
+  // LLVM: @llvm.masked.compressstore.v4i32(<4 x i32> %{{.*}}, ptr %{{.*}}, <4 x i1> %{{.*}})
+  return _mm_mask_compressstoreu_epi32(__P,__U,__A); 
+}
+
+void test_mm256_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m256i __A) {
+  // CIR-LABEL: _mm256_mask_compressstoreu_epi32
+  // CIR: cir.llvm.intrinsic "masked.compressstore" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s32i x 8>, !cir.ptr<!cir.vector<!s32i x 8>>, !cir.vector<!cir.int<s, 1> x 8>) -> !void
+
+  // LLVM-LABEL: @test_mm256_mask_compressstoreu_epi32
+  // LLVM: @llvm.masked.compressstore.v8i32(<8 x i32> %{{.*}}, ptr %{{.*}}, <8 x i1> %{{.*}})
+  return _mm256_mask_compressstoreu_epi32(__P,__U,__A); 
+}
+__m128d test_mm_mmask_i64gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+  // CIR-LABEL: test_mm_mmask_i64gather_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div2.df"
+
+  // LLVM-LABEL: @test_mm_mmask_i64gather_pd
+  // LLVM: @llvm.x86.avx512.mask.gather3div2.df
+  return _mm_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m128i test_mm_mmask_i64gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+  // CIR-LABEL: test_mm_mmask_i64gather_epi64
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div2.di"
+
+  // LLVM-LABEL: @test_mm_mmask_i64gather_epi64
+  // LLVM: @llvm.x86.avx512.mask.gather3div2.di
+  return _mm_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m256d test_mm256_mmask_i64gather_pd(__m256d __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
+  // CIR-LABEL: test_mm256_mmask_i64gather_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div4.df"
+
+  // LLVM-LABEL: @test_mm256_mmask_i64gather_pd
+  // LLVM: @llvm.x86.avx512.mask.gather3div4.df
+  return _mm256_mmask_i64gather_pd(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m256i test_mm256_mmask_i64gather_epi64(__m256i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
+  // CIR-LABEL: test_mm256_mmask_i64gather_epi64
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div4.di"
+
+  // LLVM-LABEL: @test_mm256_mmask_i64gather_epi64
+  // LLVM: @llvm.x86.avx512.mask.gather3div4.di
+  return _mm256_mmask_i64gather_epi64(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m128 test_mm_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+  // CIR-LABEL: test_mm_mmask_i64gather_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div4.sf"
+
+  // LLVM-LABEL: @test_mm_mmask_i64gather_ps
+  // LLVM: @llvm.x86.avx512.mask.gather3div4.sf
+  return _mm_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m128i test_mm_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+  // CIR-LABEL: test_mm_mmask_i64gather_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div4.si"
+
+  // LLVM-LABEL: @test_mm_mmask_i64gather_epi32
+  // LLVM: @llvm.x86.avx512.mask.gather3div4.si
+  return _mm_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m128 test_mm256_mmask_i64gather_ps(__m128 __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
+  // CIR-LABEL: test_mm256_mmask_i64gather_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div8.sf"
+
+  // LLVM-LABEL: @test_mm256_mmask_i64gather_ps
+  // LLVM: @llvm.x86.avx512.mask.gather3div8.sf
+  return _mm256_mmask_i64gather_ps(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m128i test_mm256_mmask_i64gather_epi32(__m128i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
+  // CIR-LABEL: test_mm256_mmask_i64gather_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3div8.si"
+
+  // LLVM-LABEL: @test_mm256_mmask_i64gather_epi32
+  // LLVM: @llvm.x86.avx512.mask.gather3div8.si
+  return _mm256_mmask_i64gather_epi32(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m128d test_mm_mask_i32gather_pd(__m128d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+  // CIR-LABEL: test_mm_mask_i32gather_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv2.df"
+
+  // LLVM-LABEL: @test_mm_mask_i32gather_pd
+  // LLVM: @llvm.x86.avx512.mask.gather3siv2.df
+  return _mm_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m128i test_mm_mask_i32gather_epi64(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+  // CIR-LABEL: test_mm_mask_i32gather_epi64
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv2.di"
+
+  // LLVM-LABEL: @test_mm_mask_i32gather_epi64
+  // LLVM: @llvm.x86.avx512.mask.gather3siv2.di
+  return _mm_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m256d test_mm256_mask_i32gather_pd(__m256d __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+  // CIR-LABEL: test_mm256_mask_i32gather_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv4.df"
+
+  // LLVM-LABEL: @test_mm256_mask_i32gather_pd
+  // LLVM: @llvm.x86.avx512.mask.gather3siv4.df
+  return _mm256_mmask_i32gather_pd(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m256i test_mm256_mask_i32gather_epi64(__m256i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+  // CIR-LABEL: test_mm256_mask_i32gather_epi64
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv4.di"
+
+  // LLVM-LABEL: @test_mm256_mask_i32gather_epi64
+  // LLVM: @llvm.x86.avx512.mask.gather3siv4.di
+  return _mm256_mmask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m128 test_mm_mask_i32gather_ps(__m128 __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+  // CIR-LABEL: test_mm_mask_i32gather_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv4.sf"
+
+  // LLVM-LABEL: @test_mm_mask_i32gather_ps
+  // LLVM: @llvm.x86.avx512.mask.gather3siv4.sf
+  return _mm_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m128i test_mm_mask_i32gather_epi32(__m128i __v1_old, __mmask8 __mask, __m128i __index, void const *__addr) {
+  // CIR-LABEL: test_mm_mask_i32gather_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv4.si"
+
+  // LLVM-LABEL: @test_mm_mask_i32gather_epi32
+  // LLVM: @llvm.x86.avx512.mask.gather3siv4.si
+  return _mm_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m256 test_mm256_mask_i32gather_ps(__m256 __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
+  // CIR-LABEL: test_mm256_mask_i32gather_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv8.sf"
+
+  // LLVM-LABEL: @test_mm256_mask_i32gather_ps
+  // LLVM: @llvm.x86.avx512.mask.gather3siv8.sf
+  return _mm256_mmask_i32gather_ps(__v1_old, __mask, __index, __addr, 2); 
+}
+
+__m256i test_mm256_mask_i32gather_epi32(__m256i __v1_old, __mmask8 __mask, __m256i __index, void const *__addr) {
+  // CIR-LABEL: test_mm256_mask_i32gather_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.gather3siv8.si"
+
+  // LLVM-LABEL: @test_mm256_mask_i32gather_epi32
+  // LLVM: @llvm.x86.avx512.mask.gather3siv8.si
+  return _mm256_mmask_i32gather_epi32(__v1_old, __mask, __index, __addr, 2); 
+}
+
+void test_mm_i64scatter_pd(double *__addr, __m128i __index,  __m128d __v1) {
+  // CIR-LABEL: test_mm_i64scatter_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatterdiv2.df"
+
+  // LLVM-LABEL: @test_mm_i64scatter_pd
+  // LLVM: @llvm.x86.avx512.mask.scatterdiv2.df
+  return _mm_i64scatter_pd(__addr,__index,__v1,2); 
+}
+
+void test_mm_mask_i64scatter_pd(double *__addr, __mmask8 __mask, __m128i __index, __m128d __v1) {
+  // CIR-LABEL: test_mm_mask_i64scatter_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatterdiv2.df"
+
+  // LLVM-LABEL: @test_mm_mask_i64scatter_pd
+  // LLVM: @llvm.x86.avx512.mask.scatterdiv2.df
+  return _mm_mask_i64scatter_pd(__addr,__mask,__index,__v1,2); 
+}
+
+void test_mm_i64scatter_epi64(long long *__addr, __m128i __index,  __m128i __v1) {
+  // CIR-LABEL: test_mm_i64scatter_epi64
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatterdiv2.di"
+
+  // LLVM-LABEL: @test_mm_i64scatter_epi64
+  // LLVM: @llvm.x86.avx512.mask.scatterdiv2.di
+  return _mm_i64scatter_epi64(__addr,__index,__v1,2); 
+}
+
+void test_mm_mask_i64scatter_epi64(long long *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) {
+  // CIR-LABEL: test_mm_mask_i64scatter_epi64
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatterdiv2.di"
+
+  // LLVM-LABEL: @test_mm_mask_i64scatter_epi64
+  // LLVM: @llvm.x86.avx512.mask.scatterdiv2.di
+  return _mm_mask_i64scatter_epi64(__addr,__mask,__index,__v1,2); 
+}
+
+void test_mm256_i64scatter_pd(double *__addr, __m256i __index,  __m256d __v1) {
+  // CIR-LABEL: test_mm256_i64scatter_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatterdiv4.df"
+
+  // LLVM-LABEL: @test_mm256_i64scatter_pd
+  // LLVM: @llvm.x86.avx512.mask.scatterdiv4.df
+  return _mm256_i64scatter_pd(__addr,__index,__v1,2); 
+}
+
+void test_mm256_mask_i64scatter_pd(double *__addr, __mmask8 __mask, __m256i __index, __m256d __v1) {
+  // CIR-LABEL: test_mm256_mask_i64scatter_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatterdiv4.df"
+
+  // LLVM-LABEL: @test_mm256_mask_i64scatter_pd
+  // LLVM: @llvm.x86.avx512.mask.scatterdiv4.df
+  return _mm256_mask_i64scatter_pd(__addr,__mask,__index,__v1,2); 
+}
+
+void test_mm256_i64scatter_epi64(long long *__addr, __m256i __index,  __m256i __v1) {
+  // CIR-LABEL: test_mm256_i64scatter_epi64
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatterdiv4.di"
+
+  // LLVM-LABEL: @test_mm256_i64scatter_epi64
+  // LLVM: @llvm.x86.avx512.mask.scatterdiv4.di
+  return _mm256_i64scatter_epi64(__addr,__index,__v1,2); 
+}
+
+void test_mm256_mask_i64scatter_epi64(long long *__addr, __mmask8 __mask,  __m256i __index, __m256i __v1) {
+  // CIR-LABEL: test_mm256_mask_i64scatter_epi64
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatterdiv4.di"
+
+  // LLVM-LABEL: @test_mm256_mask_i64scatter_epi64
+  // LLVM: @llvm.x86.avx512.mask.scatterdiv4.di
+  return _mm256_mask_i64scatter_epi64(__addr,__mask,__index,__v1,2); 
+}
+
+void test_mm_i64scatter_ps(float *__addr, __m128i __index, __m128 __v1) {
+  // CIR-LABEL: test_mm_i64scatter_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatterdiv4.sf"
+
+  // LLVM-LABEL: @test_mm_i64scatter_ps
+  // LLVM: @llvm.x86.avx512.mask.scatterdiv4.sf
+  return _mm_i64scatter_ps(__addr,__index,__v1,2); 
+}
+
+void test_mm_mask_i64scatter_ps(float *__addr, __mmask8 __mask, __m128i __index, __m128 __v1) {
+  // CIR-LABEL: test_mm_mask_i64scatter_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatterdiv4.sf"
+
+  // LLVM-LABEL: @test_mm_mask_i64scatter_ps
+  // LLVM: @llvm.x86.avx512.mask.scatterdiv4.sf
+  return _mm_mask_i64scatter_ps(__addr,__mask,__index,__v1,2); 
+}
+
+void test_mm_i64scatter_epi32(int *__addr, __m128i __index,  __m128i __v1) {
+  // CIR-LABEL: test_mm_i64scatter_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatterdiv4.si"
+
+  // LLVM-LABEL: @test_mm_i64scatter_epi32
+  // LLVM: @llvm.x86.avx512.mask.scatterdiv4.si
+  return _mm_i64scatter_epi32(__addr,__index,__v1,2); 
+}
+
+void test_mm_mask_i64scatter_epi32(int *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) {
+  // CIR-LABEL: test_mm_mask_i64scatter_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatterdiv4.si"
+
+  // LLVM-LABEL: @test_mm_mask_i64scatter_epi32
+  // LLVM: @llvm.x86.avx512.mask.scatterdiv4.si
+  return _mm_mask_i64scatter_epi32(__addr,__mask,__index,__v1,2); 
+}
+
+void test_mm256_i64scatter_ps(float *__addr, __m256i __index,  __m128 __v1) {
+  // CIR-LABEL: test_mm256_i64scatter_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatterdiv8.sf"
+
+  // LLVM-LABEL: @test_mm256_i64scatter_ps
+  // LLVM: @llvm.x86.avx512.mask.scatterdiv8.sf
+  return _mm256_i64scatter_ps(__addr,__index,__v1,2); 
+}
+
+void test_mm256_mask_i64scatter_ps(float *__addr, __mmask8 __mask, __m256i __index, __m128 __v1) {
+  // CIR-LABEL: test_mm256_mask_i64scatter_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatterdiv8.sf"
+
+  // LLVM-LABEL: @test_mm256_mask_i64scatter_ps
+  // LLVM: @llvm.x86.avx512.mask.scatterdiv8.sf
+  return _mm256_mask_i64scatter_ps(__addr,__mask,__index,__v1,2); 
+}
+
+void test_mm256_i64scatter_epi32(int *__addr, __m256i __index,  __m128i __v1) {
+  // CIR-LABEL: test_mm256_i64scatter_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatterdiv8.si"
+
+  // LLVM-LABEL: @test_mm256_i64scatter_epi32
+  // LLVM: @llvm.x86.avx512.mask.scatterdiv8.si
+  return _mm256_i64scatter_epi32(__addr,__index,__v1,2); 
+}
+
+void test_mm256_mask_i64scatter_epi32(int *__addr, __mmask8 __mask,  __m256i __index, __m128i __v1) {
+  // CIR-LABEL: test_mm256_mask_i64scatter_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scatterdiv8.si"
+
+  // LLVM-LABEL: @test_mm256_mask_i64scatter_epi32
+  // LLVM: @llvm.x86.avx512.mask.scatterdiv8.si
+  return _mm256_mask_i64scatter_epi32(__addr,__mask,__index,__v1,2); 
+}
+
+void test_mm_i32scatter_pd(double *__addr, __m128i __index,  __m128d __v1) {
+  // CIR-LABEL: test_mm_i32scatter_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scattersiv2.df"
+
+  // LLVM-LABEL: @test_mm_i32scatter_pd
+  // LLVM: @llvm.x86.avx512.mask.scattersiv2.df
+  return _mm_i32scatter_pd(__addr,__index,__v1,2); 
+}
+
+void test_mm_mask_i32scatter_pd(double *__addr, __mmask8 __mask, __m128i __index, __m128d __v1) {
+  // CIR-LABEL: test_mm_mask_i32scatter_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scattersiv2.df"
+
+  // LLVM-LABEL: @test_mm_mask_i32scatter_pd
+  // LLVM: @llvm.x86.avx512.mask.scattersiv2.df
+  return _mm_mask_i32scatter_pd(__addr,__mask,__index,__v1,2); 
+}
+
+void test_mm_i32scatter_epi64(long long *__addr, __m128i __index,  __m128i __v1) {
+  // CIR-LABEL: test_mm_i32scatter_epi64
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scattersiv2.di"
+
+  // LLVM-LABEL: @test_mm_i32scatter_epi64
+  // LLVM: @llvm.x86.avx512.mask.scattersiv2.di
+  return _mm_i32scatter_epi64(__addr,__index,__v1,2); 
+}
+
+void test_mm_mask_i32scatter_epi64(long long *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) {
+  // CIR-LABEL: test_mm_mask_i32scatter_epi64
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scattersiv2.di"
+
+  // LLVM-LABEL: @test_mm_mask_i32scatter_epi64
+  // LLVM: @llvm.x86.avx512.mask.scattersiv2.di
+  return _mm_mask_i32scatter_epi64(__addr,__mask,__index,__v1,2); 
+}
+
+void test_mm256_i32scatter_pd(double *__addr, __m128i __index,  __m256d __v1) {
+  // CIR-LABEL: test_mm256_i32scatter_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scattersiv4.df"
+
+  // LLVM-LABEL: @test_mm256_i32scatter_pd
+  // LLVM: @llvm.x86.avx512.mask.scattersiv4.df
+  return _mm256_i32scatter_pd(__addr,__index,__v1,2); 
+}
+
+void test_mm256_mask_i32scatter_pd(double *__addr, __mmask8 __mask, __m128i __index, __m256d __v1) {
+  // CIR-LABEL: test_mm256_mask_i32scatter_pd
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scattersiv4.df"
+
+  // LLVM-LABEL: @test_mm256_mask_i32scatter_pd
+  // LLVM: @llvm.x86.avx512.mask.scattersiv4.df
+  return _mm256_mask_i32scatter_pd(__addr,__mask,__index,__v1,2); 
+}
+
+void test_mm256_i32scatter_epi64(long long *__addr, __m128i __index,  __m256i __v1) {
+  // CIR-LABEL: test_mm256_i32scatter_epi64
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scattersiv4.di"
+
+  // LLVM-LABEL: @test_mm256_i32scatter_epi64
+  // LLVM: @llvm.x86.avx512.mask.scattersiv4.di
+  return _mm256_i32scatter_epi64(__addr,__index,__v1,2); 
+}
+
+void test_mm256_mask_i32scatter_epi64(long long *__addr, __mmask8 __mask,  __m128i __index, __m256i __v1) {
+  // CIR-LABEL: test_mm256_mask_i32scatter_epi64
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scattersiv4.di"
+
+  // LLVM-LABEL: @test_mm256_mask_i32scatter_epi64
+  // LLVM: @llvm.x86.avx512.mask.scattersiv4.di
+  return _mm256_mask_i32scatter_epi64(__addr,__mask,__index,__v1,2); 
+}
+
+void test_mm_i32scatter_ps(float *__addr, __m128i __index, __m128 __v1) {
+  // CIR-LABEL: test_mm_i32scatter_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scattersiv4.sf"
+
+  // LLVM-LABEL: @test_mm_i32scatter_ps
+  // LLVM: @llvm.x86.avx512.mask.scattersiv4.sf
+  return _mm_i32scatter_ps(__addr,__index,__v1,2); 
+}
+
+void test_mm_mask_i32scatter_ps(float *__addr, __mmask8 __mask, __m128i __index, __m128 __v1) {
+  // CIR-LABEL: test_mm_mask_i32scatter_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scattersiv4.sf"
+
+  // LLVM-LABEL: @test_mm_mask_i32scatter_ps
+  // LLVM: @llvm.x86.avx512.mask.scattersiv4.sf
+  return _mm_mask_i32scatter_ps(__addr,__mask,__index,__v1,2); 
+}
+
+void test_mm_i32scatter_epi32(int *__addr, __m128i __index,  __m128i __v1) {
+  // CIR-LABEL: test_mm_i32scatter_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scattersiv4.si"
+
+  // LLVM-LABEL: @test_mm_i32scatter_epi32
+  // LLVM: @llvm.x86.avx512.mask.scattersiv4.si
+  return _mm_i32scatter_epi32(__addr,__index,__v1,2); 
+}
+
+void test_mm_mask_i32scatter_epi32(int *__addr, __mmask8 __mask, __m128i __index, __m128i __v1) {
+  // CIR-LABEL: test_mm_mask_i32scatter_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scattersiv4.si"
+
+  // LLVM-LABEL: @test_mm_mask_i32scatter_epi32
+  // LLVM: @llvm.x86.avx512.mask.scattersiv4.si
+  return _mm_mask_i32scatter_epi32(__addr,__mask,__index,__v1,2); 
+}
+
+void test_mm256_i32scatter_ps(float *__addr, __m256i __index,  __m256 __v1) {
+  // CIR-LABEL: test_mm256_i32scatter_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scattersiv8.sf"
+
+  // LLVM-LABEL: @test_mm256_i32scatter_ps
+  // LLVM: @llvm.x86.avx512.mask.scattersiv8.sf
+  return _mm256_i32scatter_ps(__addr,__index,__v1,2); 
+}
+
+void test_mm256_mask_i32scatter_ps(float *__addr, __mmask8 __mask, __m256i __index, __m256 __v1) {
+  // CIR-LABEL: test_mm256_mask_i32scatter_ps
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scattersiv8.sf"
+
+  // LLVM-LABEL: @test_mm256_mask_i32scatter_ps
+  // LLVM: @llvm.x86.avx512.mask.scattersiv8.sf
+  return _mm256_mask_i32scatter_ps(__addr,__mask,__index,__v1,2); 
+}
+
+void test_mm256_i32scatter_epi32(int *__addr, __m256i __index,  __m256i __v1) {
+  // CIR-LABEL: test_mm256_i32scatter_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scattersiv8.si"
+
+  // LLVM-LABEL: @test_mm256_i32scatter_epi32
+  // LLVM: @llvm.x86.avx512.mask.scattersiv8.si
+  return _mm256_i32scatter_epi32(__addr,__index,__v1,2); 
+}
+
+void test_mm256_mask_i32scatter_epi32(int *__addr, __mmask8 __mask,  __m256i __index, __m256i __v1) {
+  // CIR-LABEL: test_mm256_mask_i32scatter_epi32
+  // CIR: cir.llvm.intrinsic "x86.avx512.mask.scattersiv8.si"
+
+  // LLVM-LABEL: @test_mm256_mask_i32scatter_epi32
+  // LLVM: @llvm.x86.avx512.mask.scattersiv8.si
+  return _mm256_mask_i32scatter_epi32(__addr,__mask,__index,__v1,2); 
+}
+
+__m256 test_mm256_insertf32x4(__m256 __A, __m128 __B) {
+  // CIR-LABEL: test_mm256_insertf32x4
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i] : !cir.vector<!cir.float x 8>
+
+  // LLVM-LABEL: @test_mm256_insertf32x4
+  // LLVM: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  return _mm256_insertf32x4(__A, __B, 1); 
+}
+
+__m256i test_mm256_inserti32x4(__m256i __A, __m128i __B) {
+  // CIR-LABEL: test_mm256_inserti32x4
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i] : !cir.vector<!s32i x 8> 
+
+  // LLVM-LABEL: @test_mm256_inserti32x4
+  // LLVM: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+  return _mm256_inserti32x4(__A, __B, 1); 
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/avx512vlbw-builtins.c
new file mode 100644
index 0000000000000..dd7b9b2ac4e7f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/avx512vlbw-builtins.c
@@ -0,0 +1,221 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM-UNSIGNED-CHAR --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.1-512 -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.1-512 -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx10.1-512 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+
+#include <immintrin.h>
+
+void test_mm_mask_storeu_epi16(void *__P, __mmask8 __U, __m128i __A) {
+  // CIR-LABEL: _mm_mask_storeu_epi16
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s16i x 8>, !cir.ptr<!cir.vector<!s16i x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>)
+
+  // LLVM-LABEL: @test_mm_mask_storeu_epi16
+  // LLVM: call void @llvm.masked.store.v8i16.p0(<8 x i16> %{{.*}}, ptr elementtype(<8 x i16>) align 1 %{{.*}}, <8 x i1> %{{.*}})
+  return _mm_mask_storeu_epi16(__P, __U, __A); 
+}
+
+void test_mm_mask_storeu_epi8(void *__P, __mmask16 __U, __m128i __A) {
+  // CIR-LABEL: _mm_mask_storeu_epi8
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<{{!s8i|!u8i}} x 16>, !cir.ptr<!cir.vector<{{!s8i|!u8i}} x 16>>, !u32i, !cir.vector<!cir.int<s, 1> x 16>)
+
+  // LLVM-LABEL: @test_mm_mask_storeu_epi8
+  // LLVM: call void @llvm.masked.store.v16i8.p0(<16 x i8> %{{.*}}, ptr elementtype(<16 x i8>) align 1 %{{.*}}, <16 x i1> %{{.*}})
+  return _mm_mask_storeu_epi8(__P, __U, __A); 
+}
+
+void test_mm256_mask_storeu_epi8(void *__P, __mmask32 __U, __m256i __A) {
+  // CIR-LABEL: _mm256_mask_storeu_epi8
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<{{!s8i|!u8i}} x 32>, !cir.ptr<!cir.vector<{{!s8i|!u8i}} x 32>>, !u32i, !cir.vector<!cir.int<s, 1> x 32>) -> !void
+
+  // LLVM-LABEL: @test_mm256_mask_storeu_epi8
+  // LLVM: call void @llvm.masked.store.v32i8.p0(<32 x i8> %{{.*}}, ptr elementtype(<32 x i8>) align 1 %{{.*}}, <32 x i1> %{{.*}})
+  return _mm256_mask_storeu_epi8(__P, __U, __A); 
+}
+
+void test_mm256_mask_storeu_pd(void *__P, __mmask8 __U, __m256d __A) {
+  // CIR-LABEL: _mm256_mask_storeu_pd
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.store" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!cir.double x 4>, !cir.ptr<!cir.vector<!cir.double x 4>>, !u32i, !cir.vector<!cir.int<s, 1> x 4>) -> !void
+
+  // LLVM-LABEL: @test_mm256_mask_storeu_pd
+  // LLVM: call void @llvm.masked.store.v4f64.p0(<4 x double> %{{.*}}, ptr elementtype(<4 x double>) align 1 %{{.*}}, <4 x i1> %{{.*}})
+  return _mm256_mask_storeu_pd(__P, __U, __A); 
+}
+
+__m128i test_mm_movm_epi8(__mmask16 __A) {
+  // CIR-LABEL: _mm_movm_epi8
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<!cir.int<s, 1> x 16>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 16> -> !cir.vector<{{!s8i|!u8i}} x 16>
+
+  // LLVM-LABEL: @test_mm_movm_epi8
+  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i8>
+  return _mm_movm_epi8(__A); 
+}
+
+__m256i test_mm256_movm_epi8(__mmask32 __A) {
+  // CIR-LABEL: _mm256_movm_epi8
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u32i -> !cir.vector<!cir.int<s, 1> x 32>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 32> -> !cir.vector<{{!s8i|!u8i}} x 32>
+
+  // LLVM-LABEL: @test_mm256_movm_epi8
+  // LLVM: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %{{.*}} = sext <32 x i1> %{{.*}} to <32 x i8>
+  return _mm256_movm_epi8(__A); 
+}
+
+__m512i test_mm512_movm_epi8(__mmask64 __A) {
+  // CIR-LABEL: _mm512_movm_epi8
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u64i -> !cir.vector<!cir.int<s, 1> x 64>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 64> -> !cir.vector<{{!s8i|!u8i}} x 64>
+
+  // LLVM-LABEL: @test_mm512_movm_epi8
+  // LLVM:  %{{.*}} = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM:  %{{.*}} = sext <64 x i1> %{{.*}} to <64 x i8>
+  return _mm512_movm_epi8(__A); 
+}
+
+__m128i test_mm_movm_epi16(__mmask8 __A) {
+  // CIR-LABEL: _mm_movm_epi16
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 1> x 8>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 8> -> !cir.vector<!s16i x 8>
+
+  // LLVM-LABEL: @test_mm_movm_epi16
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i16>
+  return _mm_movm_epi16(__A); 
+}
+
+__m256i test_mm256_movm_epi16(__mmask16 __A) {
+  // CIR-LABEL: _mm256_movm_epi16
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<!cir.int<s, 1> x 16>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 16> -> !cir.vector<!s16i x 16>
+
+  // LLVM-LABEL: @test_mm256_movm_epi16
+  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i16>
+  return _mm256_movm_epi16(__A); 
+}
+
+__m128i test_mm_mask_loadu_epi8(__m128i __W, __mmask16 __U, void const *__P) {
+  // CIR-LABEL: _mm_mask_loadu_epi8
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<{{!s8i|!u8i}} x 16>>, !u32i, !cir.vector<!cir.int<s, 1> x 16>, !cir.vector<{{!s8i|!u8i}} x 16>) -> !cir.vector<{{!s8i|!u8i}} x 16>
+
+  // LLVM-LABEL: @test_mm_mask_loadu_epi8
+  // LLVM: @llvm.masked.load.v16i8.p0(ptr elementtype(<16 x i8>) align 1 %{{.*}}, <16 x i1> %{{.*}}, <16 x i8> %{{.*}})
+  return _mm_mask_loadu_epi8(__W, __U, __P); 
+}
+
+__m256i test_mm256_mask_loadu_epi8(__m256i __W, __mmask32 __U, void const *__P) {
+  // CIR-LABEL: _mm256_mask_loadu_epi8
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<{{!s8i|!u8i}} x 32>>, !u32i, !cir.vector<!cir.int<s, 1> x 32>, !cir.vector<{{!s8i|!u8i}} x 32>) -> !cir.vector<{{!s8i|!u8i}} x 32>
+
+  // LLVM-LABEL: @test_mm256_mask_loadu_epi8
+  // LLVM: @llvm.masked.load.v32i8.p0(ptr elementtype(<32 x i8>) align 1 %{{.*}}, <32 x i1> %{{.*}}, <32 x i8> %{{.*}})
+  return _mm256_mask_loadu_epi8(__W, __U, __P); 
+}
+
+__m128i test_mm_mask_loadu_epi16(__m128i __W, __mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_mask_loadu_epi16
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s16i x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM-LABEL: @test_mm_mask_loadu_epi16
+  // LLVM: @llvm.masked.load.v8i16.p0(ptr elementtype(<8 x i16>) align 1 %{{.*}}, <8 x i1> %{{.*}}, <8 x i16> %{{.*}})
+  return _mm_mask_loadu_epi16(__W, __U, __P); 
+}
+
+__m128i test_mm_maskz_loadu_epi16(__mmask8 __U, void const *__P) {
+  // CIR-LABEL: _mm_maskz_loadu_epi16
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s16i x 8>>, !u32i, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM-LABEL: @test_mm_maskz_loadu_epi16
+  // LLVM: @llvm.masked.load.v8i16.p0(ptr elementtype(<8 x i16>) align 1 %{{.*}}, <8 x i1> %{{.*}}, <8 x i16> %{{.*}})
+  return _mm_maskz_loadu_epi16(__U, __P); 
+}
+
+__m128i test_mm_maskz_loadu_epi8(__mmask16 __U, void const *__P) {
+  // CIR-LABEL: _mm_maskz_loadu_epi8
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<{{!s8i|!u8i}} x 16>>, !u32i, !cir.vector<!cir.int<s, 1> x 16>, !cir.vector<{{!s8i|!u8i}} x 16>) -> !cir.vector<{{!s8i|!u8i}} x 16>
+
+  // LLVM-LABEL: @test_mm_maskz_loadu_epi8
+  // LLVM: @llvm.masked.load.v16i8.p0(ptr elementtype(<16 x i8>) align 1 %{{.*}}, <16 x i1> %{{.*}}, <16 x i8> %{{.*}})
+  return _mm_maskz_loadu_epi8(__U, __P); 
+}
+
+__m256i test_mm256_mask_loadu_epi16(__m256i __W, __mmask16 __U, void const *__P) {
+  // CIR-LABEL: _mm256_mask_loadu_epi16
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s16i x 16>>, !u32i, !cir.vector<!cir.int<s, 1> x 16>, !cir.vector<!s16i x 16>) -> !cir.vector<!s16i x 16>
+
+  // LLVM-LABEL: @test_mm256_mask_loadu_epi16
+  // LLVM: @llvm.masked.load.v16i16.p0(ptr elementtype(<16 x i16>) align 1 %{{.*}}, <16 x i1> %{{.*}}, <16 x i16> %{{.*}})
+  return _mm256_mask_loadu_epi16(__W, __U, __P); 
+}
+
+__m256i test_mm256_maskz_loadu_epi16(__mmask16 __U, void const *__P) {
+  // CIR-LABEL: _mm256_maskz_loadu_epi16
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s16i x 16>>, !u32i, !cir.vector<!cir.int<s, 1> x 16>, !cir.vector<!s16i x 16>) -> !cir.vector<!s16i x 16>
+
+  // LLVM-LABEL: @test_mm256_maskz_loadu_epi16
+  // LLVM: @llvm.masked.load.v16i16.p0(ptr elementtype(<16 x i16>) align 1 %{{.*}}, <16 x i1> %{{.*}}, <16 x i16> %{{.*}})
+  return _mm256_maskz_loadu_epi16(__U, __P); 
+}
+
+
+__m256i test_mm256_maskz_loadu_epi8(__mmask32 __U, void const *__P) {
+  // CIR-LABEL: _mm256_maskz_loadu_epi8
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<{{!s8i|!u8i}} x 32>>, !u32i, !cir.vector<!cir.int<s, 1> x 32>, !cir.vector<{{!s8i|!u8i}} x 32>) -> !cir.vector<{{!s8i|!u8i}} x 32>
+
+  // LLVM-LABEL: @test_mm256_maskz_loadu_epi8
+  // LLVM: @llvm.masked.load.v32i8.p0(ptr elementtype(<32 x i8>) align 1 %{{.*}}, <32 x i1> %{{.*}}, <32 x i8> %{{.*}})
+  return _mm256_maskz_loadu_epi8(__U, __P); 
+}
+
+__m512i test_mm512_maskz_load_epi32(__mmask16 __U, void const *__P) {
+  // CIR-LABEL: _mm512_maskz_load_epi32
+  // CIR: cir.llvm.intrinsic "masked.load" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s32i x 16>>, !u32i, !cir.vector<!cir.int<s, 1> x 16>, !cir.vector<!s32i x 16>) -> !cir.vector<!s32i x 16>
+
+  // LLVM-LABEL: @test_mm512_maskz_load_epi32
+  // LLVM: @llvm.masked.load.v16i32.p0(ptr elementtype(<16 x i32>) align 64 %{{.*}}, <16 x i1> %{{.*}}, <16 x i32> %{{.*}})
+  return _mm512_maskz_load_epi32(__U, __P); 
+}
+
+__mmask16 test_mm_movepi8_mask(__m128i __A) {
+  // CIR-LABEL: _mm_movepi8_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<{{!s8i|!u8i}} x 16>, !cir.vector<!cir.int<u, 1> x 16>
+
+  // LLVM-LABEL: @test_mm_movepi8_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <16 x i8> %{{.*}}, zeroinitializer
+
+  // In the unsigned case below, the canonicalizer proves the comparison is
+  // always false (no i8 unsigned value can be < 0) and folds it away.
+  // LLVM-UNSIGNED-CHAR: store i16 0, ptr %{{.*}}, align 2
+
+  // OGCG-LABEL: @test_mm_movepi8_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <16 x i8> %{{.*}}, zeroinitializer
+  return _mm_movepi8_mask(__A); 
+}
+
+__mmask16 test_mm256_movepi16_mask(__m256i __A) {
+  // CIR-LABEL: _mm256_movepi16_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s16i x 16>, !cir.vector<!cir.int<u, 1> x 16>
+
+  // LLVM-LABEL: @test_mm256_movepi16_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <16 x i16> %{{.*}}, zeroinitializer
+
+  // OGCG-LABEL: @test_mm256_movepi16_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <16 x i16> %{{.*}}, zeroinitializer
+  return _mm256_movepi16_mask(__A); 
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/avx512vldq-builtins.c
new file mode 100644
index 0000000000000..349a50285a7a5
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/avx512vldq-builtins.c
@@ -0,0 +1,126 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512dq -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512dq -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512dq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+
+#include <immintrin.h>
+
+__m128i test_mm_movm_epi32(__mmask8 __A) {
+  // CIR-LABEL: _mm_movm_epi32
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 1> x 8>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.int<s, 1> x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.int<s, 1> x 4>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 4> -> !cir.vector<!s32i x 4>
+
+  // LLVM-LABEL: @test_mm_movm_epi32
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %{{.*}} = sext <4 x i1> %{{.*}} to <4 x i32>
+  return _mm_movm_epi32(__A); 
+}
+
+__m256i test_mm256_movm_epi32(__mmask8 __A) {
+  // CIR-LABEL: _mm256_movm_epi32
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 1> x 8>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 8> -> !cir.vector<!s32i x 8>
+
+  // LLVM-LABEL: @test_mm256_movm_epi32
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i32>
+  return _mm256_movm_epi32(__A); 
+}
+
+__m512i test_mm512_movm_epi32(__mmask16 __A) {
+  // CIR-LABEL: _mm512_movm_epi32
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<!cir.int<s, 1> x 16>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 16> -> !cir.vector<!s32i x 16>
+
+  // LLVM-LABEL: @test_mm512_movm_epi32
+  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i32>
+  return _mm512_movm_epi32(__A); 
+}
+
+__m128i test_mm_movm_epi64(__mmask8 __A) {
+  // CIR-LABEL: _mm_movm_epi64
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 1> x 8>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.int<s, 1> x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!cir.int<s, 1> x 2>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 2> -> !cir.vector<!s64i x 2>
+
+  // LLVM-LABEL: @test_mm_movm_epi64
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
+  // LLVM: %{{.*}} = sext <2 x i1> %{{.*}} to <2 x i64>
+  return _mm_movm_epi64(__A); 
+}
+
+__m256i test_mm256_movm_epi64(__mmask8 __A) {
+  // CIR-LABEL: _mm256_movm_epi64
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 1> x 8>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.int<s, 1> x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.int<s, 1> x 4>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 4> -> !cir.vector<!s64i x 4>
+
+  // LLVM-LABEL: @test_mm256_movm_epi64
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %{{.*}} = sext <4 x i1> %{{.*}} to <4 x i64>
+  return _mm256_movm_epi64(__A); 
+}
+
+__m256d test_mm256_insertf64x2(__m256d __A, __m128d __B) {
+  // CIR-LABEL: test_mm256_insertf64x2
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i] : !cir.vector<!cir.double x 4>
+
+  // LLVM-LABEL: @test_mm256_insertf64x2
+  // LLVM: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  return _mm256_insertf64x2(__A, __B, 1); 
+}
+
+__m256i test_mm256_inserti64x2(__m256i __A, __m128i __B) {
+  // CIR-LABEL: test_mm256_inserti64x2
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s64i x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i] : !cir.vector<!s64i x 4>
+
+  // LLVM-LABEL: @test_mm256_inserti64x2
+  // LLVM: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  return _mm256_inserti64x2(__A, __B, 1); 
+}
+
+__mmask8 test_mm256_movepi32_mask(__m256i __A) {
+  // LLVM-LABEL: @test_mm256_movepi32_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer
+
+  // OGCG-LABEL: @test_mm256_movepi32_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer
+  return _mm256_movepi32_mask(__A); 
+}
+
+__mmask8 test_mm_movepi64_mask(__m128i __A) {
+  // CIR-LABEL: _mm_movepi64_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s64i x 2>, !cir.vector<!cir.int<u, 1> x 2>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.int<u, 1> x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.int<u, 1> x 8>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<!cir.int<u, 1> x 8> -> !u8i
+
+  // LLVM-LABEL: @test_mm_movepi64_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <2 x i64> %{{.*}}, zeroinitializer
+  // LLVM: [[SHUF:%.*]] = shufflevector <2 x i1> [[CMP]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+
+  // OGCG-LABEL: @test_mm_movepi64_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <2 x i64> %{{.*}}, zeroinitializer
+  // OGCG: [[SHUF:%.*]] = shufflevector <2 x i1> [[CMP]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  return _mm_movepi64_mask(__A); 
+}
+
+__mmask8 test_mm256_movepi64_mask(__m256i __A) {
+  // CIR-LABEL: _mm256_movepi64_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s64i x 4>, !cir.vector<!cir.int<u, 1> x 4>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.int<u, 1> x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!cir.int<u, 1> x 8>
+
+  // LLVM-LABEL: @test_mm256_movepi64_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer
+  // LLVM: [[SHUF:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+
+  // OGCG-LABEL: @test_mm256_movepi64_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer
+  // OGCG: [[SHUF:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  return _mm256_movepi64_mask(__A); 
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/avx512vlvbmi2-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/avx512vlvbmi2-builtins.c
new file mode 100644
index 0000000000000..6dc1e299c7f28
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/avx512vlvbmi2-builtins.c
@@ -0,0 +1,105 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512vbmi2 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512vbmi2 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+__m128i test_mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const* __P) {
+  // CIR-LABEL: _mm_mask_expandloadu_epi16
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s16i x 8>>, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM-LABEL: @test_mm_mask_expandloadu_epi16
+  // LLVM: @llvm.masked.expandload.v8i16(ptr %{{.*}}, <8 x i1> %{{.*}}, <8 x i16> %{{.*}})
+  return _mm_mask_expandloadu_epi16(__S, __U, __P);
+}
+
+__m128i test_mm_maskz_expandloadu_epi16(__mmask8 __U, void const* __P) {
+  // CIR-LABEL: _mm_maskz_expandloadu_epi16
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s16i x 8>>, !cir.vector<!cir.int<s, 1> x 8>, !cir.vector<!s16i x 8>) -> !cir.vector<!s16i x 8>
+
+  // LLVM-LABEL: @test_mm_maskz_expandloadu_epi16
+  // LLVM: @llvm.masked.expandload.v8i16(ptr %{{.*}}, <8 x i1> %{{.*}}, <8 x i16> %{{.*}})
+  return _mm_maskz_expandloadu_epi16(__U, __P);
+}
+
+__m256i test_mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const* __P) {
+  // CIR-LABEL: _mm256_mask_expandloadu_epi16
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s16i x 16>>, !cir.vector<!cir.int<s, 1> x 16>, !cir.vector<!s16i x 16>) -> !cir.vector<!s16i x 16>
+
+  // LLVM-LABEL: @test_mm256_mask_expandloadu_epi16
+  // LLVM: @llvm.masked.expandload.v16i16(ptr %{{.*}}, <16 x i1> %{{.*}}, <16 x i16> %{{.*}})
+  return _mm256_mask_expandloadu_epi16(__S, __U, __P);
+}
+
+__m256i test_mm256_maskz_expandloadu_epi16(__mmask16 __U, void const* __P) {
+  // CIR-LABEL: _mm256_maskz_expandloadu_epi16
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s16i x 16>>, !cir.vector<!cir.int<s, 1> x 16>, !cir.vector<!s16i x 16>) -> !cir.vector<!s16i x 16>
+
+  // LLVM-LABEL: @test_mm256_maskz_expandloadu_epi16
+  // LLVM: @llvm.masked.expandload.v16i16(ptr %{{.*}}, <16 x i1> %{{.*}}, <16 x i16> %{{.*}})
+  return _mm256_maskz_expandloadu_epi16(__U, __P);
+}
+
+__m128i test_mm_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const* __P) {
+   // CIR-LABEL: _mm_mask_expandloadu_epi8
+   // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s8i x 16>>, !cir.vector<!cir.int<s, 1> x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
+
+   // LLVM-LABEL: @test_mm_mask_expandloadu_epi8
+   // LLVM: @llvm.masked.expandload.v16i8(ptr %{{.*}}, <16 x i1> %{{.*}}, <16 x i8> %{{.*}})
+   return _mm_mask_expandloadu_epi8(__S, __U, __P);
+}
+
+__m128i test_mm_maskz_expandloadu_epi8(__mmask16 __U, void const* __P) {
+   // CIR-LABEL: _mm_maskz_expandloadu_epi8
+   // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s8i x 16>>, !cir.vector<!cir.int<s, 1> x 16>, !cir.vector<!s8i x 16>) -> !cir.vector<!s8i x 16>
+
+   // LLVM-LABEL: @test_mm_maskz_expandloadu_epi8
+   // LLVM: @llvm.masked.expandload.v16i8(ptr %{{.*}}, <16 x i1> %{{.*}}, <16 x i8> %{{.*}})
+   return _mm_maskz_expandloadu_epi8(__U, __P);
+}
+
+__m256i test_mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const* __P) {
+  // CIR-LABEL: _mm256_mask_expandloadu_epi8
+  // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s8i x 32>>, !cir.vector<!cir.int<s, 1> x 32>, !cir.vector<!s8i x 32>) -> !cir.vector<!s8i x 32>
+
+  // LLVM-LABEL: @test_mm256_mask_expandloadu_epi8
+  // LLVM: @llvm.masked.expandload.v32i8(ptr %{{.*}}, <32 x i1> %{{.*}}, <32 x i8> %{{.*}})
+  return _mm256_mask_expandloadu_epi8(__S, __U, __P);
+}
+
+__m256i test_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const* __P) {
+   // CIR-LABEL: _mm256_maskz_expandloadu_epi8
+   // CIR: %{{.*}} = cir.llvm.intrinsic "masked.expandload" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.ptr<!cir.vector<!s8i x 32>>, !cir.vector<!cir.int<s, 1> x 32>, !cir.vector<!s8i x 32>) -> !cir.vector<!s8i x 32>
+
+   // LLVM-LABEL: @test_mm256_maskz_expandloadu_epi8
+   // LLVM: @llvm.masked.expandload.v32i8(ptr %{{.*}}, <32 x i1> %{{.*}}, <32 x i8> %{{.*}})
+   return _mm256_maskz_expandloadu_epi8(__U, __P);
+}
+
+void test_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D) {
+  // CIR-LABEL: _mm256_mask_compressstoreu_epi16
+  // CIR: cir.llvm.intrinsic "masked.compressstore" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s16i x 16>, !cir.ptr<!cir.vector<!s16i x 16>>, !cir.vector<!cir.int<s, 1> x 16>) -> !void
+
+  // LLVM-LABEL: @test_mm256_mask_compressstoreu_epi16
+  // LLVM: @llvm.masked.compressstore.v16i16(<16 x i16> %{{.*}}, ptr %{{.*}}, <16 x i1> %{{.*}})
+  _mm256_mask_compressstoreu_epi16(__P, __U, __D);
+}
+
+void test_mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) {
+  // CIR-LABEL: _mm_mask_compressstoreu_epi8
+  // CIR: cir.llvm.intrinsic "masked.compressstore" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s8i x 16>, !cir.ptr<!cir.vector<!s8i x 16>>, !cir.vector<!cir.int<s, 1> x 16>) -> !void
+
+  // LLVM-LABEL: @test_mm_mask_compressstoreu_epi8
+  // LLVM: @llvm.masked.compressstore.v16i8(<16 x i8> %{{.*}}, ptr %{{.*}}, <16 x i1> %{{.*}})
+  _mm_mask_compressstoreu_epi8(__P, __U, __D);
+}
+
+void test_mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D) {
+  // CIR-LABEL: _mm256_mask_compressstoreu_epi8
+  // CIR: cir.llvm.intrinsic "masked.compressstore" %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<!s8i x 32>, !cir.ptr<!cir.vector<!s8i x 32>>, !cir.vector<!cir.int<s, 1> x 32>) -> !void
+
+  // LLVM-LABEL: @test_mm256_mask_compressstoreu_epi8
+  // LLVM: @llvm.masked.compressstore.v32i8(<32 x i8> %{{.*}}, ptr %{{.*}}, <32 x i1> %{{.*}})
+  _mm256_mask_compressstoreu_epi8(__P, __U, __D);
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/bmi-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/bmi-builtins.c
new file mode 100644
index 0000000000000..6e5873bfd3397
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/bmi-builtins.c
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-linux -Wno-implicit-function-declaration -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-linux -Wno-implicit-function-declaration -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// This test mimics clang/test/CodeGen/X86/bmi-builtins.c, which eventually
+// CIR shall be able to support fully.
+
+#include <immintrin.h>
+
+unsigned short test__tzcnt_u16(unsigned short __X) {
+  // CIR-LABEL: __tzcnt_u16
+  // LLVM-LABEL: __tzcnt_u16
+  return __tzcnt_u16(__X);
+  // CIR: {{%.*}} = cir.llvm.intrinsic "cttz" {{%.*}} : (!u16i, !cir.bool) -> !u16i
+  // LLVM: i16 @llvm.cttz.i16(i16 %{{.*}}, i1 false)
+}
+
+unsigned int test__tzcnt_u32(unsigned int __X) {
+  // CIR-LABEL: __tzcnt_u32
+  // LLVM-LABEL: __tzcnt_u32
+  return __tzcnt_u32(__X);
+  // CIR: {{%.*}} = cir.llvm.intrinsic "cttz" {{%.*}} : (!u32i, !cir.bool) -> !u32i
+  // LLVM: i32 @llvm.cttz.i32(i32 %{{.*}}, i1 false)
+}
+
+#ifdef __x86_64__
+unsigned long long test__tzcnt_u64(unsigned long long __X) {
+  // CIR-LABEL: __tzcnt_u64
+  // LLVM-LABEL: __tzcnt_u64
+  return __tzcnt_u64(__X);
+  // CIR: {{%.*}} = cir.llvm.intrinsic "cttz" {{%.*}} : (!u64i, !cir.bool) -> !u64i
+  // LLVM: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false)
+}
+#endif
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/lzcnt-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/lzcnt-builtins.c
new file mode 100644
index 0000000000000..384be85f99167
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/lzcnt-builtins.c
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-linux -Wno-implicit-function-declaration -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-linux -Wno-implicit-function-declaration -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// This test mimics clang/test/CodeGen/X86/lzcnt-builtins.c, which eventually
+// CIR shall be able to support fully.
+
+#include <immintrin.h>
+
+unsigned int test_lzcnt_u32(unsigned int __X)
+{
+  // CIR-LABEL: _lzcnt_u32
+  // LLVM-LABEL: _lzcnt_u32
+  return _lzcnt_u32(__X);
+  // CIR: {{%.*}} = cir.llvm.intrinsic "ctlz" {{%.*}} : (!u32i, !cir.bool) -> !u32i
+  // LLVM: @llvm.ctlz.i32(i32 %{{.*}}, i1 false)
+}
+
+unsigned long long test__lzcnt_u64(unsigned long long __X)
+{
+  // CIR-LABEL: _lzcnt_u64
+  // LLVM-LABEL: _lzcnt_u64
+  return _lzcnt_u64(__X);
+  // CIR: {{%.*}} = cir.llvm.intrinsic "ctlz" {{%.*}} : (!u64i, !cir.bool) -> !u64i
+  // LLVM: @llvm.ctlz.i64(i64 %{{.*}}, i1 false)
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/mmx-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/mmx-builtins.c
new file mode 100644
index 0000000000000..3b75c00995b71
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/mmx-builtins.c
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +ssse3 -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR-CHECK --implicit-check-not=x86mmx --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +ssse3 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR-CHECK --implicit-check-not=x86mmx --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +ssse3 -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefix=LLVM-CHECK --implicit-check-not=x86mmx --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +ssse3 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefix=LLVM-CHECK --implicit-check-not=x86mmx --input-file=%t.ll %s
+
+// This test mimics clang/test/CodeGen/X86/mmx-builtins.c, which eventually
+// CIR shall be able to support fully.
+
+#include <immintrin.h>
+
+int test_mm_extract_pi16(__m64 a) {
+
+  // CIR-CHECK-LABEL: test_mm_extract_pi16
+  // CIR-CHECK: %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : !u64i] : !cir.vector<!s16i x 4>
+
+  // LLVM-CHECK-LABEL: test_mm_extract_pi16
+  // LLVM-CHECK: extractelement <4 x i16> %{{.*}}, i64 2
+  return _mm_extract_pi16(a, 2);
+}
+
+__m64 test_mm_insert_pi16(__m64 a, int d) {
+
+  // CIR-CHECK-LABEL: test_mm_insert_pi16
+  // CIR-CHECK: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[{{%.*}} : !u64i] : !cir.vector<!s16i x 4>
+
+  // LLVM-CHECK-LABEL: test_mm_insert_pi16
+  // LLVM-CHECK: insertelement <4 x i16>
+  return _mm_insert_pi16(a, d, 2);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/palignr.c b/clang/test/CIR/Incubator/CodeGen/X86/palignr.c
new file mode 100644
index 0000000000000..4855534622665
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/palignr.c
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 %s -triple=x86_64-unknown-linux -target-feature +ssse3 -fclangir -emit-llvm -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=CIR
+
+// RUN: %clang_cc1 %s -triple=x86_64-unknown-linux -target-feature +ssse3 -emit-llvm -o %t_og.ll
+// RUN: FileCheck --input-file=%t_og.ll %s --check-prefix=OGCG
+
+#define _mm_alignr_epi8(a, b, n) (__builtin_ia32_palignr128((a), (b), (n)))
+typedef __attribute__((vector_size(16))) int int4;
+
+// CIR-LABEL: @align1
+// CIR: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 15, i32 16, i32 17
+// OGCG-LABEL: @align1
+// OGCG: %palignr = shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 15, i32 16, i32 17
+int4 align1(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 15); }
+
+// CIR-LABEL: @align2
+// CIR: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 16, i32 17, i32 18
+// OGCG-LABEL: @align2
+// OGCG: %palignr = shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 16, i32 17, i32 18
+int4 align2(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 16); }
+
+// CIR-LABEL: @align3
+// CIR: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 3
+// OGCG-LABEL: @align3
+// OGCG: %palignr = shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 3
+int4 align3(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 17); }
+
+// CIR-LABEL: @align4
+// CIR: store <4 x i32> zeroinitializer, ptr %{{.*}}, align 16
+// OGCG-LABEL: @align4
+// OGCG: ret <4 x i32> zeroinitializer
+int4 align4(int4 a, int4 b) { return _mm_alignr_epi8(a, b, 32); }
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/pause.c b/clang/test/CIR/Incubator/CodeGen/X86/pause.c
new file mode 100644
index 0000000000000..67a0d6770c517
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/pause.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-linux -Wno-implicit-function-declaration -target-feature +sse2 -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-linux -Wno-implicit-function-declaration -target-feature +sse2 -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-linux -Wno-implicit-function-declaration -target-feature -sse2 -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-linux -Wno-implicit-function-declaration -target-feature -sse2 -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// This test mimics clang/test/CodeGen/X86/pause.c, which eventually
+// CIR shall be able to support fully.
+
+#include <x86intrin.h>
+
+void test_mm_pause(void) {
+  // CIR-LABEL: test_mm_pause
+  // LLVM-LABEL: test_mm_pause
+  _mm_pause();
+  // CIR: {{%.*}} = cir.llvm.intrinsic "x86.sse2.pause" : () -> !void
+  // LLVM: call void @llvm.x86.sse2.pause()
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/rd-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/rd-builtins.c
new file mode 100644
index 0000000000000..2c279ceba24cb
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/rd-builtins.c
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -ffreestanding  -triple x86_64-unknown-linux -Wno-implicit-function-declaration -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-linux -Wno-implicit-function-declaration -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// This test mimics clang/test/CodeGen/X86/rd-builtins.c, which eventually
+// CIR shall be able to support fully.
+
+#include <x86intrin.h>
+
+int test_rdtsc(void) {
+  // CIR-LABEL: @test_rdtsc
+  // LLVM-LABEL: @test_rdtsc
+  return __rdtsc();
+  // CIR: {{%.*}} = cir.llvm.intrinsic "x86.rdtsc"  : () -> !u64i
+  // LLVM: call i64 @llvm.x86.rdtsc
+}
+
+unsigned long long test_rdtscp(unsigned int *a) {
+
+  return __rdtscp(a);
+
+  // CIR-LABEL: @__rdtscp
+  // CIR: [[RDTSCP:%.*]] = cir.llvm.intrinsic "x86.rdtscp"  : () -> !rec_anon_struct
+  // CIR: [[TSC_AUX:%.*]] = cir.extract_member [[RDTSCP]][1] : !rec_anon_struct -> !u32i
+  // CIR: cir.store [[TSC_AUX]], %{{.*}} : !u32i, !cir.ptr<!u32i>
+  // CIR: {{%.*}} = cir.extract_member [[RDTSCP]][0] : !rec_anon_struct -> !u64i
+
+  // LLVM: @test_rdtscp
+  // LLVM: [[RDTSCP:%.*]] = call { i64, i32 } @llvm.x86.rdtscp
+  // LLVM: [[TSC_AUX:%.*]] = extractvalue { i64, i32 } [[RDTSCP]], 1
+  // LLVM: store i32 [[TSC_AUX]], ptr %{{.*}}
+  // LLVM: [[TSC:%.*]] = extractvalue { i64, i32 } [[RDTSCP]], 0
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/sse-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/sse-builtins.c
new file mode 100644
index 0000000000000..b44065036438a
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/sse-builtins.c
@@ -0,0 +1,81 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+
+// This test mimics clang/test/CodeGen/X86/sse-builtins.c, which eventually
+// CIR shall be able to support fully.
+
+#include <immintrin.h>
+
+
+void test_mm_prefetch(char const* p) {
+  // CIR-LABEL: test_mm_prefetch
+  // LLVM-LABEL: test_mm_prefetch
+  _mm_prefetch(p, 0);
+  // CIR: cir.prefetch(%{{.*}} : !cir.ptr<!void>) locality(0) read
+  // LLVM: call void @llvm.prefetch.p0(ptr {{.*}}, i32 0, i32 0, i32 1)
+}
+
+void test_mm_sfence(void) {
+  // CIR-LABEL: test_mm_sfence
+  // LLVM-LABEL: test_mm_sfence
+  _mm_sfence();
+  // CIR: {{%.*}} = cir.llvm.intrinsic "x86.sse.sfence" : () -> !void
+  // LLVM: call void @llvm.x86.sse.sfence()
+}
+
+__m128 test_mm_undefined_ps(void) {
+  // CIR-LABEL: _mm_undefined_ps
+  // CIR: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 2>
+  // CIR: %{{.*}} = cir.cast bitcast %[[A]] : !cir.vector<!cir.double x 2> -> !cir.vector<!cir.float x 4>
+  // CIR: cir.return %{{.*}} : !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: test_mm_undefined_ps
+  // LLVM: store <4 x float> zeroinitializer, ptr %[[A:.*]], align 16
+  // LLVM: %{{.*}} = load <4 x float>, ptr %[[A]], align 16
+  // LLVM: ret <4 x float> %{{.*}}
+  return _mm_undefined_ps();
+}
+
+void test_mm_setcsr(unsigned int A) {
+  // CIR-LABEL: test_mm_setcsr
+  // CIR: cir.store {{.*}}, {{.*}} : !u32i
+  // CIR: cir.llvm.intrinsic "x86.sse.ldmxcsr" {{.*}} : (!cir.ptr<!u32i>) -> !void
+
+  // LLVM-LABEL: test_mm_setcsr 
+  // LLVM: store i32
+  // LLVM: call void @llvm.x86.sse.ldmxcsr(ptr {{.*}})
+  _mm_setcsr(A);
+}
+
+unsigned int test_mm_getcsr(void) {
+  // CIR-LABEL: test_mm_getcsr
+  // CIR: cir.llvm.intrinsic "x86.sse.stmxcsr" %{{.*}} : (!cir.ptr<!u32i>) -> !void
+  // CIR: cir.load {{.*}} : !cir.ptr<!u32i>, !u32i
+
+  // LLVM-LABEL: test_mm_getcsr
+  // LLVM: call void @llvm.x86.sse.stmxcsr(ptr %{{.*}})
+  // LLVM: load i32
+  return _mm_getcsr();
+}
+
+__m128 test_mm_shuffle_ps(__m128 A, __m128 B) {
+  // CIR-LABEL: _mm_shuffle_ps
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 4>) [#cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i] : !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: test_mm_shuffle_ps
+  // LLVM: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
+
+  // OGCG-LABEL: test_mm_shuffle_ps
+  // OGCG: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
+  return _mm_shuffle_ps(A, B, 0);
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/sse2-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/sse2-builtins.c
new file mode 100644
index 0000000000000..a05ee633a7c2a
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/sse2-builtins.c
@@ -0,0 +1,119 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR-CHECK,CIR-X64 --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR-CHECK,CIR-X64 --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM-CHECK,LLVM-X64 --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM-CHECK,LLVM-X64 --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+
+// This test mimics clang/test/CodeGen/X86/sse2-builtins.c, which eventually
+// CIR shall be able to support fully.
+
+#include <immintrin.h>
+
+void test_mm_clflush(void* A) {
+  // CIR-CHECK-LABEL: test_mm_clflush
+  // LLVM-CHECK-LABEL: test_mm_clflush
+  _mm_clflush(A);
+  // CIR-CHECK: {{%.*}} = cir.llvm.intrinsic "x86.sse2.clflush" {{%.*}} : (!cir.ptr<!void>) -> !void
+  // LLVM-CHECK: call void @llvm.x86.sse2.clflush(ptr {{%.*}})
+}
+
+__m128d test_mm_undefined_pd(void) {
+  // CIR-X64-LABEL: _mm_undefined_pd
+  // CIR-X64: %{{.*}} = cir.const #cir.zero : !cir.vector<!cir.double x 2>
+  // CIR-X64: cir.return %{{.*}} : !cir.vector<!cir.double x 2>
+
+  // LLVM-X64-LABEL: test_mm_undefined_pd
+  // LLVM-X64: store <2 x double> zeroinitializer, ptr %[[A:.*]], align 16
+  // LLVM-X64: %{{.*}} = load <2 x double>, ptr %[[A]], align 16
+  // LLVM-X64: ret <2 x double> %{{.*}}
+  return _mm_undefined_pd();
+}
+
+__m128i test_mm_undefined_si128(void) {
+  // CIR-CHECK-LABEL: _mm_undefined_si128
+  // CIR-CHECK: %[[A:.*]] = cir.const #cir.zero : !cir.vector<!cir.double x 2>
+  // CIR-CHECK: %{{.*}} = cir.cast bitcast %[[A]] : !cir.vector<!cir.double x 2> -> !cir.vector<!s64i x 2>
+  // CIR-CHECK: cir.return %{{.*}} : !cir.vector<!s64i x 2>
+
+  // LLVM-CHECK-LABEL: test_mm_undefined_si128
+  // LLVM-CHECK: store <2 x i64> zeroinitializer, ptr %[[A:.*]], align 16
+  // LLVM-CHECK: %{{.*}} = load <2 x i64>, ptr %[[A]], align 16
+  // LLVM-CHECK: ret <2 x i64> %{{.*}}
+  return _mm_undefined_si128();
+}
+
+// Lowering to pextrw requires optimization.
+int test_mm_extract_epi16(__m128i A) {
+    
+  // CIR-CHECK-LABEL: test_mm_extract_epi16
+  // CIR-CHECK: %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s16i x 8>
+  // CIR-CHECK: %{{.*}} = cir.cast integral %{{.*}} : !u16i -> !s32i
+
+  // LLVM-CHECK-LABEL: test_mm_extract_epi16
+  // LLVM-CHECK: extractelement <8 x i16> %{{.*}}, {{i32|i64}} 1
+  // LLVM-CHECK: zext i16 %{{.*}} to i32
+  return _mm_extract_epi16(A, 1);
+}
+
+void test_mm_lfence(void) {
+  // CIR-CHECK-LABEL: test_mm_lfence
+  // LLVM-CHECK-LABEL: test_mm_lfence
+  _mm_lfence();
+  // CIR-CHECK: {{%.*}} = cir.llvm.intrinsic "x86.sse2.lfence" : () -> !void
+  // LLVM-CHECK: call void @llvm.x86.sse2.lfence()
+}
+
+void test_mm_mfence(void) {
+  // CIR-CHECK-LABEL: test_mm_mfence
+  // LLVM-CHECK-LABEL: test_mm_mfence
+  _mm_mfence();
+  // CIR-CHECK: {{%.*}} = cir.llvm.intrinsic "x86.sse2.mfence" : () -> !void
+  // LLVM-CHECK: call void @llvm.x86.sse2.mfence()
+}
+
+__m128i test_mm_shufflelo_epi16(__m128i A) {
+  // CIR-CHECK-LABEL: _mm_shufflelo_epi16
+  // CIR-CHECK: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s16i x 8>) [#cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s16i x 8>
+
+  // LLVM-CHECK-LABEL: test_mm_shufflelo_epi16
+  // LLVM-CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
+
+  // OGCG-LABEL: test_mm_shufflelo_epi16
+  // OGCG: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
+  return _mm_shufflelo_epi16(A, 0);
+}
+
+__m128i test_mm_shufflehi_epi16(__m128i A) {
+  // CIR-CHECK-LABEL: _mm_shufflehi_epi16
+  // CIR-CHECK: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s16i x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i] : !cir.vector<!s16i x 8>
+
+  // LLVM-CHECK-LABEL: test_mm_shufflehi_epi16
+  // LLVM-CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+
+  // OGCG-LABEL: test_mm_shufflehi_epi16
+  // OGCG: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+  return _mm_shufflehi_epi16(A, 0);
+}
+
+__m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
+  // CIR-CHECK-LABEL: test_mm_shuffle_pd
+  // CIR-CHECK: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 2>) [#cir.int<1> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!cir.double x 2>
+
+  // LLVM-CHECK-LABEL: test_mm_shuffle_pd
+  // LLVM-CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2>
+
+  // OGCG-LABEL: test_mm_shuffle_pd
+  // OGCG: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 2>
+  return _mm_shuffle_pd(A, B, 1);
+}
+
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/sse3-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/sse3-builtins.c
new file mode 100644
index 0000000000000..c4269fae4960f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/sse3-builtins.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +ssse3 -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +ssse3 -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+
+#include <immintrin.h>
+
+__m128i test_mm_alignr_epi8(__m128i a, __m128i b) {
+  // CIR-LABEL: _mm_alignr_epi8
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s8i x 16>) [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i] : !cir.vector<!s8i x 16>
+
+  // LLVM-LABEL: test_mm_alignr_epi8
+  // LLVM: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
+
+  // OGCG-LABEL: test_mm_alignr_epi8
+  // OGCG: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
+  return _mm_alignr_epi8(a, b, 2);
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/sse41-builtins.c b/clang/test/CIR/Incubator/CodeGen/X86/sse41-builtins.c
new file mode 100644
index 0000000000000..20f78508a2a5a
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/sse41-builtins.c
@@ -0,0 +1,125 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR-CHECK --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR-CHECK --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefix=LLVM-CHECK --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefix=LLVM-CHECK --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+
+// This test mimics clang/test/CodeGen/X86/sse41-builtins.c, which eventually
+// CIR shall be able to support fully.
+
+#include <immintrin.h>
+
+int test_mm_extract_epi8(__m128i x) {
+  // CIR-CHECK-LABEL: test_mm_extract_epi8
+  // CIR-CHECK: %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s8i x 16>
+  // CIR-CHECK: %{{.*}} = cir.cast integral %{{.*}} : !u8i -> !s32i
+
+  // LLVM-CHECK-LABEL: test_mm_extract_epi8
+  // LLVM-CHECK: extractelement <16 x i8> %{{.*}}, {{i32|i64}} 1
+  // LLVM-CHECK: zext i8 %{{.*}} to i32
+  return _mm_extract_epi8(x, 1);
+}
+
+int test_mm_extract_epi32(__m128i x) {
+  // CIR-CHECK-LABEL: test_mm_extract_epi32
+  // CIR-CHECK: %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s32i x 4>
+
+  // LLVM-CHECK-LABEL: test_mm_extract_epi32
+  // LLVM-CHECK: extractelement <4 x i32> %{{.*}}, {{i32|i64}} 1
+  return _mm_extract_epi32(x, 1);
+}
+
+long long test_mm_extract_epi64(__m128i x) {
+  // CIR-CHECK-LABEL: test_mm_extract_epi64
+  // CIR-CHECK: %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!s64i x 2>
+
+  // LLVM-CHECK-LABEL: test_mm_extract_epi64
+  // LLVM-CHECK: extractelement <2 x i64> %{{.*}}, {{i32|i64}} 1
+  return _mm_extract_epi64(x, 1);
+}
+
+int test_mm_extract_ps(__m128 x) {
+  // CIR-CHECK-LABEL: test_mm_extract_ps
+  // CIR-CHECK: %{{.*}} = cir.vec.extract %{{.*}}[%{{.*}} : {{!u32i|!u64i}}] : !cir.vector<!cir.float x 4>
+
+  // LLVM-CHECK-LABEL: test_mm_extract_ps
+  // LLVM-CHECK: extractelement <4 x float> %{{.*}}, {{i32|i64}} 1
+  return _mm_extract_ps(x, 1);
+}
+
+__m128i test_mm_insert_epi8(__m128i x, char b) {
+
+  // CIR-CHECK-LABEL: test_mm_insert_epi8
+  // CIR-CHECK: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[{{%.*}} : {{!u32i|!u64i}}] : !cir.vector<{{!s8i|!u8i}} x 16>
+
+  // LLVM-CHECK-LABEL: test_mm_insert_epi8 
+  // LLVM-CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, {{i32|i64}} 1
+  return _mm_insert_epi8(x, b, 1);
+}
+
+__m128i test_mm_insert_epi32(__m128i x, int b) {
+
+  // CIR-CHECK-LABEL: test_mm_insert_epi32
+  // CIR-CHECK: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[{{%.*}} : {{!u32i|!u64i}}] : !cir.vector<!s32i x 4>
+
+  // LLVM-CHECK-LABEL: test_mm_insert_epi32
+  // LLVM-CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, {{i32|i64}} 1
+  return _mm_insert_epi32(x, b, 1);
+}
+
+#ifdef __x86_64__
+__m128i test_mm_insert_epi64(__m128i x, long long b) {
+
+  // CIR-CHECK-LABEL: test_mm_insert_epi64
+  // CIR-CHECK: {{%.*}} = cir.vec.insert {{%.*}}, {{%.*}}[{{%.*}} : {{!u32i|!u64i}}] : !cir.vector<!s64i x 2>
+
+  // LLVM-CHECK-LABEL: test_mm_insert_epi64
+  // LLVM-CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, {{i32|i64}} 1
+  return _mm_insert_epi64(x, b, 1);
+}
+#endif
+
+__m128i test_mm_blend_epi16(__m128i V1, __m128i V2) {
+  // CIR-CHECK-LABEL: test_mm_blend_epi16
+  // CIR-CHECK: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s16i x 8>) [#cir.int<0> : !s32i, #cir.int<9> : !s32i, #cir.int<2> : !s32i, #cir.int<11> : !s32i, #cir.int<4> : !s32i, #cir.int<13> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s16i x 8>
+
+  // LLVM-CHECK-LABEL: test_mm_blend_epi16
+  // LLVM-CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
+
+  // OGCG-LABEL: test_mm_blend_epi16
+  // OGCG: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
+  return _mm_blend_epi16(V1, V2, 42);
+}
+
+__m128d test_mm_blend_pd(__m128d V1, __m128d V2) {
+  // CIR-CHECK-LABEL: test_mm_blend_pd
+  // CIR-CHECK: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s64i x 2>) [#cir.int<0> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s64i x 2>
+
+  // LLVM-CHECK-LABEL: test_mm_blend_pd
+  // LLVM-CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 3>
+
+  // OGCG-LABEL: test_mm_blend_pd
+  // OGCG: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 3>
+  return _mm_blend_pd(V1, V2, 2);
+}
+
+__m128 test_mm_blend_ps(__m128 V1, __m128 V2) {
+  // CIR-CHECK-LABEL: test_mm_blend_ps
+  // CIR-CHECK: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 4>) [#cir.int<0> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s32i x 4>
+
+  // LLVM-CHECK-LABEL: test_mm_blend_ps
+  // LLVM-CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+
+  // OGCG-LABEL: test_mm_blend_ps
+  // OGCG: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+  return _mm_blend_ps(V1, V2, 6);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/X86/x86_64-xsave.c b/clang/test/CIR/Incubator/CodeGen/X86/x86_64-xsave.c
new file mode 100644
index 0000000000000..86a6c3bdce932
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/X86/x86_64-xsave.c
@@ -0,0 +1,339 @@
+// RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=x86_64-unknown-linux -target-feature +xsave -fclangir -emit-cir -o %t.cir -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=CIR-XSAVE --input-file=%t.cir %s
+// RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=x86_64-unknown-linux -target-feature +xsave -fclangir -emit-llvm -o %t.ll -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=LLVM-XSAVE --input-file=%t.ll %s
+// RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=x86_64-unknown-linux -target-feature +xsave -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=CIR-XSAVE --input-file=%t.cir %s
+// RUN: %clang_cc1 %s -DTEST_XSAVE -O0 -triple=x86_64-unknown-linux -target-feature +xsave -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=LLVM-XSAVE --input-file=%t.ll %s
+
+// RUN: %clang_cc1 %s -DTEST_XGETBV -O0 -triple=x86_64-unknown-linux -target-feature +xsave -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=CIR-XGETBV --input-file=%t.cir %s
+// RUN: %clang_cc1 %s -DTEST_XGETBV -O0 -triple=x86_64-unknown-linux -target-feature +xsave -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=LLVM-XGETBV --input-file=%t.ll %s
+// RUN: %clang_cc1 %s -DTEST_XSETBV -O0 -triple=x86_64-unknown-linux -target-feature +xsave -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=CIR-XSETBV --input-file=%t.cir %s
+// RUN: %clang_cc1 %s -DTEST_XSETBV -O0 -triple=x86_64-unknown-linux -target-feature +xsave -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=LLVM-XSETBV --input-file=%t.ll %s
+
+// RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=x86_64-unknown-linux -target-feature +xsave -target-feature +xsaveopt -fclangir -emit-cir -o %t.cir -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=CIR-XSAVEOPT --input-file=%t.cir %s
+// RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=x86_64-unknown-linux -target-feature +xsave -target-feature +xsaveopt -fclangir -emit-llvm -o %t.ll -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=LLVM-XSAVEOPT --input-file=%t.ll %s
+// RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=x86_64-unknown-linux -target-feature +xsave -target-feature +xsaveopt -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=CIR-XSAVEOPT --input-file=%t.cir %s
+// RUN: %clang_cc1 %s -DTEST_XSAVEOPT -O0 -triple=x86_64-unknown-linux -target-feature +xsave -target-feature +xsaveopt -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=LLVM-XSAVEOPT --input-file=%t.ll %s
+
+// RUN: %clang_cc1 %s -DTEST_XSAVEC -O0 -triple=x86_64-unknown-linux -target-feature +xsave -target-feature +xsavec -fclangir -emit-cir -o %t.cir -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=CIR-XSAVEC --input-file=%t.cir %s
+// RUN: %clang_cc1 %s -DTEST_XSAVEC -O0 -triple=x86_64-unknown-linux -target-feature +xsave -target-feature +xsavec -fclangir -emit-llvm -o %t.ll -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=LLVM-XSAVEC --input-file=%t.ll %s
+// RUN: %clang_cc1 %s -DTEST_XSAVEC -O0 -triple=x86_64-unknown-linux -target-feature +xsave -target-feature +xsavec -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=CIR-XSAVEC --input-file=%t.cir %s
+// RUN: %clang_cc1 %s -DTEST_XSAVEC -O0 -triple=x86_64-unknown-linux -target-feature +xsave -target-feature +xsavec -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=LLVM-XSAVEC --input-file=%t.ll %s
+
+// RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=x86_64-unknown-linux -target-feature +xsave -target-feature +xsaves -fclangir -emit-cir -o %t.cir -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=CIR-XSAVES --input-file=%t.cir %s
+// RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=x86_64-unknown-linux -target-feature +xsave -target-feature +xsaves -fclangir -emit-llvm -o %t.ll -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=LLVM-XSAVES --input-file=%t.ll %s
+// RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=x86_64-unknown-linux -target-feature +xsave -target-feature +xsaves -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=CIR-XSAVES --input-file=%t.cir %s
+// RUN: %clang_cc1 %s -DTEST_XSAVES -O0 -triple=x86_64-unknown-linux -target-feature +xsave -target-feature +xsaves -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Wno-unused-but-set-variable -Werror
+// RUN: FileCheck --check-prefix=LLVM-XSAVES --input-file=%t.ll %s
+
+// This test mimics clang/test/CodeGen/X86/x86_64-xsave.c, which eventually
+// CIR shall be able to support fully.
+
+// Don't include mm_malloc.h, it's system specific.
+#define __MM_MALLOC_H
+#include <x86intrin.h>
+
+
+void test(void) {
+  unsigned long long tmp_ULLi;
+  unsigned int       tmp_Ui;
+  void*              tmp_vp;
+  tmp_ULLi = 0; tmp_Ui = 0; tmp_vp = 0;
+
+#ifdef TEST_XSAVE
+// CIR-XSAVE: [[tmp_vp_1:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR-XSAVE: [[tmp_ULLi_1:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!u64i>, !u64i
+// CIR-XSAVE: [[high64_1:%.*]] = cir.shift(right, [[tmp_ULLi_1]] : !u64i, %{{.*}} : !u64i) -> !u64i
+// CIR-XSAVE: [[high32_1:%.*]] = cir.cast integral [[high64_1]] : !u64i -> !s32i
+// CIR-XSAVE: [[low32_1:%.*]] = cir.cast integral [[tmp_ULLi_1]] : !u64i -> !s32i
+// CIR-XSAVE: %{{.*}} = cir.llvm.intrinsic "x86.xsave" [[tmp_vp_1]], [[high32_1]], [[low32_1]] : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+
+// LLVM-XSAVE: [[tmp_vp_1:%.*]] = load ptr, ptr %{{.*}}, align 8
+// LLVM-XSAVE: [[tmp_ULLi_1:%.*]] = load i64, ptr %{{.*}}, align 8
+// LLVM-XSAVE: [[high64_1:%.*]] = lshr i64 [[tmp_ULLi_1]], 32
+// LLVM-XSAVE: [[high32_1:%.*]] = trunc i64 [[high64_1]] to i32
+// LLVM-XSAVE: [[low32_1:%.*]] = trunc i64 [[tmp_ULLi_1]] to i32
+// LLVM-XSAVE: call void @llvm.x86.xsave(ptr [[tmp_vp_1]], i32 [[high32_1]], i32 [[low32_1]])
+  (void)__builtin_ia32_xsave(tmp_vp, tmp_ULLi);
+
+
+// CIR-XSAVE: [[tmp_vp_2:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR-XSAVE: [[tmp_ULLi_2:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!u64i>, !u64i
+// CIR-XSAVE: [[high64_2:%.*]] = cir.shift(right, [[tmp_ULLi_2]] : !u64i, %{{.*}} : !u64i) -> !u64i
+// CIR-XSAVE: [[high32_2:%.*]] = cir.cast integral [[high64_2]] : !u64i -> !s32i
+// CIR-XSAVE: [[low32_2:%.*]] = cir.cast integral [[tmp_ULLi_2]] : !u64i -> !s32i
+// CIR-XSAVE: %{{.*}} = cir.llvm.intrinsic "x86.xsave64" [[tmp_vp_2]], [[high32_2]], [[low32_2]] : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+
+// LLVM-XSAVE: [[tmp_vp_2:%.*]] = load ptr, ptr %{{.*}}, align 8
+// LLVM-XSAVE: [[tmp_ULLi_2:%.*]] = load i64, ptr %{{.*}}, align 8
+// LLVM-XSAVE: [[high64_2:%.*]] = lshr i64 [[tmp_ULLi_2]], 32
+// LLVM-XSAVE: [[high32_2:%.*]] = trunc i64 [[high64_2]] to i32
+// LLVM-XSAVE: [[low32_2:%.*]] = trunc i64 [[tmp_ULLi_2]] to i32
+// LLVM-XSAVE: call void @llvm.x86.xsave64(ptr [[tmp_vp_2]], i32 [[high32_2]], i32 [[low32_2]])
+  (void)__builtin_ia32_xsave64(tmp_vp, tmp_ULLi);
+
+
+// CIR-XSAVE: [[tmp_vp_3:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR-XSAVE: [[tmp_ULLi_3:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!u64i>, !u64i
+// CIR-XSAVE: [[high64_3:%.*]] = cir.shift(right, [[tmp_ULLi_3]] : !u64i, %{{.*}} : !u64i) -> !u64i
+// CIR-XSAVE: [[high32_3:%.*]] = cir.cast integral [[high64_3]] : !u64i -> !s32i
+// CIR-XSAVE: [[low32_3:%.*]] = cir.cast integral [[tmp_ULLi_3]] : !u64i -> !s32i
+// CIR-XSAVE: %{{.*}} = cir.llvm.intrinsic "x86.xrstor" [[tmp_vp_3]], [[high32_3]], [[low32_3]] : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+
+// LLVM-XSAVE: [[tmp_vp_3:%.*]] = load ptr, ptr %{{.*}}, align 8
+// LLVM-XSAVE: [[tmp_ULLi_3:%.*]] = load i64, ptr %{{.*}}, align 8
+// LLVM-XSAVE: [[high64_3:%.*]] = lshr i64 [[tmp_ULLi_3]], 32
+// LLVM-XSAVE: [[high32_3:%.*]] = trunc i64 [[high64_3]] to i32
+// LLVM-XSAVE: [[low32_3:%.*]] = trunc i64 [[tmp_ULLi_3]] to i32
+// LLVM-XSAVE: call void @llvm.x86.xrstor(ptr [[tmp_vp_3]], i32 [[high32_3]], i32 [[low32_3]])
+  (void)__builtin_ia32_xrstor(tmp_vp, tmp_ULLi);
+
+
+// CIR-XSAVE: [[tmp_vp_4:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR-XSAVE: [[tmp_ULLi_4:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!u64i>, !u64i
+// CIR-XSAVE: [[high64_4:%.*]] = cir.shift(right, [[tmp_ULLi_4]] : !u64i, %{{.*}} : !u64i) -> !u64i
+// CIR-XSAVE: [[high32_4:%.*]] = cir.cast integral [[high64_4]] : !u64i -> !s32i
+// CIR-XSAVE: [[low32_4:%.*]] = cir.cast integral [[tmp_ULLi_4]] : !u64i -> !s32i
+// CIR-XSAVE: %{{.*}} = cir.llvm.intrinsic "x86.xrstor64" [[tmp_vp_4]], [[high32_4]], [[low32_4]] : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+
+// LLVM-XSAVE: [[tmp_vp_4:%.*]] = load ptr, ptr %{{.*}}, align 8
+// LLVM-XSAVE: [[tmp_ULLi_4:%.*]] = load i64, ptr %{{.*}}, align 8
+// LLVM-XSAVE: [[high64_4:%.*]] = lshr i64 [[tmp_ULLi_4]], 32
+// LLVM-XSAVE: [[high32_4:%.*]] = trunc i64 [[high64_4]] to i32
+// LLVM-XSAVE: [[low32_4:%.*]] = trunc i64 [[tmp_ULLi_4]] to i32
+// LLVM-XSAVE: call void @llvm.x86.xrstor64(ptr [[tmp_vp_4]], i32 [[high32_4]], i32 [[low32_4]])
+  (void)__builtin_ia32_xrstor64(tmp_vp, tmp_ULLi);
+  
+  
+// CIR-XSAVE: {{%.*}} = cir.llvm.intrinsic "x86.xsave" {{%.*}} : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+// LLVM-XSAVE: call void @llvm.x86.xsave 
+  (void)_xsave(tmp_vp, tmp_ULLi);
+
+// CIR-XSAVE: {{%.*}} = cir.llvm.intrinsic "x86.xsave64" {{%.*}} : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+// LLVM-XSAVE: call void @llvm.x86.xsave64
+  (void)_xsave64(tmp_vp, tmp_ULLi);
+
+// CIR-XSAVE: {{%.*}} = cir.llvm.intrinsic "x86.xrstor" {{%.*}} : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+// LLVM-XSAVE: call void @llvm.x86.xrstor
+  (void)_xrstor(tmp_vp, tmp_ULLi);
+
+// CIR-XSAVE: {{%.*}} = cir.llvm.intrinsic "x86.xrstor64" {{%.*}} : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+// LLVM-XSAVE: call void @llvm.x86.xrstor64
+  (void)_xrstor64(tmp_vp, tmp_ULLi);
+#endif
+
+#ifdef TEST_XSAVEOPT
+// CIR-XSAVEOPT: [[tmp_vp_1:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR-XSAVEOPT: [[tmp_ULLi_1:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!u64i>, !u64i
+// CIR-XSAVEOPT: [[high64_1:%.*]] = cir.shift(right, [[tmp_ULLi_1]] : !u64i, %{{.*}} : !u64i) -> !u64i
+// CIR-XSAVEOPT: [[high32_1:%.*]] = cir.cast integral [[high64_1]] : !u64i -> !s32i
+// CIR-XSAVEOPT: [[low32_1:%.*]] = cir.cast integral [[tmp_ULLi_1]] : !u64i -> !s32i
+// CIR-XSAVEOPT: %{{.*}} = cir.llvm.intrinsic "x86.xsaveopt" [[tmp_vp_1]], [[high32_1]], [[low32_1]] : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+
+// LLVM-XSAVEOPT: [[tmp_vp_1:%.*]] = load ptr, ptr %{{.*}}, align 8
+// LLVM-XSAVEOPT: [[tmp_ULLi_1:%.*]] = load i64, ptr %{{.*}}, align 8
+// LLVM-XSAVEOPT: [[high64_1:%.*]] = lshr i64 [[tmp_ULLi_1]], 32
+// LLVM-XSAVEOPT: [[high32_1:%.*]] = trunc i64 [[high64_1]] to i32
+// LLVM-XSAVEOPT: [[low32_1:%.*]] = trunc i64 [[tmp_ULLi_1]] to i32
+// LLVM-XSAVEOPT: call void @llvm.x86.xsaveopt(ptr [[tmp_vp_1]], i32 [[high32_1]], i32 [[low32_1]])
+  (void)__builtin_ia32_xsaveopt(tmp_vp, tmp_ULLi);
+
+// CIR-XSAVEOPT: [[tmp_vp_2:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR-XSAVEOPT: [[tmp_ULLi_2:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!u64i>, !u64i
+// CIR-XSAVEOPT: [[high64_2:%.*]] = cir.shift(right, [[tmp_ULLi_2]] : !u64i, %{{.*}} : !u64i) -> !u64i
+// CIR-XSAVEOPT: [[high32_2:%.*]] = cir.cast integral [[high64_2]] : !u64i -> !s32i
+// CIR-XSAVEOPT: [[low32_2:%.*]] = cir.cast integral [[tmp_ULLi_2]] : !u64i -> !s32i
+// CIR-XSAVEOPT: %{{.*}} = cir.llvm.intrinsic "x86.xsaveopt64" [[tmp_vp_2]], [[high32_2]], [[low32_2]] : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+
+// LLVM-XSAVEOPT: [[tmp_vp_2:%.*]] = load ptr, ptr %{{.*}}, align 8
+// LLVM-XSAVEOPT: [[tmp_ULLi_2:%.*]] = load i64, ptr %{{.*}}, align 8
+// LLVM-XSAVEOPT: [[high64_2:%.*]] = lshr i64 [[tmp_ULLi_2]], 32
+// LLVM-XSAVEOPT: [[high32_2:%.*]] = trunc i64 [[high64_2]] to i32
+// LLVM-XSAVEOPT: [[low32_2:%.*]] = trunc i64 [[tmp_ULLi_2]] to i32
+// LLVM-XSAVEOPT: call void @llvm.x86.xsaveopt64(ptr [[tmp_vp_2]], i32 [[high32_2]], i32 [[low32_2]])
+  (void)__builtin_ia32_xsaveopt64(tmp_vp, tmp_ULLi);
+
+// CIR-XSAVEOPT: {{%.*}} = cir.llvm.intrinsic "x86.xsaveopt" {{%.*}} : (!cir.ptr<!void>, !s32i, !s32i) -> !void 
+// LLVM-XSAVEOPT: call void @llvm.x86.xsaveopt
+  (void)_xsaveopt(tmp_vp, tmp_ULLi);
+  
+// CIR-XSAVEOPT: {{%.*}} = cir.llvm.intrinsic "x86.xsaveopt64" {{%.*}} : (!cir.ptr<!void>, !s32i, !s32i) -> !void 
+// LLVM-XSAVEOPT: call void @llvm.x86.xsaveopt64
+  (void)_xsaveopt64(tmp_vp, tmp_ULLi);
+#endif
+
+#ifdef TEST_XSAVEC
+// CIR-XSAVEC: [[tmp_vp_1:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR-XSAVEC: [[tmp_ULLi_1:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!u64i>, !u64i
+// CIR-XSAVEC: [[high64_1:%.*]] = cir.shift(right, [[tmp_ULLi_1]] : !u64i, %{{.*}} : !u64i) -> !u64i
+// CIR-XSAVEC: [[high32_1:%.*]] = cir.cast integral [[high64_1]] : !u64i -> !s32i
+// CIR-XSAVEC: [[low32_1:%.*]] = cir.cast integral [[tmp_ULLi_1]] : !u64i -> !s32i
+// CIR-XSAVEC: %{{.*}} = cir.llvm.intrinsic "x86.xsavec" [[tmp_vp_1]], [[high32_1]], [[low32_1]] : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+
+// LLVM-XSAVEC: [[tmp_vp_1:%.*]] = load ptr, ptr %{{.*}}, align 8
+// LLVM-XSAVEC: [[tmp_ULLi_1:%.*]] = load i64, ptr %{{.*}}, align 8
+// LLVM-XSAVEC: [[high64_1:%.*]] = lshr i64 [[tmp_ULLi_1]], 32
+// LLVM-XSAVEC: [[high32_1:%.*]] = trunc i64 [[high64_1]] to i32
+// LLVM-XSAVEC: [[low32_1:%.*]] = trunc i64 [[tmp_ULLi_1]] to i32
+// LLVM-XSAVEC: call void @llvm.x86.xsavec(ptr [[tmp_vp_1]], i32 [[high32_1]], i32 [[low32_1]])
+  (void)__builtin_ia32_xsavec(tmp_vp, tmp_ULLi);
+
+
+// CIR-XSAVEC: [[tmp_vp_2:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR-XSAVEC: [[tmp_ULLi_2:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!u64i>, !u64i
+// CIR-XSAVEC: [[high64_2:%.*]] = cir.shift(right, [[tmp_ULLi_2]] : !u64i, %{{.*}} : !u64i) -> !u64i
+// CIR-XSAVEC: [[high32_2:%.*]] = cir.cast integral [[high64_2]] : !u64i -> !s32i
+// CIR-XSAVEC: [[low32_2:%.*]] = cir.cast integral [[tmp_ULLi_2]] : !u64i -> !s32i
+// CIR-XSAVEC: %{{.*}} = cir.llvm.intrinsic "x86.xsavec64" [[tmp_vp_2]], [[high32_2]], [[low32_2]] : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+
+// LLVM-XSAVEC: [[tmp_vp_2:%.*]] = load ptr, ptr %{{.*}}, align 8
+// LLVM-XSAVEC: [[tmp_ULLi_2:%.*]] = load i64, ptr %{{.*}}, align 8
+// LLVM-XSAVEC: [[high64_2:%.*]] = lshr i64 [[tmp_ULLi_2]], 32
+// LLVM-XSAVEC: [[high32_2:%.*]] = trunc i64 [[high64_2]] to i32
+// LLVM-XSAVEC: [[low32_2:%.*]] = trunc i64 [[tmp_ULLi_2]] to i32
+// LLVM-XSAVEC: call void @llvm.x86.xsavec64(ptr [[tmp_vp_2]], i32 [[high32_2]], i32 [[low32_2]])
+  (void)__builtin_ia32_xsavec64(tmp_vp, tmp_ULLi);
+  
+// CIR-XSAVEC: {{%.*}} = cir.llvm.intrinsic "x86.xsavec" {{%.*}} : (!cir.ptr<!void>, !s32i, !s32i) -> !void 
+// LLVM-XSAVEC: call void @llvm.x86.xsavec
+  (void)_xsavec(tmp_vp, tmp_ULLi);
+  
+// CIR-XSAVEC: {{%.*}} = cir.llvm.intrinsic "x86.xsavec64" {{%.*}} : (!cir.ptr<!void>, !s32i, !s32i) -> !void 
+// LLVM-XSAVEC: call void @llvm.x86.xsavec64
+  (void)_xsavec64(tmp_vp, tmp_ULLi);
+#endif
+
+#ifdef TEST_XSAVES
+// CIR-XSAVES: [[tmp_vp_1:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR-XSAVES: [[tmp_ULLi_1:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!u64i>, !u64i
+// CIR-XSAVES: [[high64_1:%.*]] = cir.shift(right, [[tmp_ULLi_1]] : !u64i, %{{.*}} : !u64i) -> !u64i
+// CIR-XSAVES: [[high32_1:%.*]] = cir.cast integral [[high64_1]] : !u64i -> !s32i
+// CIR-XSAVES: [[low32_1:%.*]] = cir.cast integral [[tmp_ULLi_1]] : !u64i -> !s32i
+// CIR-XSAVES: %{{.*}} = cir.llvm.intrinsic "x86.xsaves" [[tmp_vp_1]], [[high32_1]], [[low32_1]] : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+
+// LLVM-XSAVES: [[tmp_vp_1:%.*]] = load ptr, ptr %{{.*}}, align 8
+// LLVM-XSAVES: [[tmp_ULLi_1:%.*]] = load i64, ptr %{{.*}}, align 8
+// LLVM-XSAVES: [[high64_1:%.*]] = lshr i64 [[tmp_ULLi_1]], 32
+// LLVM-XSAVES: [[high32_1:%.*]] = trunc i64 [[high64_1]] to i32
+// LLVM-XSAVES: [[low32_1:%.*]] = trunc i64 [[tmp_ULLi_1]] to i32
+// LLVM-XSAVES: call void @llvm.x86.xsaves(ptr [[tmp_vp_1]], i32 [[high32_1]], i32 [[low32_1]])
+  (void)__builtin_ia32_xsaves(tmp_vp, tmp_ULLi);
+
+
+// CIR-XSAVES: [[tmp_vp_2:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR-XSAVES: [[tmp_ULLi_2:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!u64i>, !u64i
+// CIR-XSAVES: [[high64_2:%.*]] = cir.shift(right, [[tmp_ULLi_2]] : !u64i, %{{.*}} : !u64i) -> !u64i
+// CIR-XSAVES: [[high32_2:%.*]] = cir.cast integral [[high64_2]] : !u64i -> !s32i
+// CIR-XSAVES: [[low32_2:%.*]] = cir.cast integral [[tmp_ULLi_2]] : !u64i -> !s32i
+// CIR-XSAVES: %{{.*}} = cir.llvm.intrinsic "x86.xsaves64" [[tmp_vp_2]], [[high32_2]], [[low32_2]] : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+
+// LLVM-XSAVES: [[tmp_vp_2:%.*]] = load ptr, ptr %{{.*}}, align 8
+// LLVM-XSAVES: [[tmp_ULLi_2:%.*]] = load i64, ptr %{{.*}}, align 8
+// LLVM-XSAVES: [[high64_2:%.*]] = lshr i64 [[tmp_ULLi_2]], 32
+// LLVM-XSAVES: [[high32_2:%.*]] = trunc i64 [[high64_2]] to i32
+// LLVM-XSAVES: [[low32_2:%.*]] = trunc i64 [[tmp_ULLi_2]] to i32
+// LLVM-XSAVES: call void @llvm.x86.xsaves64(ptr [[tmp_vp_2]], i32 [[high32_2]], i32 [[low32_2]])
+  (void)__builtin_ia32_xsaves64(tmp_vp, tmp_ULLi);
+
+
+// CIR-XSAVES: [[tmp_vp_3:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR-XSAVES: [[tmp_ULLi_3:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!u64i>, !u64i
+// CIR-XSAVES: [[high64_3:%.*]] = cir.shift(right, [[tmp_ULLi_3]] : !u64i, %{{.*}} : !u64i) -> !u64i
+// CIR-XSAVES: [[high32_3:%.*]] = cir.cast integral [[high64_3]] : !u64i -> !s32i
+// CIR-XSAVES: [[low32_3:%.*]] = cir.cast integral [[tmp_ULLi_3]] : !u64i -> !s32i
+// CIR-XSAVES: %{{.*}} = cir.llvm.intrinsic "x86.xrstors" [[tmp_vp_3]], [[high32_3]], [[low32_3]] : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+
+// LLVM-XSAVES: [[tmp_vp_3:%.*]] = load ptr, ptr %{{.*}}, align 8
+// LLVM-XSAVES: [[tmp_ULLi_3:%.*]] = load i64, ptr %{{.*}}, align 8
+// LLVM-XSAVES: [[high64_3:%.*]] = lshr i64 [[tmp_ULLi_3]], 32
+// LLVM-XSAVES: [[high32_3:%.*]] = trunc i64 [[high64_3]] to i32
+// LLVM-XSAVES: [[low32_3:%.*]] = trunc i64 [[tmp_ULLi_3]] to i32
+// LLVM-XSAVES: call void @llvm.x86.xrstors(ptr [[tmp_vp_3]], i32 [[high32_3]], i32 [[low32_3]])
+  (void)__builtin_ia32_xrstors(tmp_vp, tmp_ULLi);
+
+
+// CIR-XSAVES: [[tmp_vp_4:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR-XSAVES: [[tmp_ULLi_4:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!u64i>, !u64i
+// CIR-XSAVES: [[high64_4:%.*]] = cir.shift(right, [[tmp_ULLi_4]] : !u64i, %{{.*}} : !u64i) -> !u64i
+// CIR-XSAVES: [[high32_4:%.*]] = cir.cast integral [[high64_4]] : !u64i -> !s32i
+// CIR-XSAVES: [[low32_4:%.*]] = cir.cast integral [[tmp_ULLi_4]] : !u64i -> !s32i
+// CIR-XSAVES: %{{.*}} = cir.llvm.intrinsic "x86.xrstors64" [[tmp_vp_4]], [[high32_4]], [[low32_4]] : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+
+// LLVM-XSAVES: [[tmp_vp_4:%.*]] = load ptr, ptr %{{.*}}, align 8
+// LLVM-XSAVES: [[tmp_ULLi_4:%.*]] = load i64, ptr %{{.*}}, align 8
+// LLVM-XSAVES: [[high64_4:%.*]] = lshr i64 [[tmp_ULLi_4]], 32
+// LLVM-XSAVES: [[high32_4:%.*]] = trunc i64 [[high64_4]] to i32
+// LLVM-XSAVES: [[low32_4:%.*]] = trunc i64 [[tmp_ULLi_4]] to i32
+// LLVM-XSAVES: call void @llvm.x86.xrstors64(ptr [[tmp_vp_4]], i32 [[high32_4]], i32 [[low32_4]])
+  (void)__builtin_ia32_xrstors64(tmp_vp, tmp_ULLi);
+  
+  
+// CIR-XSAVES: {{%.*}} = cir.llvm.intrinsic "x86.xsaves" {{%.*}} : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+// LLVM-XSAVES: call void @llvm.x86.xsaves
+  (void)_xsaves(tmp_vp, tmp_ULLi);
+
+// CIR-XSAVES: {{%.*}} = cir.llvm.intrinsic "x86.xsaves64" {{%.*}} : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+// LLVM-XSAVES: call void @llvm.x86.xsaves64
+  (void)_xsaves64(tmp_vp, tmp_ULLi);
+
+// CIR-XSAVES: {{%.*}} = cir.llvm.intrinsic "x86.xrstors" {{%.*}} : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+// LLVM-XSAVES: call void @llvm.x86.xrstors
+  (void)_xrstors(tmp_vp, tmp_ULLi);
+
+// CIR-XSAVES: {{%.*}} = cir.llvm.intrinsic "x86.xrstors64" {{%.*}} : (!cir.ptr<!void>, !s32i, !s32i) -> !void
+// LLVM-XSAVES: call void @llvm.x86.xrstors64
+  (void)_xrstors64(tmp_vp, tmp_ULLi);
+#endif
+
+#ifdef TEST_XGETBV
+
+// CIR-XGETBV: [[tmp_Ui:%.*]] =  cir.load align(4) %{{.*}} : !cir.ptr<!u32i>, !u32i
+// CIR-XGETBV: {{%.*}} = cir.llvm.intrinsic "x86.xgetbv" [[tmp_Ui]] : (!u32i) -> !u64i
+
+// LLVM-XGETBV: [[tmp_Ui:%.*]] = load i32, ptr %{{.*}}, align 4
+// LLVM-XGETBV: call i64 @llvm.x86.xgetbv(i32 [[tmp_Ui]])
+  tmp_ULLi = __builtin_ia32_xgetbv(tmp_Ui);
+  
+// CIR-XGETBV: {{%.*}} = cir.llvm.intrinsic "x86.xgetbv" {{%.*}} : (!u32i) -> !u64i
+// LLVM-XGETBV: call i64 @llvm.x86.xgetbv
+  tmp_ULLi = _xgetbv(tmp_Ui);
+#endif
+
+#ifdef TEST_XSETBV
+// CIR-XSETBV: [[tmp_Ui_1:%.*]] = cir.load align(4) %{{.*}} : !cir.ptr<!u32i>, !u32i
+// CIR-XSETBV: [[tmp_ULLi_1:%.*]] = cir.load align(8) %{{.*}} : !cir.ptr<!u64i>, !u64i
+// CIR-XSETBV: [[high64_1:%.*]] = cir.shift(right, [[tmp_ULLi_1]] : !u64i, %{{.*}} : !u64i) -> !u64i
+// CIR-XSETBV: [[high32_1:%.*]] = cir.cast integral [[high64_1]] : !u64i -> !s32i
+// CIR-XSETBV: [[low32_1:%.*]] = cir.cast integral [[tmp_ULLi_1]] : !u64i -> !s32i
+// CIR-XSETBV: %{{.*}} = cir.llvm.intrinsic "x86.xsetbv" [[tmp_Ui_1]], [[high32_1]], [[low32_1]] : (!u32i, !s32i, !s32i) -> !void
+
+// LLVM-XSETBV: [[tmp_Ui_1:%.*]] = load i32, ptr %{{.*}}, align 4
+// LLVM-XSETBV: [[tmp_ULLi_1:%.*]] = load i64, ptr %{{.*}}, align 8
+// LLVM-XSETBV: [[high64_1:%.*]] = lshr i64 [[tmp_ULLi_1]], 32
+// LLVM-XSETBV: [[high32_1:%.*]] = trunc i64 [[high64_1]] to i32
+// LLVM-XSETBV: [[low32_1:%.*]] = trunc i64 [[tmp_ULLi_1]] to i32
+// LLVM-XSETBV: call void @llvm.x86.xsetbv(i32 [[tmp_Ui_1]], i32 [[high32_1]], i32 [[low32_1]])
+  (void)__builtin_ia32_xsetbv(tmp_Ui, tmp_ULLi);
+
+// CIR-XSETBV: {{%.*}} = cir.llvm.intrinsic "x86.xsetbv" {{%.*}} : (!u32i, !s32i, !s32i) -> !void
+// LLVM-XSETBV: call void @llvm.x86.xsetbv
+  (void)_xsetbv(tmp_Ui, tmp_ULLi);
+#endif
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/aapcs-volatile-bitfields.c b/clang/test/CIR/Incubator/CodeGen/aapcs-volatile-bitfields.c
new file mode 100644
index 0000000000000..c4ceaa262ee5f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/aapcs-volatile-bitfields.c
@@ -0,0 +1,285 @@
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -fclangir -emit-cir -fdump-record-layouts %s -o %t.cir | FileCheck %s --check-prefix=CIR-LAYOUT
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -emit-llvm -fdump-record-layouts %s -o %t.ll | FileCheck %s --check-prefix=OGCG-LAYOUT
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+// REQUIRES: aarch64-registered-target || arm-registered-target
+
+typedef struct  {
+    unsigned int a : 9;
+    volatile unsigned int b : 1;
+    unsigned int c : 1;
+} st1;
+
+// CIR-LAYOUT:  BitFields:[
+// CIR-LAYOUT-NEXT:    <CIRBitFieldInfo name:a offset:0 size:9 isSigned:0 storageSize:16 storageOffset:0 volatileOffset:0 volatileStorageSize:32 volatileStorageOffset:0>
+// CIR-LAYOUT-NEXT:    <CIRBitFieldInfo name:b offset:9 size:1 isSigned:0 storageSize:16 storageOffset:0 volatileOffset:9 volatileStorageSize:32 volatileStorageOffset:0>
+// CIR-LAYOUT-NEXT:    <CIRBitFieldInfo name:c offset:10 size:1 isSigned:0 storageSize:16 storageOffset:0 volatileOffset:10 volatileStorageSize:32 volatileStorageOffset:0>
+
+// OGCG-LAYOUT:  BitFields:[
+// OGCG-LAYOUT-NEXT:    <CGBitFieldInfo Offset:0 Size:9 IsSigned:0 StorageSize:16 StorageOffset:0 VolatileOffset:0 VolatileStorageSize:32 VolatileStorageOffset:0>
+// OGCG-LAYOUT-NEXT:    <CGBitFieldInfo Offset:9 Size:1 IsSigned:0 StorageSize:16 StorageOffset:0 VolatileOffset:9 VolatileStorageSize:32 VolatileStorageOffset:0>
+// OGCG-LAYOUT-NEXT:    <CGBitFieldInfo Offset:10 Size:1 IsSigned:0 StorageSize:16 StorageOffset:0 VolatileOffset:10 VolatileStorageSize:32 VolatileStorageOffset:0>
+
+// different base types
+typedef struct{
+    volatile  short a : 3;
+    volatile  int b: 13;
+    volatile  long c : 5;
+} st2;
+
+// CIR-LAYOUT: BitFields:[
+// CIR-LAYOUT-NEXT:   <CIRBitFieldInfo name:a offset:0 size:3 isSigned:1 storageSize:32 storageOffset:0 volatileOffset:0 volatileStorageSize:16 volatileStorageOffset:0>
+// CIR-LAYOUT-NEXT:   <CIRBitFieldInfo name:b offset:3 size:13 isSigned:1 storageSize:32 storageOffset:0 volatileOffset:3 volatileStorageSize:32 volatileStorageOffset:0>
+// CIR-LAYOUT-NEXT:   <CIRBitFieldInfo name:c offset:16 size:5 isSigned:1 storageSize:32 storageOffset:0 volatileOffset:16 volatileStorageSize:64 volatileStorageOffset:0>
+
+// OGCG-LAYOUT: BitFields:[
+// OGCG-LAYOUT-NEXT:   <CGBitFieldInfo Offset:0 Size:3 IsSigned:1 StorageSize:32 StorageOffset:0 VolatileOffset:0 VolatileStorageSize:16 VolatileStorageOffset:0>
+// OGCG-LAYOUT-NEXT:   <CGBitFieldInfo Offset:3 Size:13 IsSigned:1 StorageSize:32 StorageOffset:0 VolatileOffset:3 VolatileStorageSize:32 VolatileStorageOffset:0>
+// OGCG-LAYOUT-NEXT:   <CGBitFieldInfo Offset:16 Size:5 IsSigned:1 StorageSize:32 StorageOffset:0 VolatileOffset:16 VolatileStorageSize:64 VolatileStorageOffset:0>
+
+typedef struct{
+    volatile unsigned int a : 3;
+    unsigned int : 0; // zero-length bit-field forces the next field to align to an int boundary
+    volatile unsigned int b : 5;
+} st3;
+
+// CIR-LAYOUT: BitFields:[
+// CIR-LAYOUT-NEXT:   <CIRBitFieldInfo name:a offset:0 size:3 isSigned:0 storageSize:8 storageOffset:0 volatileOffset:0 volatileStorageSize:32 volatileStorageOffset:0>
+// CIR-LAYOUT-NEXT:   <CIRBitFieldInfo name:b offset:0 size:5 isSigned:0 storageSize:8 storageOffset:4 volatileOffset:0 volatileStorageSize:0 volatileStorageOffset:0>
+
+// OGCG-LAYOUT: BitFields:[
+// OGCG-LAYOUT-NEXT:   <CGBitFieldInfo Offset:0 Size:3 IsSigned:0 StorageSize:8 StorageOffset:0 VolatileOffset:0 VolatileStorageSize:32 VolatileStorageOffset:0>
+// OGCG-LAYOUT-NEXT:   <CGBitFieldInfo Offset:0 Size:5 IsSigned:0 StorageSize:8 StorageOffset:4 VolatileOffset:0 VolatileStorageSize:0 VolatileStorageOffset:0>
+
+typedef struct{
+    volatile unsigned int a : 3;
+    unsigned int z;
+    volatile unsigned long b : 16;
+} st4;
+
+// CIR-LAYOUT: BitFields:[
+// CIR-LAYOUT-NEXT:   <CIRBitFieldInfo name:a offset:0 size:3 isSigned:0 storageSize:8 storageOffset:0 volatileOffset:0 volatileStorageSize:32 volatileStorageOffset:0>
+// CIR-LAYOUT-NEXT:   <CIRBitFieldInfo name:b offset:0 size:16 isSigned:0 storageSize:16 storageOffset:8 volatileOffset:0 volatileStorageSize:64 volatileStorageOffset:1>
+
+// OGCG-LAYOUT: BitFields:[
+// OGCG-LAYOUT-NEXT:   <CGBitFieldInfo Offset:0 Size:3 IsSigned:0 StorageSize:8 StorageOffset:0 VolatileOffset:0 VolatileStorageSize:32 VolatileStorageOffset:0>
+// OGCG-LAYOUT-NEXT:   <CGBitFieldInfo Offset:0 Size:16 IsSigned:0 StorageSize:16 StorageOffset:8 VolatileOffset:0 VolatileStorageSize:64 VolatileStorageOffset:1>
+
+
+void def () {
+  st1 s1;
+  st2 s2;
+  st3 s3;
+  st4 s4;
+}
+
+int check_load(st1 *s1) {
+  return s1->b;
+}
+
+// CIR:  cir.func {{.*}} @check_load
+// CIR:    [[LOAD:%.*]] = cir.load align(8) {{.*}} : !cir.ptr<!cir.ptr<!rec_st1>>, !cir.ptr<!rec_st1>
+// CIR:    [[MEMBER:%.*]] = cir.get_member [[LOAD]][0] {name = "b"} : !cir.ptr<!rec_st1> -> !cir.ptr<!u16i>
+// CIR:    [[BITFI:%.*]] = cir.get_bitfield align(4) (#bfi_b, [[MEMBER]] {is_volatile} : !cir.ptr<!u16i>) -> !u32i
+// CIR:    [[CAST:%.*]] = cir.cast integral [[BITFI]] : !u32i -> !s32i
+// CIR:    cir.store{{.*}} [[CAST]], [[RETVAL:%.*]] : !s32i, !cir.ptr<!s32i>
+// CIR:    [[RET:%.*]] = cir.load{{.*}} [[RETVAL]] : !cir.ptr<!s32i>, !s32i
+// CIR:    cir.return [[RET]] : !s32i
+
+// LLVM:define dso_local i32 @check_load
+// LLVM:  [[LOAD:%.*]] = load ptr, ptr {{.*}}, align 8
+// LLVM:  [[MEMBER:%.*]] = getelementptr %struct.st1, ptr [[LOAD]], i32 0, i32 0
+// LLVM:  [[LOADVOL:%.*]] = load volatile i32, ptr [[MEMBER]], align 4
+// LLVM:  [[LSHR:%.*]] = lshr i32 [[LOADVOL]], 9
+// LLVM:  [[CLEAR:%.*]] = and i32 [[LSHR]], 1
+// LLVM:  store i32 [[CLEAR]], ptr [[RETVAL:%.*]], align 4
+// LLVM:  [[RET:%.*]] = load i32, ptr [[RETVAL]], align 4
+// LLVM:  ret i32 [[RET]]
+
+// OGCG: define dso_local i32 @check_load
+// OGCG:   [[LOAD:%.*]] = load ptr, ptr {{.*}}, align 8
+// OGCG:   [[LOADVOL:%.*]] = load volatile i32, ptr [[LOAD]], align 4
+// OGCG:   [[LSHR:%.*]] = lshr i32 [[LOADVOL]], 9
+// OGCG:   [[CLEAR:%.*]] = and i32 [[LSHR]], 1
+// OGCG:   ret i32 [[CLEAR]]
+
+// this volatile bit-field container overlaps with a zero-length bit-field,
+// so it may be accessed without using the container's width.
+int check_load_exception(st3 *s3) {
+  return s3->b;
+}
+
+// CIR:  cir.func {{.*}} @check_load_exception
+// CIR:    [[LOAD:%.*]] = cir.load align(8) {{.*}} : !cir.ptr<!cir.ptr<!rec_st3>>, !cir.ptr<!rec_st3>
+// CIR:    [[MEMBER:%.*]] = cir.get_member [[LOAD]][2] {name = "b"} : !cir.ptr<!rec_st3> -> !cir.ptr<!u8i>
+// CIR:    [[BITFI:%.*]] = cir.get_bitfield align(4) (#bfi_b1, [[MEMBER]] {is_volatile} : !cir.ptr<!u8i>) -> !u32i
+// CIR:    [[CAST:%.*]] = cir.cast integral [[BITFI]] : !u32i -> !s32i
+// CIR:    cir.store{{.*}} [[CAST]], [[RETVAL:%.*]] : !s32i, !cir.ptr<!s32i>
+// CIR:    [[RET:%.*]] = cir.load{{.*}} [[RETVAL]] : !cir.ptr<!s32i>, !s32i
+// CIR:    cir.return [[RET]] : !s32i
+
+// LLVM:define dso_local i32 @check_load_exception
+// LLVM:  [[LOAD:%.*]] = load ptr, ptr {{.*}}, align 8
+// LLVM:  [[MEMBER:%.*]] = getelementptr %struct.st3, ptr [[LOAD]], i32 0, i32 2
+// LLVM:  [[LOADVOL:%.*]] = load volatile i8, ptr [[MEMBER]], align 4
+// LLVM:  [[CLEAR:%.*]] = and i8 [[LOADVOL]], 31
+// LLVM:  [[CAST:%.*]] = zext i8 [[CLEAR]] to i32
+// LLVM:  store i32 [[CAST]], ptr [[RETVAL:%.*]], align 4
+// LLVM:  [[RET:%.*]] = load i32, ptr [[RETVAL]], align 4
+// LLVM:  ret i32 [[RET]]
+
+// OGCG: define dso_local i32 @check_load_exception
+// OGCG:   [[LOAD:%.*]] = load ptr, ptr {{.*}}, align 8
+// OGCG:   [[MEMBER:%.*]] = getelementptr inbounds nuw %struct.st3, ptr [[LOAD]], i32 0, i32 2
+// OGCG:   [[LOADVOL:%.*]] = load volatile i8, ptr [[MEMBER]], align 4
+// OGCG:   [[CLEAR:%.*]] = and i8 [[LOADVOL]], 31
+// OGCG:   [[CAST:%.*]] = zext i8 [[CLEAR]] to i32
+// OGCG:   ret i32 [[CAST]]
+
+typedef struct {
+    volatile int a : 24;
+    char b;
+    volatile int c: 30;
+ } clip;
+
+int clip_load_exception2(clip *c) {
+  return c->a;
+}
+
+// CIR:  cir.func {{.*}} @clip_load_exception2
+// CIR:    [[LOAD:%.*]] = cir.load align(8) {{.*}} : !cir.ptr<!cir.ptr<!rec_clip>>, !cir.ptr<!rec_clip>
+// CIR:    [[MEMBER:%.*]] = cir.get_member [[LOAD]][0] {name = "a"} : !cir.ptr<!rec_clip> -> !cir.ptr<!cir.array<!u8i x 3>>
+// CIR:    [[BITFI:%.*]] = cir.get_bitfield align(4) (#bfi_a1, [[MEMBER]] {is_volatile} : !cir.ptr<!cir.array<!u8i x 3>>) -> !s32i
+// CIR:    cir.store{{.*}} [[BITFI]], [[RETVAL:%.*]] : !s32i, !cir.ptr<!s32i>
+// CIR:    [[RET:%.*]] = cir.load{{.*}} [[RETVAL]] : !cir.ptr<!s32i>, !s32i
+// CIR:    cir.return [[RET]] : !s32i
+
+// LLVM:define dso_local i32 @clip_load_exception2
+// LLVM:  [[LOAD:%.*]] = load ptr, ptr {{.*}}, align 8
+// LLVM:  [[MEMBER:%.*]] = getelementptr %struct.clip, ptr [[LOAD]], i32 0, i32 0
+// LLVM:  [[LOADVOL:%.*]] = load volatile i24, ptr [[MEMBER]], align 4
+// LLVM:  [[CAST:%.*]] = sext i24 [[LOADVOL]] to i32
+// LLVM:  store i32 [[CAST]], ptr [[RETVAL:%.*]], align 4
+// LLVM:  [[RET:%.*]] = load i32, ptr [[RETVAL]], align 4
+// LLVM:  ret i32 [[RET]]
+
+// OGCG: define dso_local i32 @clip_load_exception2
+// OGCG:   [[LOAD:%.*]] = load ptr, ptr {{.*}}, align 8
+// OGCG:   [[LOADVOL:%.*]] = load volatile i24, ptr [[LOAD]], align 4
+// OGCG:   [[CAST:%.*]] = sext i24 [[LOADVOL]] to i32
+// OGCG:   ret i32 [[CAST]]
+
+void check_store(st2 *s2) {
+  s2->a = 1;
+}
+
+// CIR:  cir.func {{.*}} @check_store
+// CIR:    [[CONST:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR:    [[CAST:%.*]] = cir.cast integral [[CONST]] : !s32i -> !s16i
+// CIR:    [[LOAD:%.*]] = cir.load align(8) {{.*}} : !cir.ptr<!cir.ptr<!rec_st2>>, !cir.ptr<!rec_st2>
+// CIR:    [[MEMBER:%.*]] = cir.get_member [[LOAD]][0] {name = "a"} : !cir.ptr<!rec_st2> -> !cir.ptr<!u32i>
+// CIR:    [[SETBF:%.*]] = cir.set_bitfield align(8) (#bfi_a, [[MEMBER]] : !cir.ptr<!u32i>, [[CAST]] : !s16i) {is_volatile} -> !s16i
+// CIR:    cir.return
+
+// LLVM:define dso_local void @check_store
+// LLVM:  [[LOAD:%.*]] = load ptr, ptr {{.*}}, align 8
+// LLVM:  [[MEMBER:%.*]] = getelementptr %struct.st2, ptr [[LOAD]], i32 0, i32 0
+// LLVM:  [[LOADVOL:%.*]] = load volatile i16, ptr [[MEMBER]], align 8
+// LLVM:  [[CLEAR:%.*]] = and i16 [[LOADVOL]], -8
+// LLVM:  [[SET:%.*]] = or i16 [[CLEAR]], 1
+// LLVM:  store volatile i16 [[SET]], ptr [[MEMBER]], align 8
+// LLVM:  ret void
+
+// OGCG: define dso_local void @check_store
+// OGCG:   [[LOAD:%.*]] = load ptr, ptr {{.*}}, align 8
+// OGCG:   [[LOADVOL:%.*]] = load volatile i16, ptr [[LOAD]], align 8
+// OGCG:   [[CLEAR:%.*]] = and i16 [[LOADVOL]], -8
+// OGCG:   [[SET:%.*]] = or i16 [[CLEAR]], 1
+// OGCG:   store volatile i16 [[SET]], ptr [[LOAD]], align 8
+// OGCG:   ret void
+
+// this volatile bit-field container overlaps with a zero-length bit-field,
+// so it may be accessed without using the container's width.
+void check_store_exception(st3 *s3) {
+  s3->b = 2;
+}
+
+// CIR:  cir.func {{.*}} @check_store_exception
+// CIR:    [[CONST:%.*]] = cir.const #cir.int<2> : !s32i
+// CIR:    [[CAST:%.*]] = cir.cast integral [[CONST]] : !s32i -> !u32i
+// CIR:    [[LOAD:%.*]] = cir.load align(8) {{.*}} : !cir.ptr<!cir.ptr<!rec_st3>>, !cir.ptr<!rec_st3>
+// CIR:    [[MEMBER:%.*]] = cir.get_member [[LOAD]][2] {name = "b"} : !cir.ptr<!rec_st3> -> !cir.ptr<!u8i>
+// CIR:    [[SETBF:%.*]] = cir.set_bitfield align(4) (#bfi_b1, [[MEMBER]] : !cir.ptr<!u8i>, [[CAST]] : !u32i) {is_volatile} -> !u32i
+// CIR:    cir.return
+
+// LLVM:define dso_local void @check_store_exception
+// LLVM:  [[LOAD:%.*]] = load ptr, ptr {{.*}}, align 8
+// LLVM:  [[MEMBER:%.*]] = getelementptr %struct.st3, ptr [[LOAD]], i32 0, i32 2
+// LLVM:  [[LOADVOL:%.*]] = load volatile i8, ptr [[MEMBER]], align 4
+// LLVM:  [[CLEAR:%.*]] = and i8 [[LOADVOL]], -32
+// LLVM:  [[SET:%.*]] = or i8 [[CLEAR]], 2
+// LLVM:  store volatile i8 [[SET]], ptr [[MEMBER]], align 4
+// LLVM:  ret void
+
+// OGCG: define dso_local void @check_store_exception
+// OGCG:   [[LOAD:%.*]] = load ptr, ptr {{.*}}, align 8
+// OGCG:   [[MEMBER:%.*]] = getelementptr inbounds nuw %struct.st3, ptr [[LOAD]], i32 0, i32 2
+// OGCG:   [[LOADVOL:%.*]] = load volatile i8, ptr [[MEMBER]], align 4
+// OGCG:   [[CLEAR:%.*]] = and i8 [[LOADVOL]], -32
+// OGCG:   [[SET:%.*]] = or i8 [[CLEAR]], 2
+// OGCG:   store volatile i8 [[SET]], ptr [[MEMBER]], align 4
+// OGCG:   ret void
+
+void clip_store_exception2(clip *c) {
+  c->a = 3;
+}
+
+// CIR:  cir.func {{.*}} @clip_store_exception2
+// CIR:    [[CONST:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR:    [[LOAD:%.*]] = cir.load align(8) {{.*}} : !cir.ptr<!cir.ptr<!rec_clip>>, !cir.ptr<!rec_clip>
+// CIR:    [[MEMBER:%.*]] = cir.get_member [[LOAD]][0] {name = "a"} : !cir.ptr<!rec_clip> -> !cir.ptr<!cir.array<!u8i x 3>>
+// CIR:    [[SETBF:%.*]] = cir.set_bitfield align(4) (#bfi_a1, [[MEMBER]] : !cir.ptr<!cir.array<!u8i x 3>>, [[CONST]] : !s32i) {is_volatile} -> !s32i
+// CIR:    cir.return
+
+// LLVM:define dso_local void @clip_store_exception2
+// LLVM:  [[LOAD:%.*]] = load ptr, ptr {{.*}}, align 8
+// LLVM:  [[MEMBER:%.*]] = getelementptr %struct.clip, ptr [[LOAD]], i32 0, i32 0
+// LLVM:  store volatile i24 3, ptr [[MEMBER]], align 4
+// LLVM:  ret void
+
+// OGCG: define dso_local void @clip_store_exception2
+// OGCG:   [[LOAD:%.*]] = load ptr, ptr {{.*}}, align 8
+// OGCG:   store volatile i24 3, ptr [[LOAD]], align 4
+// OGCG:   ret void
+
+void check_store_second_member (st4 *s4) {
+  s4->b = 1;
+}
+
+// CIR:  cir.func {{.*}} @check_store_second_member
+// CIR:    [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR:    [[CAST:%.*]] = cir.cast integral [[ONE]] : !s32i -> !u64i
+// CIR:    [[LOAD:%.*]] = cir.load align(8) {{.*}} : !cir.ptr<!cir.ptr<!rec_st4>>, !cir.ptr<!rec_st4>
+// CIR:    [[MEMBER:%.*]] = cir.get_member [[LOAD]][2] {name = "b"} : !cir.ptr<!rec_st4> -> !cir.ptr<!u16i>
+// CIR:    cir.set_bitfield align(8) (#bfi_b2, [[MEMBER]] : !cir.ptr<!u16i>, [[CAST]] : !u64i) {is_volatile} -> !u64i
+
+// LLVM: define dso_local void @check_store_second_member
+// LLVM:   [[LOAD:%.*]] = load ptr, ptr {{.*}}, align 8
+// LLVM:   [[MEMBER:%.*]] = getelementptr %struct.st4, ptr [[LOAD]], i32 0, i32 2
+// LLVM:   [[VAL:%.*]] = load volatile i64, ptr [[MEMBER]], align 8
+// LLVM:   [[CLEAR:%.*]] = and i64 [[VAL]], -65536
+// LLVM:   [[SET:%.*]] = or i64 [[CLEAR]], 1
+// LLVM:   store volatile i64 [[SET]], ptr [[MEMBER]], align 8
+
+// OGCG: define dso_local void @check_store_second_member
+// OGCG:   [[LOAD:%.*]] = load ptr, ptr {{.*}}, align 8
+// OGCG:   [[MEMBER:%.*]] = getelementptr inbounds i64, ptr [[LOAD]], i64 1
+// OGCG:   [[LOADBF:%.*]] = load volatile i64, ptr [[MEMBER]], align 8
+// OGCG:   [[CLR:%.*]] = and i64 [[LOADBF]], -65536
+// OGCG:   [[SET:%.*]] = or i64 [[CLR]], 1
+// OGCG:   store volatile i64 [[SET]], ptr [[MEMBER]], align 8
diff --git a/clang/test/CIR/Incubator/CodeGen/aarch64-neon-vdup-lane.c b/clang/test/CIR/Incubator/CodeGen/aarch64-neon-vdup-lane.c
new file mode 100644
index 0000000000000..e9b95525c0f07
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/aarch64-neon-vdup-lane.c
@@ -0,0 +1,228 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24  -fclangir \
+// RUN:            -emit-cir -target-feature +neon %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24  -fclangir \
+// RUN:            -emit-llvm -target-feature +neon %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// Testing normal usage of vdup lane intrinsics.
+
+// REQUIRES: aarch64-registered-target || arm-registered-target
+#include <arm_neon.h>
+
+int8_t test_vdupb_lane_s8(int8x8_t src) {
+  return vdupb_lane_s8(src, 7);
+}
+
+// CIR-LABEL: test_vdupb_lane_s8
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<7> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 8>
+
+// LLVM: define dso_local i8 @test_vdupb_lane_s8(<8 x i8> [[ARG:%.*]])
+// LLVM: alloca <8 x i8>
+// LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i8>, i64 1, align 8
+// LLVM: store <8 x i8> [[ARG]], ptr [[ARG_SAVE]], align 8
+// LLVM: [[TMP:%.*]] = load <8 x i8>, ptr [[ARG_SAVE:%.*]], align 8
+// LLVM: store <8 x i8> [[TMP]], ptr [[S0:%.*]], align 8
+// LLVM: [[INTRN_ARG:%.*]] = load <8 x i8>, ptr [[S0]], align 8
+// LLVM: {{%.*}} = extractelement <8 x i8> [[INTRN_ARG]], i32 7
+// LLVM: ret i8 {{%.*}}
+
+int8_t test_vdupb_laneq_s8(int8x16_t a) {
+  return vdupb_laneq_s8(a, 15);
+}
+
+// CIR-LABEL: test_vdupb_laneq_s8
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<15> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 16>
+
+// LLVM: define dso_local i8 @test_vdupb_laneq_s8(<16 x i8> [[ARG:%.*]])
+// LLVM: alloca <16 x i8>
+// LLVM: [[ARG_SAVE:%.*]] = alloca <16 x i8>, i64 1, align 16
+// LLVM: store <16 x i8> [[ARG]], ptr [[ARG_SAVE]], align 16
+// LLVM: [[TMP:%.*]] = load <16 x i8>, ptr [[ARG_SAVE:%.*]], align 16
+// LLVM: store <16 x i8> [[TMP]], ptr [[S0:%.*]], align 16
+// LLVM: [[INTRN_ARG:%.*]] = load <16 x i8>, ptr [[S0]], align 16
+// LLVM: {{%.*}} = extractelement <16 x i8> [[INTRN_ARG]], i32 15
+// LLVM: ret i8 {{%.*}}
+
+int16_t test_vduph_lane_s16(int16x4_t src) {
+  return vduph_lane_s16(src, 3);
+}
+
+// CIR-LABEL: test_vduph_lane_s16
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<3> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 4>
+
+
+// LLVM: define dso_local i16 @test_vduph_lane_s16(<4 x i16> [[ARG:%.*]])
+// LLVM: alloca <4 x i16>
+// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i16>, i64 1, align 8
+// LLVM: store <4 x i16> [[ARG]], ptr [[ARG_SAVE]], align 8
+// LLVM: [[TMP:%.*]] = load <4 x i16>, ptr [[ARG_SAVE:%.*]], align 8
+// LLVM: store <4 x i16> [[TMP]], ptr [[S0:%.*]], align 8
+// LLVM: [[INTRN_ARG:%.*]] = load <4 x i16>, ptr [[S0]], align 8
+// LLVM: {{%.*}} = extractelement <4 x i16> [[INTRN_ARG]], i32 3
+// LLVM: ret i16 {{%.*}}
+
+int16_t test_vduph_laneq_s16(int16x8_t a) {
+  return vduph_laneq_s16(a, 7);
+}
+
+// CIR-LABEL: test_vduph_laneq_s16
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<7> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 8>
+
+// LLVM: define dso_local i16 @test_vduph_laneq_s16(<8 x i16> [[ARG:%.*]])
+// LLVM: alloca <8 x i16>
+// LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i16>, i64 1, align 16
+// LLVM: store <8 x i16> [[ARG]], ptr [[ARG_SAVE]], align 16
+// LLVM: [[TMP:%.*]] = load <8 x i16>, ptr [[ARG_SAVE:%.*]], align 16
+// LLVM: store <8 x i16> [[TMP]], ptr [[S0:%.*]], align 16
+// LLVM: [[INTRN_ARG:%.*]] = load <8 x i16>, ptr [[S0]], align 16
+// LLVM: {{%.*}} = extractelement <8 x i16> [[INTRN_ARG]], i32 7
+// LLVM: ret i16 {{%.*}}
+
+int32_t test_vdups_lane_s32(int32x2_t a) {
+  return vdups_lane_s32(a, 1);
+}
+
+// CIR-LABEL: test_vdups_lane_s32
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<1> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 2>
+
+// LLVM: define dso_local i32 @test_vdups_lane_s32(<2 x i32> [[ARG:%.*]])
+// LLVM: alloca <2 x i32>
+// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i32>, i64 1, align 8
+// LLVM: store <2 x i32> [[ARG]], ptr [[ARG_SAVE]], align 8
+// LLVM: [[TMP:%.*]] = load <2 x i32>, ptr [[ARG_SAVE:%.*]], align 8
+// LLVM: store <2 x i32> [[TMP]], ptr [[S0:%.*]], align 8
+// LLVM: [[INTRN_ARG:%.*]] = load <2 x i32>, ptr [[S0]], align 8
+// LLVM: {{%.*}} = extractelement <2 x i32> [[INTRN_ARG]], i32 1
+// LLVM: ret i32 {{%.*}}
+
+int32_t test_vdups_laneq_s32(int32x4_t a) {
+  return vdups_laneq_s32(a, 3);
+}
+
+// CIR-LABEL: test_vdups_laneq_s32
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<3> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 4>
+
+// LLVM: define dso_local i32 @test_vdups_laneq_s32(<4 x i32> [[ARG:%.*]])
+// LLVM: alloca <4 x i32>
+// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: store <4 x i32> [[ARG]], ptr [[ARG_SAVE]], align 16
+// LLVM: [[TMP:%.*]] = load <4 x i32>, ptr [[ARG_SAVE:%.*]], align 16
+// LLVM: store <4 x i32> [[TMP]], ptr [[S0:%.*]], align 16
+// LLVM: [[INTRN_ARG:%.*]] = load <4 x i32>, ptr [[S0]], align 16
+// LLVM: {{%.*}} = extractelement <4 x i32> [[INTRN_ARG]], i32 3
+// LLVM: ret i32 {{%.*}}
+
+int64_t test_vdupd_lane_s64(int64x1_t src) {
+  return vdupd_lane_s64(src, 0);
+}
+
+// CIR-LABEL: test_vdupd_lane_s64
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<0> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 1>
+
+// LLVM: define dso_local i64 @test_vdupd_lane_s64(<1 x i64> [[ARG:%.*]])
+// LLVM: alloca <1 x i64>
+// LLVM: [[ARG_SAVE:%.*]] = alloca <1 x i64>, i64 1, align 8
+// LLVM: store <1 x i64> [[ARG]], ptr [[ARG_SAVE]], align 8
+// LLVM: [[TMP:%.*]] = load <1 x i64>, ptr [[ARG_SAVE:%.*]], align 8
+// LLVM: store <1 x i64> [[TMP]], ptr [[S0:%.*]], align 8
+// LLVM: [[INTRN_ARG:%.*]] = load <1 x i64>, ptr [[S0]], align 8
+// LLVM: {{%.*}} = extractelement <1 x i64> [[INTRN_ARG]], i32 0
+// LLVM: ret i64 {{%.*}}
+
+int64_t test_vdupd_laneq_s64(int64x2_t a) {
+  return vdupd_laneq_s64(a, 1);
+}
+
+// CIR-LABEL: test_vdupd_laneq_s64
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<1> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 2>
+
+// LLVM: define dso_local i64 @test_vdupd_laneq_s64(<2 x i64> [[ARG:%.*]])
+// LLVM: alloca <2 x i64>
+// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i64>, i64 1, align 16
+// LLVM: store <2 x i64> [[ARG]], ptr [[ARG_SAVE]], align 16
+// LLVM: [[TMP:%.*]] = load <2 x i64>, ptr [[ARG_SAVE:%.*]], align 16
+// LLVM: store <2 x i64> [[TMP]], ptr [[S0:%.*]], align 16
+// LLVM: [[INTRN_ARG:%.*]] = load <2 x i64>, ptr [[S0]], align 16
+// LLVM: {{%.*}} = extractelement <2 x i64> [[INTRN_ARG]], i32 1
+// LLVM: ret i64 {{%.*}}
+
+float32_t test_vdups_lane_f32(float32x2_t src) {
+  return vdups_lane_f32(src, 1);
+}
+
+// CIR-LABEL: test_vdups_lane_f32
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<1> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 2>
+
+// LLVM: define dso_local float @test_vdups_lane_f32(<2 x float> [[ARG:%.*]])
+// LLVM: alloca <2 x float>
+// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x float>, i64 1, align 8
+// LLVM: store <2 x float> [[ARG]], ptr [[ARG_SAVE]], align 8
+// LLVM: [[TMP:%.*]] = load <2 x float>, ptr [[ARG_SAVE:%.*]], align 8
+// LLVM: store <2 x float> [[TMP]], ptr [[S0:%.*]], align 8
+// LLVM: [[INTRN_ARG:%.*]] = load <2 x float>, ptr [[S0]], align 8
+// LLVM: {{%.*}} = extractelement <2 x float> [[INTRN_ARG]], i32 1
+// LLVM: ret float {{%.*}}
+
+float64_t test_vdupd_lane_f64(float64x1_t src) {
+  return vdupd_lane_f64(src, 0);
+}
+
+// CIR-LABEL: test_vdupd_lane_f64
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<0> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 1>
+
+// LLVM: define dso_local double @test_vdupd_lane_f64(<1 x double> [[ARG:%.*]])
+// LLVM: alloca <1 x double>
+// LLVM: [[ARG_SAVE:%.*]] = alloca <1 x double>, i64 1, align 8
+// LLVM: store <1 x double> [[ARG]], ptr [[ARG_SAVE]], align 8
+// LLVM: [[TMP:%.*]] = load <1 x double>, ptr [[ARG_SAVE:%.*]], align 8
+// LLVM: store <1 x double> [[TMP]], ptr [[S0:%.*]], align 8
+// LLVM: [[INTRN_ARG:%.*]] = load <1 x double>, ptr [[S0]], align 8
+// LLVM: {{%.*}} = extractelement <1 x double> [[INTRN_ARG]], i32 0
+// LLVM: ret double {{%.*}}
+
+float32_t test_vdups_laneq_f32(float32x4_t src) {
+  return vdups_laneq_f32(src, 3);
+}
+
+// CIR-LABEL: test_vdups_laneq_f32
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<3> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 4>
+
+// LLVM: define dso_local float @test_vdups_laneq_f32(<4 x float> [[ARG:%.*]])
+// LLVM: alloca <4 x float>
+// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x float>, i64 1, align 16
+// LLVM: store <4 x float> [[ARG]], ptr [[ARG_SAVE]], align 16
+// LLVM: [[TMP:%.*]] = load <4 x float>, ptr [[ARG_SAVE:%.*]], align 16
+// LLVM: store <4 x float> [[TMP]], ptr [[S0:%.*]], align 16
+// LLVM: [[INTRN_ARG:%.*]] = load <4 x float>, ptr [[S0]], align 16
+// LLVM: {{%.*}} = extractelement <4 x float> [[INTRN_ARG]], i32 3
+// LLVM: ret float {{%.*}}
+
+float64_t test_vdupd_laneq_f64(float64x2_t src) {
+  return vdupd_laneq_f64(src, 1);
+}
+
+// CIR-LABEL: test_vdupd_laneq_f64
+// CIR: [[IDX:%.*]]  = cir.const #cir.int<1> : !s32i
+// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 2>
+
+// LLVM: define dso_local double @test_vdupd_laneq_f64(<2 x double> [[ARG:%.*]])
+// LLVM: alloca <2 x double>
+// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x double>, i64 1, align 16
+// LLVM: store <2 x double> [[ARG]], ptr [[ARG_SAVE]], align 16
+// LLVM: [[TMP:%.*]] = load <2 x double>, ptr [[ARG_SAVE:%.*]], align 16
+// LLVM: store <2 x double> [[TMP]], ptr [[S0:%.*]], align 16
+// LLVM: [[INTRN_ARG:%.*]] = load <2 x double>, ptr [[S0]], align 16
+// LLVM: {{%.*}} = extractelement <2 x double> [[INTRN_ARG]], i32 1
+// LLVM: ret double {{%.*}}
diff --git a/clang/test/CIR/Incubator/CodeGen/abstract-cond.c b/clang/test/CIR/Incubator/CodeGen/abstract-cond.c
new file mode 100644
index 0000000000000..3c2ef4b2d1ae6
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/abstract-cond.c
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// ?: in "lvalue"
+struct s6 { int f0; };
+int f6(int a0, struct s6 a1, struct s6 a2) {
+  return (a0 ? a1 : a2).f0;
+}
+
+// CIR-LABEL: @f6
+// CIR:  %[[A0:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a0"
+// CIR:  %[[A1:.*]] = cir.alloca !rec_s6, !cir.ptr<!rec_s6>, ["a1"
+// CIR:  %[[A2:.*]] = cir.alloca !rec_s6, !cir.ptr<!rec_s6>, ["a2"
+// CIR:  cir.scope {
+// CIR:    %[[TMP:.*]] = cir.alloca !rec_s6, !cir.ptr<!rec_s6>, ["ref.tmp0"]
+// CIR:    %[[LOAD_A0:.*]] = cir.load{{.*}} %[[A0]] : !cir.ptr<!s32i>, !s32i
+// CIR:    %[[COND:.*]] = cir.cast int_to_bool %[[LOAD_A0]] : !s32i -> !cir.bool
+// CIR:    cir.if %[[COND]] {
+// CIR:      cir.copy %[[A1]] to %[[TMP]] : !cir.ptr<!rec_s6>
+// CIR:    } else {
+// CIR:      cir.copy %[[A2]] to %[[TMP]] : !cir.ptr<!rec_s6>
+// CIR:    }
+// CIR:    cir.get_member %[[TMP]][0] {name = "f0"} : !cir.ptr<!rec_s6> -> !cir.ptr<!s32i>
+
+// LLVM-LABEL: @f6
+// LLVM:    %[[LOAD_A0:.*]] = load i32, ptr {{.*}}
+// LLVM:    %[[COND:.*]] = icmp ne i32 %[[LOAD_A0]], 0
+// LLVM:    br i1 %[[COND]], label %[[A1_PATH:.*]], label %[[A2_PATH:.*]]
+// LLVM:  [[A1_PATH]]:
+// LLVM:    call void @llvm.memcpy.p0.p0.i32(ptr %[[TMP:.*]], ptr {{.*}}, i32 4, i1 false)
+// LLVM:    br label %[[EXIT:[a-z0-9]+]]
+// LLVM:  [[A2_PATH]]:
+// LLVM:    call void @llvm.memcpy.p0.p0.i32(ptr %[[TMP]], ptr {{.*}}, i32 4, i1 false)
+// LLVM:    br label %[[EXIT]]
+// LLVM:  [[EXIT]]:
+// LLVM:    getelementptr {{.*}}, ptr %[[TMP]], i32 0, i32 0
diff --git a/clang/test/CIR/Incubator/CodeGen/address-space-cast-subscript.cpp b/clang/test/CIR/Incubator/CodeGen/address-space-cast-subscript.cpp
new file mode 100644
index 0000000000000..a2cfe0bac6ed9
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/address-space-cast-subscript.cpp
@@ -0,0 +1,73 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+// Test address space conversion via emitPointerWithAlignment (array subscript path).
+// This exercises the CK_AddressSpaceConversion case in emitPointerWithAlignment
+// where an explicit cast to a different address space is followed by an array
+// subscript operation.
+
+#define AS1 __attribute__((address_space(1)))
+#define AS2 __attribute__((address_space(2)))
+
+// CIR-LABEL: @_Z24test_cast_then_subscriptPU3AS1i
+// LLVM-LABEL: @_Z24test_cast_then_subscriptPU3AS1i
+// OGCG-LABEL: @_Z24test_cast_then_subscriptPU3AS1i
+void test_cast_then_subscript(AS1 int *p1) {
+  // Explicit cast to AS2, then subscript - this goes through emitPointerWithAlignment
+  int val = ((AS2 int *)p1)[0];
+  // CIR:      %[[#LOAD:]] = cir.load {{.*}} : !cir.ptr<!cir.ptr<!s32i, target_address_space(1)>>, !cir.ptr<!s32i, target_address_space(1)>
+  // CIR-NEXT: %[[#CAST:]] = cir.cast address_space %[[#LOAD]] : !cir.ptr<!s32i, target_address_space(1)> -> !cir.ptr<!s32i, target_address_space(2)>
+  // CIR-NEXT: %[[#IDX:]] = cir.const #cir.int<0> : !s32i
+  // CIR-NEXT: %[[#PTR:]] = cir.ptr_stride %[[#CAST]], %[[#IDX]] : (!cir.ptr<!s32i, target_address_space(2)>, !s32i) -> !cir.ptr<!s32i, target_address_space(2)>
+  // CIR-NEXT: %{{.+}} = cir.load {{.*}} %[[#PTR]] : !cir.ptr<!s32i, target_address_space(2)>, !s32i
+
+  // LLVM:      %[[#LOAD:]] = load ptr addrspace(1), ptr %{{.+}}, align 8
+  // LLVM-NEXT: %[[#CAST:]] = addrspacecast ptr addrspace(1) %[[#LOAD]] to ptr addrspace(2)
+  // LLVM-NEXT: %[[#GEP:]] = getelementptr i32, ptr addrspace(2) %[[#CAST]], i64 0
+  // LLVM-NEXT: %{{.+}} = load i32, ptr addrspace(2) %[[#GEP]], align 4
+
+  // OGCG:      %[[#LOAD:]] = load ptr addrspace(1), ptr %{{.+}}, align 8
+  // OGCG-NEXT: %[[#CAST:]] = addrspacecast ptr addrspace(1) %[[#LOAD]] to ptr addrspace(2)
+  // OGCG:      getelementptr inbounds i32, ptr addrspace(2) %[[#CAST]], i64 0
+}
+
+// CIR-LABEL: @_Z30test_cast_then_subscript_writePU3AS1ii
+// LLVM-LABEL: @_Z30test_cast_then_subscript_writePU3AS1ii
+// OGCG-LABEL: @_Z30test_cast_then_subscript_writePU3AS1ii
+void test_cast_then_subscript_write(AS1 int *p1, int val) {
+  // Explicit cast to AS2, then subscript for write
+  ((AS2 int *)p1)[0] = val;
+  // CIR:      %[[#CAST:]] = cir.cast address_space %{{.+}} : !cir.ptr<!s32i, target_address_space(1)> -> !cir.ptr<!s32i, target_address_space(2)>
+  // CIR:      %[[#PTR:]] = cir.ptr_stride %[[#CAST]], %{{.+}} : (!cir.ptr<!s32i, target_address_space(2)>, !s32i) -> !cir.ptr<!s32i, target_address_space(2)>
+  // CIR-NEXT: cir.store {{.*}}, %[[#PTR]] : !s32i, !cir.ptr<!s32i, target_address_space(2)>
+
+  // LLVM:      %[[#CAST:]] = addrspacecast ptr addrspace(1) %{{.+}} to ptr addrspace(2)
+  // LLVM-NEXT: %[[#GEP:]] = getelementptr i32, ptr addrspace(2) %[[#CAST]], i64 0
+  // LLVM-NEXT: store i32 %{{.+}}, ptr addrspace(2) %[[#GEP]], align 4
+
+  // OGCG:      %[[#CAST:]] = addrspacecast ptr addrspace(1) %{{.+}} to ptr addrspace(2)
+  // OGCG:      getelementptr inbounds i32, ptr addrspace(2) %[[#CAST]], i64 0
+}
+
+// CIR-LABEL: @_Z38test_cast_then_subscript_nonzero_indexPU3AS1i
+// LLVM-LABEL: @_Z38test_cast_then_subscript_nonzero_indexPU3AS1i
+// OGCG-LABEL: @_Z38test_cast_then_subscript_nonzero_indexPU3AS1i
+void test_cast_then_subscript_nonzero_index(AS1 int *p1) {
+  // Cast then subscript with non-zero index
+  int val = ((AS2 int *)p1)[5];
+  // CIR:      %[[#CAST:]] = cir.cast address_space %{{.+}} : !cir.ptr<!s32i, target_address_space(1)> -> !cir.ptr<!s32i, target_address_space(2)>
+  // CIR:      %[[#IDX:]] = cir.const #cir.int<5> : !s32i
+  // CIR-NEXT: %[[#PTR:]] = cir.ptr_stride %[[#CAST]], %[[#IDX]] : (!cir.ptr<!s32i, target_address_space(2)>, !s32i) -> !cir.ptr<!s32i, target_address_space(2)>
+  // CIR-NEXT: %{{.+}} = cir.load {{.*}} %[[#PTR]] : !cir.ptr<!s32i, target_address_space(2)>, !s32i
+
+  // LLVM:      %[[#CAST:]] = addrspacecast ptr addrspace(1) %{{.+}} to ptr addrspace(2)
+  // LLVM:      %[[#GEP:]] = getelementptr i32, ptr addrspace(2) %[[#CAST]], i64 5
+  // LLVM-NEXT: %{{.+}} = load i32, ptr addrspace(2) %[[#GEP]], align 4
+
+  // OGCG:      %[[#CAST:]] = addrspacecast ptr addrspace(1) %{{.+}} to ptr addrspace(2)
+  // OGCG:      getelementptr inbounds i32, ptr addrspace(2) %[[#CAST]], i64 5
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/address-space-conversion.cpp b/clang/test/CIR/Incubator/CodeGen/address-space-conversion.cpp
new file mode 100644
index 0000000000000..b9a5aec1c2b5a
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/address-space-conversion.cpp
@@ -0,0 +1,68 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+using pi1_t = int __attribute__((address_space(1))) *;
+using pi2_t = int __attribute__((address_space(2))) *;
+
+using ri1_t = int __attribute__((address_space(1))) &;
+using ri2_t = int __attribute__((address_space(2))) &;
+
+// CIR: cir.func {{.*}} @{{.*test_ptr.*}}
+// LLVM: define dso_local void @{{.*test_ptr.*}}
+void test_ptr() {
+  pi1_t ptr1;
+  pi2_t ptr2 = (pi2_t)ptr1;
+  // CIR:      %[[#PTR1:]] = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!cir.ptr<!s32i, target_address_space(1)>>, !cir.ptr<!s32i, target_address_space(1)>
+  // CIR-NEXT: %[[#CAST:]] = cir.cast address_space %[[#PTR1]] : !cir.ptr<!s32i, target_address_space(1)> -> !cir.ptr<!s32i, target_address_space(2)>
+  // CIR-NEXT: cir.store{{.*}} %[[#CAST]], %{{[0-9]+}} : !cir.ptr<!s32i, target_address_space(2)>, !cir.ptr<!cir.ptr<!s32i, target_address_space(2)>>
+
+  // LLVM:      %[[#PTR1:]] = load ptr addrspace(1), ptr %{{[0-9]+}}, align 8
+  // LLVM-NEXT: %[[#CAST:]] = addrspacecast ptr addrspace(1) %[[#PTR1]] to ptr addrspace(2)
+  // LLVM-NEXT: store ptr addrspace(2) %[[#CAST]], ptr %{{[0-9]+}}, align 8
+}
+
+// CIR: cir.func {{.*}} @{{.*test_ref.*}}
+// LLVM: define dso_local void @{{.*test_ref.*}}
+void test_ref() {
+  pi1_t ptr;
+  ri1_t ref1 = *ptr;
+  ri2_t ref2 = (ri2_t)ref1;
+  // CIR:      %[[#DEREF:]] = cir.load deref{{.*}}  %{{[0-9]+}} : !cir.ptr<!cir.ptr<!s32i, target_address_space(1)>>, !cir.ptr<!s32i, target_address_space(1)>
+  // CIR-NEXT: cir.store{{.*}} %[[#DEREF]], %[[#ALLOCAREF1:]] : !cir.ptr<!s32i, target_address_space(1)>, !cir.ptr<!cir.ptr<!s32i, target_address_space(1)>>
+  // CIR-NEXT: %[[#REF1:]] = cir.load{{.*}} %[[#ALLOCAREF1]] : !cir.ptr<!cir.ptr<!s32i, target_address_space(1)>>, !cir.ptr<!s32i, target_address_space(1)>
+  // CIR-NEXT: %[[#CAST:]] = cir.cast address_space %[[#REF1]] : !cir.ptr<!s32i, target_address_space(1)> -> !cir.ptr<!s32i, target_address_space(2)>
+  // CIR-NEXT: cir.store{{.*}} %[[#CAST]], %{{[0-9]+}} : !cir.ptr<!s32i, target_address_space(2)>, !cir.ptr<!cir.ptr<!s32i, target_address_space(2)>>
+
+  // LLVM:      %[[#DEREF:]] = load ptr addrspace(1), ptr %{{[0-9]+}}, align 8
+  // LLVM-NEXT: store ptr addrspace(1) %[[#DEREF]], ptr %[[#ALLOCAREF1:]], align 8
+  // LLVM-NEXT: %[[#REF1:]] = load ptr addrspace(1), ptr %[[#ALLOCAREF1]], align 8
+  // LLVM-NEXT: %[[#CAST:]] = addrspacecast ptr addrspace(1) %[[#REF1]] to ptr addrspace(2)
+  // LLVM-NEXT: store ptr addrspace(2) %[[#CAST]], ptr %{{[0-9]+}}, align 8
+}
+
+// CIR: cir.func {{.*}} @{{.*test_nullptr.*}}
+// LLVM: define dso_local void @{{.*test_nullptr.*}}
+void test_nullptr() {
+  constexpr pi1_t null1 = nullptr;
+  pi2_t ptr = (pi2_t)null1;
+  // CIR:      %[[#NULL1:]] = cir.const #cir.ptr<null> : !cir.ptr<!s32i, target_address_space(1)>
+  // CIR-NEXT: cir.store{{.*}} %[[#NULL1]], %{{[0-9]+}} : !cir.ptr<!s32i, target_address_space(1)>, !cir.ptr<!cir.ptr<!s32i, target_address_space(1)>>
+  // CIR-NEXT: %[[#NULL2:]] = cir.const #cir.ptr<null> : !cir.ptr<!s32i, target_address_space(2)>
+  // CIR-NEXT: cir.store{{.*}} %[[#NULL2]], %{{[0-9]+}} : !cir.ptr<!s32i, target_address_space(2)>, !cir.ptr<!cir.ptr<!s32i, target_address_space(2)>>
+
+  // LLVM:      store ptr addrspace(1) null, ptr %{{[0-9]+}}, align 8
+  // LLVM-NEXT: store ptr addrspace(2) null, ptr %{{[0-9]+}}, align 8
+}
+
+void test_side_effect(pi1_t b) {
+  pi2_t p = (pi2_t)(*b++, (int*)0);
+  // CIR:      %{{[0-9]+}} = cir.ptr_stride %{{[0-9]+}}, %{{[0-9]+}} : (!cir.ptr<!s32i, target_address_space(1)>, !s32i) -> !cir.ptr<!s32i, target_address_space(1)>
+  // CIR:      %[[#CAST:]] = cir.const #cir.ptr<null> : !cir.ptr<!s32i, target_address_space(2)>
+  // CIR-NEXT: cir.store{{.*}} %[[#CAST]], %{{[0-9]+}} : !cir.ptr<!s32i, target_address_space(2)>, !cir.ptr<!cir.ptr<!s32i, target_address_space(2)>>
+
+  // LLVM:      %{{[0-9]+}} = getelementptr i32, ptr addrspace(1) %{{[0-9]+}}, i64 1
+  // LLVM:      store ptr addrspace(2) null, ptr %{{[0-9]+}}, align 8
+
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/address-space.c b/clang/test/CIR/Incubator/CodeGen/address-space.c
new file mode 100644
index 0000000000000..84b6cbaae7ae7
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/address-space.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+// CIR: cir.func {{.*}} {{@.*foo.*}}(%arg0: !cir.ptr<!s32i, target_address_space(1)>
+// LLVM: define dso_local void @foo(ptr addrspace(1) %0)
+void foo(int __attribute__((address_space(1))) *arg) {
+  return;
+}
+
+// CIR: cir.func {{.*}} {{@.*bar.*}}(%arg0: !cir.ptr<!s32i, target_address_space(0)>
+// LLVM: define dso_local void @bar(ptr %0)
+void bar(int __attribute__((address_space(0))) *arg) {
+  return;
+}
+
+// CIR: cir.func {{.*}} {{@.*baz.*}}(%arg0: !cir.ptr<!s32i>
+// LLVM: define dso_local void @baz(ptr %0)
+void baz(int *arg) {
+  return;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/agg-copy.c b/clang/test/CIR/Incubator/CodeGen/agg-copy.c
new file mode 100644
index 0000000000000..e05c6e39c3bdf
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/agg-copy.c
@@ -0,0 +1,94 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+typedef struct {} S;
+
+typedef struct {
+    int a;
+    int b;
+    S s;
+} A;
+
+// CHECK: cir.func {{.*}} @foo1
+// CHECK:   [[TMP0:%.*]] = cir.alloca !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>, ["a1", init]
+// CHECK:   [[TMP1:%.*]] = cir.alloca !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>, ["a2", init]
+// CHECK:   cir.store{{.*}} %arg0, [[TMP0]] : !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>
+// CHECK:   cir.store{{.*}} %arg1, [[TMP1]] : !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>
+// CHECK:   [[TMP2:%.*]] = cir.load{{.*}} [[TMP0]] : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A>
+// CHECK:   [[TMP3:%.*]] = cir.const #cir.int<1> : !s32i
+// CHECK:   [[TMP4:%.*]] = cir.ptr_stride [[TMP2]], [[TMP3]] : (!cir.ptr<!rec_A>, !s32i) -> !cir.ptr<!rec_A>
+// CHECK:   [[TMP5:%.*]] = cir.load{{.*}} [[TMP1]] : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A>
+// CHECK:   [[TMP6:%.*]] = cir.const #cir.int<1> : !s32i
+// CHECK:   [[TMP7:%.*]] = cir.ptr_stride [[TMP5]], [[TMP6]] : (!cir.ptr<!rec_A>, !s32i) -> !cir.ptr<!rec_A>
+// CHECK:   cir.copy [[TMP7]] to [[TMP4]] : !cir.ptr<!rec_A>
+void foo1(A* a1, A* a2) {
+    a1[1] = a2[1];
+}
+
+// CHECK: cir.func {{.*}} @foo2
+// CHECK:    [[TMP0:%.*]] = cir.alloca !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>, ["a1", init]
+// CHECK:    [[TMP1:%.*]] = cir.alloca !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>, ["a2", init]
+// CHECK:    cir.store{{.*}} %arg0, [[TMP0]] : !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>
+// CHECK:    cir.store{{.*}} %arg1, [[TMP1]] : !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>
+// CHECK:    [[TMP2:%.*]] = cir.load{{.*}} [[TMP0]] : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A>
+// CHECK:    [[TMP3:%.*]] = cir.get_member [[TMP2]][2] {name = "s"} : !cir.ptr<!rec_A> -> !cir.ptr<!rec_S>
+// CHECK:    [[TMP4:%.*]] = cir.load{{.*}} [[TMP1]] : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A>
+// CHECK:    [[TMP5:%.*]] = cir.get_member [[TMP4]][2] {name = "s"} : !cir.ptr<!rec_A> -> !cir.ptr<!rec_S>
+// CHECK:    cir.copy [[TMP5]] to [[TMP3]] : !cir.ptr<!rec_S>
+void foo2(A* a1, A* a2) {
+    a1->s = a2->s;
+}
+
+// CHECK: cir.global external @a = #cir.zero : !rec_A
+// CHECK: cir.func {{.*}} @foo3
+// CHECK:    [[TMP0:%.*]] = cir.alloca !rec_A, !cir.ptr<!rec_A>, ["__retval"] {alignment = 4 : i64}
+// CHECK:    [[TMP1:%.*]] = cir.get_global @a : !cir.ptr<!rec_A>
+// CHECK:    cir.copy [[TMP1]] to [[TMP0]] : !cir.ptr<!rec_A>
+// CHECK:    [[TMP2:%.*]] = cir.load{{.*}} [[TMP0]] : !cir.ptr<!rec_A>, !rec_A
+// CHECK:    cir.return [[TMP2]] : !rec_A
+A a;
+A foo3(void) {
+    return a;
+}
+
+// CHECK: cir.func {{.*}} @foo4
+// CHECK:    [[TMP0:%.*]] = cir.alloca !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>, ["a1", init]
+// CHECK:    [[TMP1:%.*]] = cir.alloca !rec_A, !cir.ptr<!rec_A>, ["a2", init]
+// CHECK:    cir.store{{.*}} %arg0, [[TMP0]] : !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>
+// CHECK:    [[TMP2:%.*]] = cir.load deref{{.*}}  [[TMP0]] : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A>
+// CHECK:    cir.copy [[TMP2]] to [[TMP1]] : !cir.ptr<!rec_A>
+void foo4(A* a1) {
+    A a2 = *a1;
+}
+
+A create() { A a; return a; }
+
+// CHECK: cir.func {{.*@foo5}}
+// CHECK:   [[TMP0:%.*]] = cir.alloca !rec_A, !cir.ptr<!rec_A>,
+// CHECK:   [[TMP1:%.*]] = cir.alloca !rec_A, !cir.ptr<!rec_A>, ["tmp"] {alignment = 4 : i64}
+// CHECK:   [[TMP2:%.*]] = cir.call @create() : () -> !rec_A
+// CHECK:   cir.store{{.*}} [[TMP2]], [[TMP1]] : !rec_A, !cir.ptr<!rec_A>
+// CHECK:   cir.copy [[TMP1]] to [[TMP0]] : !cir.ptr<!rec_A>
+void foo5() {
+    A a;
+    a = create();
+}
+
+void foo6(A* a1) {
+  A a2 = (*a1);
+// CHECK: cir.func {{.*@foo6}}
+// CHECK:   [[TMP0:%.*]] = cir.alloca !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>, ["a1", init] {alignment = 8 : i64}
+// CHECK:   [[TMP1:%.*]] = cir.alloca !rec_A, !cir.ptr<!rec_A>, ["a2", init] {alignment = 4 : i64}
+// CHECK:   cir.store{{.*}} %arg0, [[TMP0]] : !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>
+// CHECK:   [[TMP2:%.*]] = cir.load deref{{.*}}  [[TMP0]] : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A>
+// CHECK:   cir.copy [[TMP2]] to [[TMP1]] : !cir.ptr<!rec_A>
+}
+
+volatile A vol_a;
+A foo7() {
+  return vol_a;
+}
+// CHECK: cir.func {{.*@foo7}}
+// CHECK:   %0 = cir.alloca
+// CHECK:   %1 = cir.get_global @vol_a
+// CHECK:   cir.copy %1 to %0 volatile
diff --git a/clang/test/CIR/Incubator/CodeGen/agg-init-inherit.cpp b/clang/test/CIR/Incubator/CodeGen/agg-init-inherit.cpp
new file mode 100644
index 0000000000000..321d7752ed6c1
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/agg-init-inherit.cpp
@@ -0,0 +1,68 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+
+struct A1 {
+  A1();
+};
+
+class B : public A1 {};
+
+void f1() {
+  B v{};
+}
+
+// CIR: cir.func {{.*}} @_Z2f1v()
+// CIR:     %0 = cir.alloca !rec_B, !cir.ptr<!rec_B>, ["v", init]
+// CIR:     %1 = cir.base_class_addr %0 : !cir.ptr<!rec_B> nonnull [0] -> !cir.ptr<!rec_A1>
+// CIR:     cir.call @_ZN2A1C2Ev(%1) : (!cir.ptr<!rec_A1>) -> ()
+// CIR:     cir.return
+// LLVM: define dso_local void @_Z2f1v()
+// LLVM:    %1 = alloca %class.B, i64 1, align 1
+// LLVM:    call void @_ZN2A1C2Ev(ptr %1)
+// LLVM:    ret void
+
+struct A2 {
+    A2();
+};
+class C : public A1, public A2 {};
+
+void f2() {
+  C v{};
+}
+
+// CIR: cir.func {{.*}} @_Z2f2v()
+// CIR:     %0 = cir.alloca !rec_C, !cir.ptr<!rec_C>, ["v", init]
+// CIR:     %1 = cir.base_class_addr %0 : !cir.ptr<!rec_C> nonnull [0] -> !cir.ptr<!rec_A1>
+// CIR:     cir.call @_ZN2A1C2Ev(%1) : (!cir.ptr<!rec_A1>) -> ()
+// CIR:     %2 = cir.base_class_addr %0 : !cir.ptr<!rec_C> nonnull [0] -> !cir.ptr<!rec_A2>
+// CIR:     cir.call @_ZN2A2C2Ev(%2) : (!cir.ptr<!rec_A2>) -> ()
+// CIR:     cir.return
+// LLVM: define dso_local void @_Z2f2v()
+// LLVM:    %1 = alloca %class.C, i64 1, align 1
+// LLVM:    call void @_ZN2A1C2Ev(ptr %1)
+// LLVM:    call void @_ZN2A2C2Ev(ptr %1)
+// LLVM:    ret void
+
+struct A3 {
+    A3();
+    ~A3();
+};
+class D : public A3 {};
+
+void f3() {
+  D v{};
+}
+
+// CIR: cir.func {{.*}} @_Z2f3v()
+// CIR:     %0 = cir.alloca !rec_D, !cir.ptr<!rec_D>, ["v", init]
+// CIR:     %1 = cir.base_class_addr %0 : !cir.ptr<!rec_D> nonnull [0] -> !cir.ptr<!rec_A3>
+// CIR:     cir.call @_ZN2A3C2Ev(%1) : (!cir.ptr<!rec_A3>) -> ()
+// CIR:     cir.call @_ZN1DD1Ev(%0) : (!cir.ptr<!rec_D>) -> ()
+// CIR:     cir.return
+// LLVM: define dso_local void @_Z2f3v()
+// LLVM:    %1 = alloca %class.D, i64 1, align 1
+// LLVM:    call void @_ZN2A3C2Ev(ptr %1)
+// LLVM:    call void @_ZN1DD1Ev(ptr %1)
+// LLVM:    ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/agg-init.cpp b/clang/test/CIR/Incubator/CodeGen/agg-init.cpp
new file mode 100644
index 0000000000000..577f67e735291
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/agg-init.cpp
@@ -0,0 +1,62 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -Wno-unused-value -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// CHECK: !rec_yep_ = !cir.record<struct "yep_" {!u32i, !u32i}>
+
+typedef enum xxy_ {
+  xxy_Low = 0,
+  xxy_High = 0x3f800000,
+  xxy_EnumSize = 0x7fffffff
+} xxy;
+
+typedef struct yep_ {
+  unsigned int Status;
+  xxy HC;
+} yop;
+
+void use() { yop{}; }
+
+// CHECK: cir.func {{.*}} @_Z3usev()
+// CHECK:   %0 = cir.alloca !rec_yep_, !cir.ptr<!rec_yep_>, ["agg.tmp.ensured"] {alignment = 4 : i64}
+// CHECK:   %1 = cir.get_member %0[0] {name = "Status"} : !cir.ptr<!rec_yep_> -> !cir.ptr<!u32i>
+// CHECK:   %2 = cir.const #cir.int<0> : !u32i
+// CHECK:   cir.store{{.*}} %2, %1 : !u32i, !cir.ptr<!u32i>
+// CHECK:   %3 = cir.get_member %0[1] {name = "HC"} : !cir.ptr<!rec_yep_> -> !cir.ptr<!u32i>
+// CHECK:   %4 = cir.const #cir.int<0> : !u32i
+// CHECK:   cir.store{{.*}} %4, %3 : !u32i, !cir.ptr<!u32i>
+// CHECK:   cir.return
+// CHECK: }
+
+typedef unsigned long long Flags;
+
+typedef enum XType {
+    A = 0,
+    Y = 1000066001,
+    X = 1000070000
+} XType;
+
+typedef struct Yo {
+    XType type;
+    const void* __attribute__((__may_alias__)) next;
+    Flags createFlags;
+} Yo;
+
+void yo() {
+  Yo ext = {X};
+  Yo ext2 = {Y, &ext};
+}
+
+// CHECK: cir.func {{.*}} @_Z2yov()
+// CHECK:   %0 = cir.alloca !rec_Yo, !cir.ptr<!rec_Yo>, ["ext", init] {alignment = 8 : i64}
+// CHECK:   %1 = cir.alloca !rec_Yo, !cir.ptr<!rec_Yo>, ["ext2", init] {alignment = 8 : i64}
+// CHECK:   %2 = cir.const #cir.const_record<{#cir.int<1000070000> : !u32i, #cir.ptr<null> : !cir.ptr<!void>, #cir.int<0> : !u64i}> : !rec_Yo
+// CHECK:   cir.store{{.*}} %2, %0 : !rec_Yo, !cir.ptr<!rec_Yo>
+// CHECK:   %3 = cir.get_member %1[0] {name = "type"} : !cir.ptr<!rec_Yo> -> !cir.ptr<!u32i>
+// CHECK:   %4 = cir.const #cir.int<1000066001> : !u32i
+// CHECK:   cir.store{{.*}} %4, %3 : !u32i, !cir.ptr<!u32i>
+// CHECK:   %5 = cir.get_member %1[1] {name = "next"} : !cir.ptr<!rec_Yo> -> !cir.ptr<!cir.ptr<!void>>
+// CHECK:   %6 = cir.cast bitcast %0 : !cir.ptr<!rec_Yo> -> !cir.ptr<!void>
+// CHECK:   cir.store{{.*}} %6, %5 : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// CHECK:   %7 = cir.get_member %1[2] {name = "createFlags"} : !cir.ptr<!rec_Yo> -> !cir.ptr<!u64i>
+// CHECK:   %8 = cir.const #cir.int<0> : !u64i
+// CHECK:   cir.store{{.*}} %8, %7 : !u64i, !cir.ptr<!u64i>
diff --git a/clang/test/CIR/Incubator/CodeGen/agg-init2.cpp b/clang/test/CIR/Incubator/CodeGen/agg-init2.cpp
new file mode 100644
index 0000000000000..1b08ba558a39a
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/agg-init2.cpp
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -Wno-unused-value -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -Wno-unused-value -emit-llvm %s -o %t-og.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t-og.ll %s
+
+// CHECK: !rec_Zero = !cir.record<struct "Zero" padded {!u8i}>
+
+struct Zero {
+  void yolo();
+};
+
+void f() {
+  Zero z0 = Zero();
+  // {} no element init.
+  Zero z1 = Zero{};
+}
+
+// TODO: In this case, z1 gets "initialized" with an undef value. Should we
+//       treat that as uninitialized? Should it even be happening?
+
+// Trivial default constructor call is lowered away since it does nothing.
+// CHECK: cir.func {{.*}} @_Z1fv()
+// CHECK:     %[[Z0:.*]] = cir.alloca !rec_Zero, !cir.ptr<!rec_Zero>, ["z0", init]
+// CHECK:     %[[Z1:.*]] = cir.alloca !rec_Zero, !cir.ptr<!rec_Zero>, ["z1", init]
+// CHECK-NOT: cir.call @_ZN4ZeroC1Ev
+// CHECK:     %[[UNDEF:.*]] = cir.const #cir.undef : !rec_Zero
+// CHECK:     cir.store{{.*}} %[[UNDEF]], %[[Z1]] : !rec_Zero, !cir.ptr<!rec_Zero>
+// CHECK:     cir.return
+
+// LLVM-LABEL: define {{.*}} @_Z1fv()
+// LLVM-NOT:     call {{.*}} @_ZN4ZeroC1Ev
+// LLVM:         ret void
+
+// OGCG-LABEL: define {{.*}} @_Z1fv()
+// OGCG-NOT:     call {{.*}} @_ZN4ZeroC1Ev
+// OGCG:         ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/align-load.c b/clang/test/CIR/Incubator/CodeGen/align-load.c
new file mode 100644
index 0000000000000..a2e386d7f9169
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/align-load.c
@@ -0,0 +1,80 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+struct S {
+  char b;
+  short s;
+  int i;
+  float f;
+  double d;
+};
+
+void accessStruct(struct S u) {
+  u.b;
+  u.s;
+  u.i;
+  u.f;
+  u.d;
+}
+
+// CIR: cir.func {{.*}} @accessStruct
+// CIR:   cir.load align(8)
+// CIR:   cir.load align(2)
+// CIR:   cir.load align(4)
+// CIR:   cir.load align(8)
+// CIR:   cir.load align(8)
+
+// LLVM: define{{.*}} @accessStruct
+// LLVM:   load i8, ptr {{.*}}, align 8
+// LLVM:   load i16, ptr {{.*}}, align 2
+// LLVM:   load i32, ptr {{.*}}, align 4
+// LLVM:   load float, ptr {{.*}}, align 8
+// LLVM:   load double, ptr {{.*}}, align 8
+
+// OGCG: define{{.*}} @accessStruct
+// OGCG:   load i8, ptr {{.*}}, align 8
+// OGCG:   load i16, ptr {{.*}}, align 2
+// OGCG:   load i32, ptr {{.*}}, align 4
+// OGCG:   load float, ptr {{.*}}, align 8
+// OGCG:   load double, ptr {{.*}}, align 8
+
+union U {
+  char b;
+  short s;
+  int i;
+  float f;
+  double d;
+};
+
+void accessUnion(union U u) {
+  u.b;
+  u.s;
+  u.i;
+  u.f;
+  u.d;
+}
+
+// CIR: cir.func {{.*}} @accessUnion
+// CIR:   cir.load align(8)
+// CIR:   cir.load align(8)
+// CIR:   cir.load align(8)
+// CIR:   cir.load align(8)
+// CIR:   cir.load align(8)
+
+// LLVM: define{{.*}} @accessUnion
+// LLVM:   load i8, ptr {{.*}}, align 8
+// LLVM:   load i16, ptr {{.*}}, align 8
+// LLVM:   load i32, ptr {{.*}}, align 8
+// LLVM:   load float, ptr {{.*}}, align 8
+// LLVM:   load double, ptr {{.*}}, align 8
+
+// OGCG: define{{.*}} @accessUnion
+// OGCG:   load i8, ptr {{.*}}, align 8
+// OGCG:   load i16, ptr {{.*}}, align 8
+// OGCG:   load i32, ptr {{.*}}, align 8
+// OGCG:   load float, ptr {{.*}}, align 8
+// OGCG:   load double, ptr {{.*}}, align 8
diff --git a/clang/test/CIR/Incubator/CodeGen/align-store.c b/clang/test/CIR/Incubator/CodeGen/align-store.c
new file mode 100644
index 0000000000000..9cc4282705de6
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/align-store.c
@@ -0,0 +1,92 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+// PR5279 - Reduced alignment on typedef.
+typedef int myint __attribute__((aligned(1)));
+
+void test1(myint *p) {
+  *p = 0;
+}
+
+// CIR-LABEL: @test1
+// CIR:  cir.store align(1) %{{.*}}, %{{.*}} : !s32i, !cir.ptr<!s32i>
+
+// LLVM: @test1
+// LLVM: store i32 0, ptr {{.*}}, align 1
+
+// OGCG: @test1
+// OGCG: store i32 0, ptr {{.*}}, align 1
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned long long uint64_t;
+typedef struct
+{
+   uint16_t i16;
+   uint32_t i32;
+   uint16_t i16_2;
+   uint32_t i32_2;
+} StructA;
+
+void test2(StructA* p) {
+  p->i16 = 1;
+  p->i32 = 2;
+  p->i16_2 = 3;
+  p->i32_2 = 4;
+}
+
+// CIR-LABEL: @test2
+// CIR:  cir.store align(4) %{{.*}}, %{{.*}} : !u16i, !cir.ptr<!u16i>
+// CIR:  cir.store align(4) %{{.*}}, %{{.*}} : !u32i, !cir.ptr<!u32i>
+// CIR:  cir.store align(4) %{{.*}}, %{{.*}} : !u16i, !cir.ptr<!u16i>
+// CIR:  cir.store align(4) %{{.*}}, %{{.*}} : !u32i, !cir.ptr<!u32i>
+
+// LLVM: @test2
+// LLVM: store i16 1, ptr {{.*}}, align 4
+// LLVM: store i32 2, ptr {{.*}}, align 4
+// LLVM: store i16 3, ptr {{.*}}, align 4
+// LLVM: store i32 4, ptr {{.*}}, align 4
+
+// OGCG: @test2
+// OGCG: store i16 1, ptr {{.*}}, align 4
+// OGCG: store i32 2, ptr {{.*}}, align 4
+// OGCG: store i16 3, ptr {{.*}}, align 4
+// OGCG: store i32 4, ptr {{.*}}, align 4
+
+typedef struct {
+  short a;
+  short b;
+  short c;
+  short d;
+  long e;   // Make the struct 8-byte aligned
+} StructB;
+
+void test3(StructB *ptr) {
+  ptr->a = 1;  // align 8
+  ptr->b = 2;  // align 2
+  ptr->c = 3;  // align 4
+  ptr->d = 4;  // align 2
+}
+
+// CIR-LABEL: @test3
+// CIR:  cir.store align(8) %{{.*}}, %{{.*}} : !s16i, !cir.ptr<!s16i>
+// CIR:  cir.store align(2) %{{.*}}, %{{.*}} : !s16i, !cir.ptr<!s16i>
+// CIR:  cir.store align(4) %{{.*}}, %{{.*}} : !s16i, !cir.ptr<!s16i>
+// CIR:  cir.store align(2) %{{.*}}, %{{.*}} : !s16i, !cir.ptr<!s16i>
+
+// LLVM: @test3
+// LLVM: store i16 1, ptr {{.*}}, align 8
+// LLVM: store i16 2, ptr {{.*}}, align 2
+// LLVM: store i16 3, ptr {{.*}}, align 4
+// LLVM: store i16 4, ptr {{.*}}, align 2
+
+// OGCG: @test3
+// OGCG: store i16 1, ptr {{.*}}, align 8
+// OGCG: store i16 2, ptr {{.*}}, align 2
+// OGCG: store i16 3, ptr {{.*}}, align 4
+// OGCG: store i16 4, ptr {{.*}}, align 2
diff --git a/clang/test/CIR/Incubator/CodeGen/alignment.c b/clang/test/CIR/Incubator/CodeGen/alignment.c
new file mode 100644
index 0000000000000..c01eaa9ca6492
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/alignment.c
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+__attribute((aligned(32))) float a[128];
+union {int a[4]; __attribute((aligned(32))) float b[4];} b;
+
+// CIR: @a = #cir.zero {{.*}}alignment = 32
+// CIR: @b = #cir.zero{{.*}}alignment = 32
+
+// LLVM: @a = {{.*}}zeroinitializer, align 32
+// LLVM: @b = {{.*}}zeroinitializer, align 32
+
+// OGCG: @a = {{.*}}zeroinitializer, align 32
+// OGCG: @b = {{.*}}zeroinitializer, align 32
+
+long long int test5[1024];
+// CIR: @test5 = #cir.zero {{.*}}alignment = 16
+// LLVM: @test5 = {{.*}}global [1024 x i64] zeroinitializer, align 16
+// OGCG: @test5 = {{.*}}global [1024 x i64] zeroinitializer, align 16
diff --git a/clang/test/CIR/Incubator/CodeGen/amdgpu-address-spaces.cpp b/clang/test/CIR/Incubator/CodeGen/amdgpu-address-spaces.cpp
new file mode 100644
index 0000000000000..35ceed46189dc
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/amdgpu-address-spaces.cpp
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// Test address space handling for AMDGPU target in C++ mode (non-OpenCL/HIP).
+// This exercises getGlobalVarAddressSpace.
+
+// Test default address space for globals without explicit AS.
+// For AMDGPU in non-OpenCL/HIP mode, globals default to AS 1 (global).
+int globalVar = 123;
+
+// CIR-DAG: cir.global external lang_address_space(offload_global) @globalVar = #cir.int<123> : !s32i
+// LLVM-DAG: @globalVar = addrspace(1) global i32 123, align 4
+// OGCG-DAG: @globalVar = addrspace(1) global i32 123, align 4
+
+// Test non-const global array goes to global AS.
+int globalArray[4] = {1, 2, 3, 4};
+
+// CIR-DAG: cir.global external lang_address_space(offload_global) @globalArray = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.array<!s32i x 4>
+// LLVM-DAG: @globalArray = addrspace(1) global [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4
+// OGCG-DAG: @globalArray = addrspace(1) global [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4
+
+// Test static global goes to global AS.
+static int staticGlobal = 555;
+
+// CIR-DAG: cir.global "private" internal{{.*}}lang_address_space(offload_global) @_ZL12staticGlobal = #cir.int<555> : !s32i
+// LLVM-DAG: @_ZL12staticGlobal = internal addrspace(1) global i32 555, align 4
+// OGCG-DAG: @_ZL12staticGlobal = internal addrspace(1) global i32 555, align 4
+
+// Test constant initialization promotion to AS 4 (constant).
+// Use extern to force emission since const globals are otherwise optimized away.
+extern const int constGlobal = 456;
+
+// CIR-DAG: cir.global constant external target_address_space(4) @constGlobal = #cir.int<456> : !s32i
+// LLVM-DAG: @constGlobal = addrspace(4) constant i32 456, align 4
+// OGCG-DAG: @constGlobal = addrspace(4) constant i32 456, align 4
+
+// Test extern const array goes to constant AS.
+extern const int constArray[3] = {10, 20, 30};
+
+// CIR-DAG: cir.global constant external target_address_space(4) @constArray = #cir.const_array<[#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.int<30> : !s32i]> : !cir.array<!s32i x 3>
+// LLVM-DAG: @constArray = addrspace(4) constant [3 x i32] [i32 10, i32 20, i32 30], align 4
+// OGCG-DAG: @constArray = addrspace(4) constant [3 x i32] [i32 10, i32 20, i32 30], align 4
+
+// Use the static variable to ensure it's emitted.
+int getStaticGlobal() { return staticGlobal; }
diff --git a/clang/test/CIR/Incubator/CodeGen/analysis-only.cpp b/clang/test/CIR/Incubator/CodeGen/analysis-only.cpp
new file mode 100644
index 0000000000000..7f427f0de92fd
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/analysis-only.cpp
@@ -0,0 +1,8 @@
+// Check `-fclangir-analysis-only` would generate code correctly.
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir-analysis-only -std=c++20 \
+// RUN:     -O2 -emit-llvm %s -o - | FileCheck %s
+
+extern "C" void foo() {}
+
+// CHECK: define{{.*}} @foo(
+
diff --git a/clang/test/CIR/Incubator/CodeGen/annotations-declaration.c b/clang/test/CIR/Incubator/CodeGen/annotations-declaration.c
new file mode 100644
index 0000000000000..42791f09dd772
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/annotations-declaration.c
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+__attribute__((annotate("bar"))) int foo();
+
+int main() {
+  return foo();
+}
+
+// CIR: module {{.*}}annotations-declaration.c" attributes
+// CIR-SAME: {cir.global_annotations = #cir<global_annotations [
+// CIR-SAME: ["foo", #cir.annotation<name = "bar", args = []>]
+
+// LLVM: target triple
+// LLVM-DAG: private unnamed_addr constant [4 x i8] c"bar\00", section "llvm.metadata"
+
+// LLVM: @llvm.global.annotations = appending global [1 x { ptr, ptr, ptr, i32, ptr }] [{
+// LLVM-SAME: { ptr @foo,
+// LLVM-SAME: }], section "llvm.metadata"
+
+// OGCG: target triple
+// OGCG-DAG: private unnamed_addr constant [4 x i8] c"bar\00", section "llvm.metadata"
+
+// OGCG: @llvm.global.annotations = appending global [1 x { ptr, ptr, ptr, i32, ptr }] [{
+// OGCG-SAME: { ptr @foo,
+// OGCG-SAME: }], section "llvm.metadata"
diff --git a/clang/test/CIR/Incubator/CodeGen/annotations-var.c b/clang/test/CIR/Incubator/CodeGen/annotations-var.c
new file mode 100644
index 0000000000000..7d7fb31be260b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/annotations-var.c
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+// CIR-DAG:  cir.global external @globalvar = #cir.int<3> : !s32i [#cir.annotation<name = "globalvar_ann_0", args = []>] {alignment = 4 : i64}
+// CIR-DAG:  cir.global external @globalvar2 = #cir.int<2> : !s32i [#cir.annotation<name = "common_ann", args = ["os", 21 : i32]>] {alignment = 4 : i64}
+
+// LLVM-DAG: @.str.annotation = private unnamed_addr constant [15 x i8] c"localvar_ann_0\00", section "llvm.metadata"
+// LLVM-DAG: @.str.1.annotation = private unnamed_addr constant [{{[0-9]+}} x i8] c"{{.*}}annotations-var.c\00", section "llvm.metadata"
+// LLVM-DAG: @.str.2.annotation = private unnamed_addr constant [15 x i8] c"localvar_ann_1\00", section "llvm.metadata"
+// LLVM-DAG: @.str.3.annotation = private unnamed_addr constant [11 x i8] c"common_ann\00", section "llvm.metadata"
+// LLVM-DAG: @.str.annotation.arg = private unnamed_addr constant [3 x i8] c"os\00", align 1
+// LLVM-DAG: @.args.annotation = private unnamed_addr constant { ptr, i32 } { ptr @.str.annotation.arg, i32 21 }, section "llvm.metadata"
+// LLVM-DAG: @.str.4.annotation = private unnamed_addr constant [16 x i8] c"globalvar_ann_0\00", section "llvm.metadata"
+// LLVM-DAG: @llvm.global.annotations = appending global [2 x { ptr, ptr, ptr, i32, ptr }]
+// LLVM-DAG-SAME: [{ ptr, ptr, ptr, i32, ptr } { ptr @globalvar, ptr @.str.4.annotation, ptr @.str.1.annotation, i32 18, ptr null }, { ptr, ptr, ptr, i32, ptr }
+// LLVM-DAG-SAME: { ptr @globalvar2, ptr @.str.3.annotation, ptr @.str.1.annotation, i32 19, ptr @.args.annotation }], section "llvm.metadata"
+
+int globalvar __attribute__((annotate("globalvar_ann_0"))) = 3;
+int globalvar2 __attribute__((annotate("common_ann", "os", 21))) = 2;
+void local(void) {
+    int localvar __attribute__((annotate("localvar_ann_0"))) __attribute__((annotate("localvar_ann_1"))) = 3;
+    int localvar2 __attribute__((annotate("localvar_ann_0"))) = 3;
+    int localvar3 __attribute__((annotate("common_ann", "os", 21)))  = 3;
+// CIR-LABEL: @local
+// CIR: %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["localvar", init] [#cir.annotation<name = "localvar_ann_0", args = []>, #cir.annotation<name = "localvar_ann_1", args = []>]
+// CIR: %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["localvar2", init] [#cir.annotation<name = "localvar_ann_0", args = []>]
+// CIR: %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["localvar3", init] [#cir.annotation<name = "common_ann", args = ["os", 21 : i32]>]
+
+
+// LLVM-LABEL: @local
+// LLVM: %[[ALLOC:.*]] = alloca i32
+// LLVM: call void @llvm.var.annotation.p0.p0(ptr %[[ALLOC]], ptr @.str.annotation, ptr @.str.1.annotation, i32 23, ptr null)
+// LLVM: call void @llvm.var.annotation.p0.p0(ptr %[[ALLOC]], ptr @.str.2.annotation, ptr @.str.1.annotation, i32 23, ptr null)
+// LLVM: %[[ALLOC2:.*]] = alloca i32
+// LLVM: call void @llvm.var.annotation.p0.p0(ptr %[[ALLOC2]], ptr @.str.annotation, ptr @.str.1.annotation, i32 24, ptr null)
+// LLVM: %[[ALLOC3:.*]] = alloca i32
+// LLVM: call void @llvm.var.annotation.p0.p0(ptr %[[ALLOC3]], ptr @.str.3.annotation,
+// LLVM-SAME: ptr @.str.1.annotation, i32 25, ptr @.args.annotation)
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/applearm64-array-cookies.cpp b/clang/test/CIR/Incubator/CodeGen/applearm64-array-cookies.cpp
new file mode 100644
index 0000000000000..eea677a09606c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/applearm64-array-cookies.cpp
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -std=c++20 -triple=arm64e-apple-darwin -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+class C {
+  public:
+    ~C();
+};
+
+void t_constant_size_nontrivial() {
+  auto p = new C[3];
+}
+
+// CHECK:  cir.func{{.*}} @_Z26t_constant_size_nontrivialv()
+// CHECK:    %[[#NUM_ELEMENTS:]] = cir.const #cir.int<3> : !u64i
+// CHECK:    %[[#SIZE_WITHOUT_COOKIE:]] = cir.const #cir.int<3> : !u64i
+// CHECK:    %[[#ALLOCATION_SIZE:]] = cir.const #cir.int<19> : !u64i
+// CHECK:    %[[#ALLOC_PTR:]] = cir.call @_Znam(%[[#ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr<!void>
+// CHECK:    %[[#COOKIE_PTR:]] = cir.cast bitcast %[[#ALLOC_PTR]] : !cir.ptr<!void> -> !cir.ptr<!u64i>
+// CHECK:    %[[#ELEMENT_SIZE:]] = cir.const #cir.int<1> : !u64i
+// CHECK:    cir.store{{.*}} %[[#ELEMENT_SIZE]], %[[#COOKIE_PTR]] : !u64i, !cir.ptr<!u64i>
+// CHECK:    %[[#SECOND_COOKIE_OFFSET:]] = cir.const #cir.int<1> : !s32i
+// CHECK:    %[[#COOKIE_PTR2:]] = cir.ptr_stride %[[#COOKIE_PTR]], %[[#SECOND_COOKIE_OFFSET]] : (!cir.ptr<!u64i>, !s32i) -> !cir.ptr<!u64i>
+// CHECK:    cir.store{{.*}} %[[#NUM_ELEMENTS]], %[[#COOKIE_PTR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK:    %[[#COOKIE_SIZE:]] = cir.const #cir.int<16> : !s32i
+// CHECK:    %[[#ALLOC_AS_I8:]] = cir.cast bitcast %[[#ALLOC_PTR]] : !cir.ptr<!void> -> !cir.ptr<!u8i>
+// CHECK:    cir.ptr_stride %[[#ALLOC_AS_I8]], %[[#COOKIE_SIZE]] : (!cir.ptr<!u8i>, !s32i) -> !cir.ptr<!u8i>
+
+class D {
+  public:
+    int x;
+    ~D();
+};
+
+void t_constant_size_nontrivial2() {
+  auto p = new D[3];
+}
+
+// In this test SIZE_WITHOUT_COOKIE isn't used, but it would be if there were
+// an initializer.
+
+// CHECK:  cir.func{{.*}} @_Z27t_constant_size_nontrivial2v()
+// CHECK:    %[[#NUM_ELEMENTS:]] = cir.const #cir.int<3> : !u64i
+// CHECK:    %[[#SIZE_WITHOUT_COOKIE:]] = cir.const #cir.int<12> : !u64i
+// CHECK:    %[[#ALLOCATION_SIZE:]] = cir.const #cir.int<28> : !u64i
+// CHECK:    %[[#ALLOC_PTR:]] = cir.call @_Znam(%[[#ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr<!void>
+// CHECK:    %[[#COOKIE_PTR:]] = cir.cast bitcast %[[#ALLOC_PTR]] : !cir.ptr<!void> -> !cir.ptr<!u64i>
+// CHECK:    %[[#ELEMENT_SIZE:]] = cir.const #cir.int<4> : !u64i
+// CHECK:    cir.store{{.*}} %[[#ELEMENT_SIZE]], %[[#COOKIE_PTR]] : !u64i, !cir.ptr<!u64i>
+// CHECK:    %[[#SECOND_COOKIE_OFFSET:]] = cir.const #cir.int<1> : !s32i
+// CHECK:    %[[#COOKIE_PTR2:]] = cir.ptr_stride %[[#COOKIE_PTR]], %[[#SECOND_COOKIE_OFFSET]] : (!cir.ptr<!u64i>, !s32i) -> !cir.ptr<!u64i>
+// CHECK:    cir.store{{.*}} %[[#NUM_ELEMENTS]], %[[#COOKIE_PTR2]] : !u64i, !cir.ptr<!u64i>
+// CHECK:    %[[#COOKIE_SIZE:]] = cir.const #cir.int<16> : !s32i
+// CHECK:    %[[#ALLOC_AS_I8:]] = cir.cast bitcast %[[#ALLOC_PTR]] : !cir.ptr<!void> -> !cir.ptr<!u8i>
+// CHECK:    cir.ptr_stride %[[#ALLOC_AS_I8]], %[[#COOKIE_SIZE]] : (!cir.ptr<!u8i>, !s32i) -> !cir.ptr<!u8i>
diff --git a/clang/test/CIR/Incubator/CodeGen/array-init-destroy.cpp b/clang/test/CIR/Incubator/CodeGen/array-init-destroy.cpp
new file mode 100644
index 0000000000000..938931c2d8720
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/array-init-destroy.cpp
@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir &> %t1.cir
+// RUN: FileCheck --input-file=%t1.cir -check-prefix=BEFORE %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir &> %t2.cir
+// RUN: FileCheck --input-file=%t2.cir -check-prefix=AFTER %s
+// Note: The run lines above send the final CIR to %t.cir, but that's ignored.
+//       The test checks the CIR before and after the cir-lowering-prepare pass.
+void foo() noexcept;
+
+class xpto {
+public:
+  xpto() {
+    foo();
+  }
+  int i;
+  float f;
+  ~xpto() {
+    foo();
+  }
+};
+
+void x() {
+  xpto array[2];
+}
+
+// BEFORE: cir.func {{.*}} @_Z1xv()
+// BEFORE:   %[[ArrayAddr:.*]] = cir.alloca !cir.array<!rec_xpto x 2>
+
+// BEFORE:   cir.array.ctor(%[[ArrayAddr]] : !cir.ptr<!cir.array<!rec_xpto x 2>>) {
+// BEFORE:   ^bb0(%arg0: !cir.ptr<!rec_xpto>
+// BEFORE:     cir.call @_ZN4xptoC1Ev(%arg0) : (!cir.ptr<!rec_xpto>) -> ()
+// BEFORE:     cir.yield
+// BEFORE:   }
+
+// BEFORE:   cir.array.dtor(%[[ArrayAddr]] : !cir.ptr<!cir.array<!rec_xpto x 2>>) {
+// BEFORE:   ^bb0(%arg0: !cir.ptr<!rec_xpto>
+// BEFORE:     cir.call @_ZN4xptoD1Ev(%arg0) : (!cir.ptr<!rec_xpto>) -> ()
+// BEFORE:     cir.yield
+// BEFORE:   }
+
+// AFTER: cir.func {{.*}} @_Z1xv()
+// AFTER: %[[ArrayAddr0:.*]] = cir.alloca !cir.array<!rec_xpto x 2>
+// AFTER: %[[ConstTwo:.*]] = cir.const #cir.int<2> : !u64i
+// AFTER: %[[ArrayBegin:.*]] = cir.cast array_to_ptrdecay %[[ArrayAddr0]] : !cir.ptr<!cir.array<!rec_xpto x 2>> -> !cir.ptr<!rec_xpto>
+// AFTER: %[[ArrayPastEnd:.*]] = cir.ptr_stride %[[ArrayBegin]], %[[ConstTwo]] : (!cir.ptr<!rec_xpto>, !u64i) -> !cir.ptr<!rec_xpto>
+// AFTER: %[[TmpIdx:.*]] = cir.alloca !cir.ptr<!rec_xpto>, !cir.ptr<!cir.ptr<!rec_xpto>>, ["__array_idx"] {alignment = 1 : i64}
+// AFTER: cir.store %[[ArrayBegin]], %[[TmpIdx]] : !cir.ptr<!rec_xpto>, !cir.ptr<!cir.ptr<!rec_xpto>>
+// AFTER: cir.do {
+// AFTER:   %[[ArrayElt:.*]] = cir.load %[[TmpIdx]] : !cir.ptr<!cir.ptr<!rec_xpto>>, !cir.ptr<!rec_xpto>
+// AFTER:   cir.call @_ZN4xptoC1Ev(%[[ArrayElt]]) : (!cir.ptr<!rec_xpto>) -> ()
+// AFTER:   %[[ConstOne:.*]] = cir.const #cir.int<1> : !u64i
+// AFTER:   %[[NextElt:.*]] = cir.ptr_stride %[[ArrayElt]], %[[ConstOne]] : (!cir.ptr<!rec_xpto>, !u64i) -> !cir.ptr<!rec_xpto>
+// AFTER:   cir.store %[[NextElt]], %[[TmpIdx]] : !cir.ptr<!rec_xpto>, !cir.ptr<!cir.ptr<!rec_xpto>>
+// AFTER:   cir.yield
+// AFTER: } while {
+// AFTER:   %[[ArrayElt:.*]] = cir.load %[[TmpIdx]] : !cir.ptr<!cir.ptr<!rec_xpto>>, !cir.ptr<!rec_xpto>
+// AFTER:   %[[ExitCond:.*]] = cir.cmp(ne, %[[ArrayElt]], %[[ArrayPastEnd]]) : !cir.ptr<!rec_xpto>, !cir.bool
+// AFTER:   cir.condition(%[[ExitCond]])
+// AFTER: }
+// AFTER: %[[ConstOne:.*]] = cir.const #cir.int<1> : !u64i
+// AFTER: %[[ArrayBegin:.*]] = cir.cast array_to_ptrdecay %[[ArrayAddr0]] : !cir.ptr<!cir.array<!rec_xpto x 2>> -> !cir.ptr<!rec_xpto>
+// AFTER: %[[ArrayEnd:.*]] = cir.ptr_stride %[[ArrayBegin]], %[[ConstOne]] : (!cir.ptr<!rec_xpto>, !u64i) -> !cir.ptr<!rec_xpto>
+// AFTER: %[[TmpIdx:.*]] = cir.alloca !cir.ptr<!rec_xpto>, !cir.ptr<!cir.ptr<!rec_xpto>>, ["__array_idx"] {alignment = 1 : i64}
+// AFTER: cir.store %[[ArrayEnd]], %[[TmpIdx]] : !cir.ptr<!rec_xpto>, !cir.ptr<!cir.ptr<!rec_xpto>>
+// AFTER  cir.do {
+// AFTER    %[[ArrElt:.*]] = cir.load{{.*}} %[[TmpIdx]]
+// AFTER    cir.call @_ZN4xptoD1Ev(%[[ArrElt]])  : (!cir.ptr<!rec_xpto>) -> ()
+// AFTER    %[[ConstNegOne:.*]] = cir.const #cir.int<-1> : !s64i
+// AFTER    %[[NextElt:.*]] = cir.ptr_stride %[[ArrElt]], %[[ConstNegOne]] : (!cir.ptr<!rec_xpto>, !s64i) -> !cir.ptr<!rec_xpto>
+// AFTER    cir.store %[[NextElt]], %[[TmpIdx]] : !cir.ptr<!rec_xpto>, !cir.ptr<!cir.ptr<!rec_xpto>>
+// AFTER    cir.yield
+// AFTER  } while {
+// AFTER    %[[ArrElt:.*]] = cir.load %[[TmpIdx]] : !cir.ptr<!cir.ptr<!rec_xpto>>, !cir.ptr<!rec_xpto>
+// AFTER:   %[[ExitCond:.*]] = cir.cmp(ne, %[[ArrayElt]], %[[ArrayBegin]]) : !cir.ptr<!rec_xpto>, !cir.bool
+// AFTER    cir.condition(%[[ExitCond]])
+// AFTER   }
+// AFTER: cir.return
diff --git a/clang/test/CIR/Incubator/CodeGen/array-init-partial.cpp b/clang/test/CIR/Incubator/CodeGen/array-init-partial.cpp
new file mode 100644
index 0000000000000..b6299840bf897
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/array-init-partial.cpp
@@ -0,0 +1,52 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -emit-llvm %s -o %t-og.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t-og.ll %s
+
+struct HasDtor {
+  HasDtor();
+  HasDtor(int x);
+  ~HasDtor();
+  int value;
+};
+
+// Test: Partial explicit initialization (3 of 5 elements)
+// This should create endOfInit tracking for exception safety
+
+// CIR-LABEL: @_Z{{.*}}test_partial
+// CIR: arrayinit.endOfInit
+// CIR: cir.call @_ZN7HasDtorC1Ei
+// CIR: cir.call @_ZN7HasDtorC1Ei
+// CIR: cir.call @_ZN7HasDtorC1Ei
+// CIR: cir.do {
+// CIR: cir.call @_ZN7HasDtorC1Ev
+// CIR: cir.do {
+// CIR: cir.call @_ZN7HasDtorD1Ev
+
+// LLVM-LABEL: define {{.*}}void @_Z12test_partialv()
+// LLVM: %[[ENDOFINIT:.*]] = alloca ptr
+// LLVM: %[[ARR:.*]] = alloca [5 x %struct.HasDtor]
+// LLVM: store ptr %{{.*}}, ptr %[[ENDOFINIT]]
+// LLVM: call void @_ZN7HasDtorC1Ei(ptr %{{.*}}, i32 1)
+// LLVM: store ptr %{{.*}}, ptr %[[ENDOFINIT]]
+// LLVM: call void @_ZN7HasDtorC1Ei(ptr %{{.*}}, i32 2)
+// LLVM: store ptr %{{.*}}, ptr %[[ENDOFINIT]]
+// LLVM: call void @_ZN7HasDtorC1Ei(ptr %{{.*}}, i32 3)
+// LLVM: call void @_ZN7HasDtorC1Ev(ptr %{{.*}})
+// LLVM: call void @_ZN7HasDtorD1Ev(ptr %{{.*}})
+
+// OGCG-LABEL: define {{.*}}void @_Z12test_partialv()
+// OGCG: %[[ARR:.*]] = alloca [5 x %struct.HasDtor]
+// OGCG: call void @_ZN7HasDtorC1Ei(ptr {{.*}} %[[ARR]], i32 {{.*}} 1)
+// OGCG: %[[ELEM1:.*]] = getelementptr {{.*}} %struct.HasDtor, ptr %[[ARR]], i64 1
+// OGCG: call void @_ZN7HasDtorC1Ei(ptr {{.*}} %[[ELEM1]], i32 {{.*}} 2)
+// OGCG: %[[ELEM2:.*]] = getelementptr {{.*}} %struct.HasDtor, ptr %[[ARR]], i64 2
+// OGCG: call void @_ZN7HasDtorC1Ei(ptr {{.*}} %[[ELEM2]], i32 {{.*}} 3)
+// OGCG: call void @_ZN7HasDtorC1Ev(ptr {{.*}})
+// OGCG: call void @_ZN7HasDtorD1Ev(ptr {{.*}})
+
+void test_partial() {
+  HasDtor arr[5] = {HasDtor(1), HasDtor(2), HasDtor(3)};
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/array-init.c b/clang/test/CIR/Incubator/CodeGen/array-init.c
new file mode 100644
index 0000000000000..f8ed55bdc15b7
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/array-init.c
@@ -0,0 +1,221 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// CIR-DAG: cir.global "private" constant cir_private @__const.foo.bar = #cir.const_array<[#cir.fp<9.000000e+00> : !cir.double, #cir.fp<8.000000e+00> : !cir.double, #cir.fp<7.000000e+00> : !cir.double]> : !cir.array<!cir.double x 3>
+typedef struct {
+  int a;
+  long b;
+} T;
+
+// Test array initialization with different elements.
+typedef struct {
+     long a0;
+     int a1;
+} Inner;
+typedef struct {
+     int b0;
+     Inner b1[1];
+} Outer;
+Outer outers[2] = {
+    {1, {0, 1} },
+    {1, {0, 0} }
+};
+// CIR:  cir.global{{.*}} @outers =
+// CIR-SAME: #cir.const_record<{
+// CIR-SAME:   #cir.const_record<{
+// CIR-SAME:     #cir.int<1> : !s32i,
+// CIR-SAME:     #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 4>,
+// CIR-SAME:     #cir.const_array<[
+// CIR-SAME:       #cir.const_record<{#cir.int<0> : !s64i,
+// CIR-SAME:                          #cir.int<1> : !s32i,
+// CIR-SAME:                          #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 4>
+// CIR-SAME:       }> : !rec_anon_struct
+// CIR-SAME:     ]> : !cir.array<!rec_anon_struct x 1>
+// CIR-SAME:   }> : !rec_anon_struct2,
+// CIR-SAME:   #cir.const_record<{#cir.int<1> : !s32i,
+// CIR-SAME:                      #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 4>,
+// CIR-SAME:                      #cir.zero : !cir.array<!rec_Inner x 1>
+// CIR-SAME:   }> : !rec_anon_struct1
+// CIR-SAME: }> : !rec_anon_struct3
+// LLVM: @outers = {{.*}}global
+// LLVM-SAME: {
+// LLVM-SAME:   { i32, [4 x i8], [1 x { i64, i32, [4 x i8] }] },
+// LLVM-SAME:   { i32, [4 x i8], [1 x %struct.Inner] }
+// LLVM-SAME: }
+// LLVM-SAME: {
+// LLVM-SAME:   { i32, [4 x i8], [1 x { i64, i32, [4 x i8] }] }
+// LLVM-SAME:    { i32 1, [4 x i8] zeroinitializer, [1 x { i64, i32, [4 x i8] }] [{ i64, i32, [4 x i8] } { i64 0, i32 1, [4 x i8] zeroinitializer }] },
+// LLVM-SAME:   { i32, [4 x i8], [1 x %struct.Inner] }
+// LLVM-SAME:    { i32 1, [4 x i8] zeroinitializer, [1 x %struct.Inner] zeroinitializer }
+// LLVM-SAME: }
+
+void buz(int x) {
+  T arr[] = { {x, x}, {0, 0} };
+}
+// CIR: cir.func {{.*}} @buz
+// CIR-NEXT: [[X_ALLOCA:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+// CIR-NEXT: [[ARR:%.*]] = cir.alloca !cir.array<!rec_T x 2>, !cir.ptr<!cir.array<!rec_T x 2>>, ["arr", init] {alignment = 16 : i64}
+// CIR-NEXT: cir.store{{.*}} %arg0, [[X_ALLOCA]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR-NEXT: [[FI_EL:%.*]] = cir.get_element [[ARR]][[[ZERO]]] : (!cir.ptr<!cir.array<!rec_T x 2>>, !s32i) -> !cir.ptr<!rec_T>
+// CIR-NEXT: [[A_STORAGE0:%.*]] = cir.get_member [[FI_EL]][0] {name = "a"} : !cir.ptr<!rec_T> -> !cir.ptr<!s32i>
+// CIR-NEXT: [[XA_VAL:%.*]] = cir.load{{.*}} [[X_ALLOCA]] : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT: cir.store{{.*}} [[XA_VAL]], [[A_STORAGE0]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT: [[B_STORAGE0:%.*]] = cir.get_member [[FI_EL]][1] {name = "b"} : !cir.ptr<!rec_T> -> !cir.ptr<!s64i>
+// CIR-NEXT: [[XB_VAL:%.*]] = cir.load{{.*}} [[X_ALLOCA]] : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT: [[XB_CASTED:%.*]] = cir.cast integral [[XB_VAL]] : !s32i -> !s64i
+// CIR-NEXT: cir.store{{.*}} [[XB_CASTED]], [[B_STORAGE0]] : !s64i, !cir.ptr<!s64i>
+// CIR-NEXT: [[ONE:%.*]] = cir.const #cir.int<1> : !s64i
+// CIR-NEXT: [[SE_EL:%.*]] = cir.get_element [[ARR]][[[ONE]]] : (!cir.ptr<!cir.array<!rec_T x 2>>, !s64i) -> !cir.ptr<!rec_T>
+// CIR-NEXT: [[A_STORAGE1:%.*]] = cir.get_member [[SE_EL]][0] {name = "a"} : !cir.ptr<!rec_T> -> !cir.ptr<!s32i>
+// CIR-NEXT: [[A1_ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR-NEXT: cir.store{{.*}} [[A1_ZERO]], [[A_STORAGE1]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT: [[B_STORAGE1:%.*]] = cir.get_member [[SE_EL]][1] {name = "b"} : !cir.ptr<!rec_T> -> !cir.ptr<!s64i>
+// CIR-NEXT: [[B1_ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR-NEXT: [[B1_CASTED:%.*]] = cir.cast integral [[B1_ZERO]] : !s32i -> !s64i
+// CIR-NEXT: cir.store{{.*}} [[B1_CASTED]], [[B_STORAGE1]] : !s64i, !cir.ptr<!s64i>
+// CIR-NEXT: cir.return
+
+void foo() {
+  double bar[] = {9,8,7};
+}
+// CIR-LABEL: @foo
+// CIR:  %[[DST:.*]] = cir.alloca !cir.array<!cir.double x 3>, !cir.ptr<!cir.array<!cir.double x 3>>, ["bar", init]
+// CIR:  %[[SRC:.*]] = cir.get_global @__const.foo.bar : !cir.ptr<!cir.array<!cir.double x 3>>
+// CIR:  cir.copy %[[SRC]] to %[[DST]] : !cir.ptr<!cir.array<!cir.double x 3>>
+
+void bar(int a, int b, int c) {
+  int arr[] = {a,b,c};
+}
+// CIR: cir.func {{.*}} @bar
+// CIR:      [[ARR:%.*]] = cir.alloca !cir.array<!s32i x 3>, !cir.ptr<!cir.array<!s32i x 3>>, ["arr", init] {alignment = 4 : i64}
+// CIR-NEXT: cir.store{{.*}} %arg0, [[A:%.*]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT: cir.store{{.*}} %arg1, [[B:%.*]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT: cir.store{{.*}} %arg2, [[C:%.*]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR-NEXT: [[ELEM0:%.*]] = cir.get_element [[ARR]][[[ZERO]]] : (!cir.ptr<!cir.array<!s32i x 3>>, !s32i) -> !cir.ptr<!s32i>
+// CIR-NEXT: [[LOAD_A:%.*]] = cir.load{{.*}} [[A]] : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT: cir.store{{.*}} [[LOAD_A]], [[ELEM0]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT: [[ONE:%.*]] = cir.const #cir.int<1> : !s64i
+// CIR-NEXT: [[ELEM1:%.*]] = cir.get_element [[ARR]][[[ONE]]] : (!cir.ptr<!cir.array<!s32i x 3>>, !s64i) -> !cir.ptr<!s32i>
+// CIR-NEXT: [[LOAD_B:%.*]] = cir.load{{.*}} [[B]] : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT: cir.store{{.*}} [[LOAD_B]], [[ELEM1]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT: [[TWO:%.*]] = cir.const #cir.int<2> : !s64i
+// CIR-NEXT: [[ELEM2:%.*]] = cir.get_element [[ARR]][[[TWO]]] : (!cir.ptr<!cir.array<!s32i x 3>>, !s64i) -> !cir.ptr<!s32i>
+// CIR-NEXT: [[LOAD_C:%.*]] = cir.load{{.*}} [[C]] : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT: cir.store{{.*}} [[LOAD_C]], [[ELEM2]] : !s32i, !cir.ptr<!s32i>
+
+void zero_init(int x) {
+  int arr[3] = {x};
+}
+// CIR:  cir.func {{.*}} @zero_init
+// CIR:    [[VAR_ALLOC:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+// CIR:    [[ARR:%.*]] = cir.alloca !cir.array<!s32i x 3>, !cir.ptr<!cir.array<!s32i x 3>>, ["arr", init] {alignment = 4 : i64}
+// CIR:    [[TEMP:%.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp", init] {alignment = 8 : i64}
+// CIR:    cir.store{{.*}} %arg0, [[VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+// CIR:    [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR:    [[BEGIN:%.*]] = cir.get_element [[ARR]][[[ZERO]]] : (!cir.ptr<!cir.array<!s32i x 3>>, !s32i) -> !cir.ptr<!s32i>
+// CIR:    [[VAR:%.*]] = cir.load{{.*}} [[VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+// CIR:    cir.store{{.*}} [[VAR]], [[BEGIN]] : !s32i, !cir.ptr<!s32i>
+// CIR:    [[ONE:%.*]] = cir.const #cir.int<1> : !s64i
+// CIR:    [[ZERO_INIT_START:%.*]] = cir.get_element [[ARR]][[[ONE]]] : (!cir.ptr<!cir.array<!s32i x 3>>, !s64i) -> !cir.ptr<!s32i>
+// CIR:    cir.store{{.*}} [[ZERO_INIT_START]], [[TEMP]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CIR:    [[SIZE:%.*]] = cir.const #cir.int<3> : !s64i
+// CIR:    [[END:%.*]] = cir.get_element [[ARR]][[[SIZE]]] : (!cir.ptr<!cir.array<!s32i x 3>>, !s64i) -> !cir.ptr<!s32i>
+// CIR:    cir.do {
+// CIR:      [[CUR:%.*]] = cir.load{{.*}} [[TEMP]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CIR:      [[FILLER:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR:      cir.store{{.*}} [[FILLER]], [[CUR]] : !s32i, !cir.ptr<!s32i>
+// CIR:      [[ONE:%.*]] = cir.const #cir.int<1> : !s64i
+// CIR:      [[NEXT:%.*]] = cir.ptr_stride [[CUR]], [[ONE]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CIR:      cir.store{{.*}} [[NEXT]], [[TEMP]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CIR:      cir.yield
+// CIR:    } while {
+// CIR:      [[CUR:%.*]] = cir.load{{.*}} [[TEMP]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CIR:      [[CMP:%.*]] = cir.cmp(ne, [[CUR]], [[END]]) : !cir.ptr<!s32i>, !cir.bool
+// CIR:      cir.condition([[CMP]])
+// CIR:    }
+// CIR:    cir.return
+
+void aggr_init() {
+  int g = 5;
+  int g_arr[5] = {1, 2, 3, g};
+}
+// CIR-LABEL:  cir.func {{.*}} @aggr_init
+// CIR:    [[VAR_ALLOC:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["g", init] {alignment = 4 : i64}
+// CIR:    %1 = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["g_arr", init] {alignment = 16 : i64}
+// CIR:    [[TEMP:%.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp", init] {alignment = 8 : i64}
+// CIR:    %3 = cir.const #cir.int<5> : !s32i
+// CIR:    cir.store{{.*}} %3, [[VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+// CIR:    [[OFFSET0:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR:    [[BEGIN:%.*]] = cir.get_element %1[[[OFFSET0]]] : (!cir.ptr<!cir.array<!s32i x 5>>, !s32i) -> !cir.ptr<!s32i>
+// CIR:    [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR:    cir.store{{.*}} [[ONE]], [[BEGIN]] : !s32i, !cir.ptr<!s32i>
+// CIR:    [[OFFSET1:%.*]] = cir.const #cir.int<1> : !s64i
+// CIR:    [[ELEM1:%.*]] = cir.get_element %1[[[OFFSET1]]] : (!cir.ptr<!cir.array<!s32i x 5>>, !s64i) -> !cir.ptr<!s32i>
+// CIR:    [[TWO:%.*]] = cir.const #cir.int<2> : !s32i
+// CIR:    cir.store{{.*}} [[TWO]], [[ELEM1]] : !s32i, !cir.ptr<!s32i>
+// CIR:    [[OFFSET2:%.*]] = cir.const #cir.int<2> : !s64i
+// CIR:    [[ELEM2:%.*]] = cir.get_element %1[[[OFFSET2]]] : (!cir.ptr<!cir.array<!s32i x 5>>, !s64i) -> !cir.ptr<!s32i>
+// CIR:    [[THREE:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR:    cir.store{{.*}} [[THREE]], [[ELEM2]] : !s32i, !cir.ptr<!s32i>
+// CIR:    [[OFFSET3:%.*]] = cir.const #cir.int<3> : !s64i
+// CIR:    [[ELEM3:%.*]] = cir.get_element %1[[[OFFSET3]]] : (!cir.ptr<!cir.array<!s32i x 5>>, !s64i) -> !cir.ptr<!s32i>
+// CIR:    [[VAR:%.*]] = cir.load{{.*}} [[VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+// CIR:    cir.store{{.*}} [[VAR]], [[ELEM3]] : !s32i, !cir.ptr<!s32i>
+// CIR:    [[ONE_VAR:%.*]] = cir.const #cir.int<1> : !s64i
+// CIR:    [[OFFSET4:%.*]] = cir.binop(add, [[OFFSET3]], [[ONE_VAR]]) : !s64i
+// CIR:    [[LAST:%.*]] = cir.get_element %1[[[OFFSET4]]] : (!cir.ptr<!cir.array<!s32i x 5>>, !s64i) -> !cir.ptr<!s32i>
+// CIR:    cir.store{{.*}} [[LAST]], [[TEMP]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CIR:    [[SIZE:%.*]] = cir.const #cir.int<5> : !s64i
+// CIR:    [[END:%.*]] = cir.get_element %1[[[SIZE]]] : (!cir.ptr<!cir.array<!s32i x 5>>, !s64i) -> !cir.ptr<!s32i>
+// CIR:    cir.do {
+// CIR:      [[CUR:%.*]] = cir.load{{.*}} [[TEMP]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CIR:      [[FILLER:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR:      cir.store{{.*}} [[FILLER]], [[CUR]] : !s32i, !cir.ptr<!s32i>
+// CIR:      [[ONE:%.*]] = cir.const #cir.int<1> : !s64i
+// CIR:      [[NEXT:%.*]] = cir.ptr_stride [[CUR]], [[ONE]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CIR:      cir.store{{.*}} [[NEXT]], [[TEMP]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CIR:      cir.yield
+// CIR:    } while {
+// CIR:      [[CUR:%.*]] = cir.load{{.*}} [[TEMP]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CIR:      [[CMP:%.*]] = cir.cmp(ne, [[CUR]], [[END]]) : !cir.ptr<!s32i>, !cir.bool
+// CIR:      cir.condition([[CMP]])
+// CIR:    }
+// CIR:    cir.return
+//
+// LLVM-LABEL:  @aggr_init
+// LLVM:   [[VAR_ALLOC:%.*]] = alloca i32, i64 1, align 4
+// LLVM:   %2 = alloca [5 x i32], i64 1, align 16
+// LLVM:   [[TEMP:%.*]] = alloca ptr, i64 1, align 8
+// LLVM:   store i32 5, ptr [[VAR_ALLOC]], align 4
+// LLVM:   [[BEGIN:%.*]] = getelementptr [5 x i32], ptr %2, i32 0, i64 0
+// LLVM:   store i32 1, ptr [[BEGIN]], align 4
+// LLVM:   [[ONE:%.*]] = getelementptr [5 x i32], ptr %2, i32 0, i64 1
+// LLVM:   store i32 2, ptr [[ONE]], align 4
+// LLVM:   [[TWO:%.*]] = getelementptr [5 x i32], ptr %2, i32 0, i64 2
+// LLVM:   store i32 3, ptr [[TWO]], align 4
+// LLVM:   [[THREE:%.*]] = getelementptr [5 x i32], ptr %2, i32 0, i64 3
+// LLVM:   [[VAR:%.*]] = load i32, ptr [[VAR_ALLOC]], align 4
+// LLVM:   store i32 [[VAR]], ptr [[THREE]], align 4
+// LLVM:   [[LAST:%.*]] = getelementptr [5 x i32], ptr %2, i32 0, i64 4
+// LLVM:   store ptr [[LAST]], ptr [[TEMP]], align 8
+// LLVM:   [[END:%.*]] = getelementptr [5 x i32], ptr %2, i32 0, i64 5
+// LLVM:   br label %14
+//
+// LLVM: 11:                                               ; preds = %14
+// LLVM:   [[CUR:%.*]] = load ptr, ptr [[TEMP]], align 8
+// LLVM:   [[CMP:%.*]] = icmp ne ptr [[CUR]], [[END]]
+// LLVM:   br i1 [[CMP]], label %14, label %17
+//
+// LLVM: 14:                                               ; preds = %11, %0
+// LLVM:   [[CUR:%.*]] = load ptr, ptr [[TEMP]], align 8
+// LLVM:   store i32 0, ptr [[CUR]], align 4
+// LLVM:   [[NEXT:%.*]] = getelementptr i32, ptr [[CUR]], i64 1
+// LLVM:   store ptr [[NEXT]], ptr [[TEMP]], align 8
+// LLVM:   br label %11
+//
+// LLVM: 17:                                               ; preds = %11
+// LLVM:   ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/array-init.cpp b/clang/test/CIR/Incubator/CodeGen/array-init.cpp
new file mode 100644
index 0000000000000..8a2bb223e92d2
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/array-init.cpp
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+typedef struct {
+  int a;
+  int b[2];
+} A;
+
+int bar() {
+  return 42;
+}
+
+void foo() {
+  A a = {bar(), {}};
+}
+// CHECK: %[[VAL_0:.*]] = cir.alloca !rec_A, !cir.ptr<!rec_A>, ["a", init]
+// CHECK: %[[VAL_1:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arrayinit.temp", init]
+// CHECK: %[[VAL_2:.*]] = cir.get_member %[[VAL_0]][0] {name = "a"} : !cir.ptr<!rec_A> -> !cir.ptr<!s32i>
+// CHECK: %[[VAL_3:.*]] = cir.call @_Z3barv() : () -> !s32i
+// CHECK: cir.store{{.*}} %[[VAL_3]], %[[VAL_2]] : !s32i, !cir.ptr<!s32i>
+// CHECK: %[[VAL_4:.*]] = cir.get_member %[[VAL_0]][1] {name = "b"} : !cir.ptr<!rec_A> -> !cir.ptr<!cir.array<!s32i x 2>>
+// CHECK: %[[VAL_5:.*]] = cir.cast array_to_ptrdecay %[[VAL_4]] : !cir.ptr<!cir.array<!s32i x 2>> -> !cir.ptr<!s32i>
+// CHECK: cir.store{{.*}} %[[VAL_5]], %[[VAL_1]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CHECK: %[[VAL_6:.*]] = cir.const #cir.int<2> : !s64i
+// CHECK: %[[VAL_7:.*]] = cir.get_element %[[VAL_4]][%[[VAL_6]]] : (!cir.ptr<!cir.array<!s32i x 2>>, !s64i) -> !cir.ptr<!s32i>
+// CHECK: cir.do {
+// CHECK:     %[[VAL_8:.*]] = cir.load{{.*}} %[[VAL_1]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK:     %[[VAL_9:.*]] = cir.const #cir.int<0> : !s32i
+// CHECK:     cir.store{{.*}} %[[VAL_9]], %[[VAL_8]] : !s32i, !cir.ptr<!s32i>
+// CHECK:     %[[VAL_10:.*]] = cir.const #cir.int<1> : !s64i
+// CHECK:     %[[VAL_11:.*]] = cir.ptr_stride %[[VAL_8]], %[[VAL_10]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CHECK:     cir.store{{.*}} %[[VAL_11]], %[[VAL_1]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CHECK:     cir.yield
+// CHECK: } while {
+// CHECK:     %[[VAL_8:.*]] = cir.load{{.*}} %[[VAL_1]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK:     %[[VAL_9:.*]] = cir.cmp(ne, %[[VAL_8]], %[[VAL_7]]) : !cir.ptr<!s32i>, !cir.bool
+// CHECK:     cir.condition(%[[VAL_9]])
+// CHECK: }
diff --git a/clang/test/CIR/Incubator/CodeGen/array-new-init.cpp b/clang/test/CIR/Incubator/CodeGen/array-new-init.cpp
new file mode 100644
index 0000000000000..efe56a509f636
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/array-new-init.cpp
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -fclangir -emit-cir  -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck -check-prefix=BEFORE %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -fclangir -emit-cir  -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck -check-prefix=AFTER %s
+
+class E {
+  public:
+    E();
+    ~E();
+};
+
+void t_new_constant_size_constructor() {
+  auto p = new E[3];
+}
+
+// BEFORE:  cir.func {{.*}} @_Z31t_new_constant_size_constructorv
+// BEFORE:    %[[NUM_ELEMENTS:.*]] = cir.const #cir.int<3> : !u64i
+// BEFORE:    %[[SIZE_WITHOUT_COOKIE:.*]] = cir.const #cir.int<3> : !u64i
+// BEFORE:    %[[ALLOC_SIZE:.*]] = cir.const #cir.int<11> : !u64i
+// BEFORE:    %[[ALLOC_PTR:.*]] = cir.call @_Znam(%[[ALLOC_SIZE]])
+// BEFORE:    %[[COOKIE_PTR:.*]] = cir.cast bitcast %[[ALLOC_PTR]] : !cir.ptr<!void> -> !cir.ptr<!u64i>
+// BEFORE:    cir.store{{.*}} %[[NUM_ELEMENTS]], %[[COOKIE_PTR]] : !u64i, !cir.ptr<!u64i>
+// BEFORE:    %[[PTR_AS_U8:.*]] = cir.cast bitcast %[[ALLOC_PTR]] : !cir.ptr<!void> -> !cir.ptr<!u8i>
+// BEFORE:    %[[OFFSET:.*]] = cir.const #cir.int<8> : !s32i
+// BEFORE:    %[[OBJ_PTR:.*]] = cir.ptr_stride %[[PTR_AS_U8]], %[[OFFSET]] : (!cir.ptr<!u8i>, !s32i) -> !cir.ptr<!u8i>
+// BEFORE:    %[[OBJ_ELEM_PTR:.*]] = cir.cast bitcast %[[OBJ_PTR]] : !cir.ptr<!u8i> -> !cir.ptr<!rec_E>
+// BEFORE:    %[[OBJ_ARRAY_PTR:.*]] = cir.cast bitcast %[[OBJ_ELEM_PTR]] : !cir.ptr<!rec_E> -> !cir.ptr<!cir.array<!rec_E x 3>>
+// BEFORE:    cir.array.ctor(%[[OBJ_ARRAY_PTR]] : !cir.ptr<!cir.array<!rec_E x 3>>) {
+// BEFORE:    ^bb0(%arg0: !cir.ptr<!rec_E>
+// BEFORE:      cir.call @_ZN1EC1Ev(%arg0) : (!cir.ptr<!rec_E>) -> ()
+// BEFORE:      cir.yield
+// BEFORE:    }
+
+// AFTER:  cir.func {{.*}} @_Z31t_new_constant_size_constructorv
+// AFTER:    %[[NUM_ELEMENTS:.*]] = cir.const #cir.int<3> : !u64i
+// AFTER:    %[[SIZE_WITHOUT_COOKIE:.*]] = cir.const #cir.int<3> : !u64i
+// AFTER:    %[[ALLOC_SIZE:.*]] = cir.const #cir.int<11> : !u64i
+// AFTER:    %[[ALLOC_PTR:.*]] = cir.call @_Znam(%[[ALLOC_SIZE]])
+// AFTER:    %[[COOKIE_PTR:.*]] = cir.cast bitcast %[[ALLOC_PTR]] : !cir.ptr<!void> -> !cir.ptr<!u64i>
+// AFTER:    cir.store{{.*}} %[[NUM_ELEMENTS]], %[[COOKIE_PTR]] : !u64i, !cir.ptr<!u64i>
+// AFTER:    %[[PTR_AS_U8:.*]] = cir.cast bitcast %[[ALLOC_PTR]] : !cir.ptr<!void> -> !cir.ptr<!u8i>
+// AFTER:    %[[OFFSET:.*]] = cir.const #cir.int<8> : !s32i
+// AFTER:    %[[OBJ_PTR:.*]] = cir.ptr_stride %[[PTR_AS_U8]], %[[OFFSET]] : (!cir.ptr<!u8i>, !s32i) -> !cir.ptr<!u8i>
+// AFTER:    %[[OBJ_ELEM_PTR:.*]] = cir.cast bitcast %[[OBJ_PTR]] : !cir.ptr<!u8i> -> !cir.ptr<!rec_E>
+// AFTER:    %[[OBJ_ARRAY_PTR:.*]] = cir.cast bitcast %[[OBJ_ELEM_PTR]] : !cir.ptr<!rec_E> -> !cir.ptr<!cir.array<!rec_E x 3>>
+// AFTER:    %[[NUM_ELEMENTS2:.*]] = cir.const #cir.int<3> : !u64i
+// AFTER:    %[[ELEM_PTR:.*]] = cir.cast array_to_ptrdecay %10 : !cir.ptr<!cir.array<!rec_E x 3>> -> !cir.ptr<!rec_E>
+// AFTER:    %[[END_PTR:.*]] = cir.ptr_stride %[[ELEM_PTR]], %[[NUM_ELEMENTS2]] : (!cir.ptr<!rec_E>, !u64i) -> !cir.ptr<!rec_E>
+// AFTER:    %[[CUR_ELEM_ALLOCA:.*]] = cir.alloca !cir.ptr<!rec_E>, !cir.ptr<!cir.ptr<!rec_E>>, ["__array_idx"] {alignment = 1 : i64}
+// AFTER:    cir.store{{.*}} %[[ELEM_PTR]], %[[CUR_ELEM_ALLOCA]] : !cir.ptr<!rec_E>, !cir.ptr<!cir.ptr<!rec_E>>
+// AFTER:    cir.do {
+// AFTER:      %[[CUR_ELEM_PTR:.*]] = cir.load %[[CUR_ELEM_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_E>>, !cir.ptr<!rec_E>
+// AFTER:      cir.call @_ZN1EC1Ev(%[[CUR_ELEM_PTR]]) : (!cir.ptr<!rec_E>) -> ()
+// AFTER:      %[[OFFSET:.*]] = cir.const #cir.int<1> : !u64i
+// AFTER:      %[[NEXT_PTR:.*]] = cir.ptr_stride %[[CUR_ELEM_PTR]], %[[OFFSET]] : (!cir.ptr<!rec_E>, !u64i) -> !cir.ptr<!rec_E>
+// AFTER:      cir.store{{.*}} %[[NEXT_PTR]], %[[CUR_ELEM_ALLOCA]] : !cir.ptr<!rec_E>, !cir.ptr<!cir.ptr<!rec_E>>
+// AFTER:      cir.yield
+// AFTER:    } while {
+// AFTER:      %[[CUR_ELEM_PTR2:.*]] = cir.load %[[CUR_ELEM_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_E>>, !cir.ptr<!rec_E>
+// AFTER:      %[[END_TEST:.*]] = cir.cmp(ne, %[[CUR_ELEM_PTR2]], %[[END_PTR]]) : !cir.ptr<!rec_E>, !cir.bool
+// AFTER:      cir.condition(%[[END_TEST]])
+// AFTER:    }
diff --git a/clang/test/CIR/Incubator/CodeGen/array-unknown-bound.cpp b/clang/test/CIR/Incubator/CodeGen/array-unknown-bound.cpp
new file mode 100644
index 0000000000000..93c0f97fa905c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/array-unknown-bound.cpp
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s
+
+extern int table[];
+// CHECK: cir.global external @table = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i]> : !cir.array<!s32i x 3>
+
+int *table_ptr = table;
+// CHECK: cir.global external @table_ptr = #cir.global_view<@table> : !cir.ptr<!s32i>
+
+int test() { return table[1]; }
+//      CHECK: cir.func {{.*}} @_Z4testv()
+// CHECK-NEXT:    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CHECK-NEXT:    %1 = cir.get_global @table : !cir.ptr<!cir.array<!s32i x 3>>
+
+int table[3] {1, 2, 3};
diff --git a/clang/test/CIR/Incubator/CodeGen/array.c b/clang/test/CIR/Incubator/CodeGen/array.c
new file mode 100644
index 0000000000000..01edf2f34ab6d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/array.c
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Should implicitly zero-initialize global array elements.
+struct S {
+  int i;
+} arr[3] = {{1}};
+// CHECK: cir.global external @arr = #cir.const_array<[#cir.const_record<{#cir.int<1> : !s32i}> : !rec_S, #cir.zero : !rec_S, #cir.zero : !rec_S]> : !cir.array<!rec_S x 3>
+
+int a[4];
+// CHECK: cir.global external @a = #cir.zero : !cir.array<!s32i x 4>
+
+// Should create a pointer to a complete array.
+int (*complete_ptr_a)[4] = &a;
+// CHECK: cir.global external @complete_ptr_a = #cir.global_view<@a> : !cir.ptr<!cir.array<!s32i x 4>>
+
+// Should create a pointer to an incomplete array.
+int (*incomplete_ptr_a)[] = &a;
+// CHECK: cir.global external @incomplete_ptr_a = #cir.global_view<@a> : !cir.ptr<!cir.array<!s32i x 0>>
+
+// Should access incomplete array if external.
+extern int foo[];
+// CHECK: cir.global "private" external @foo : !cir.array<!s32i x 0>
+void useFoo(int i) {
+  foo[i] = 42;
+}
+// CHECK: @useFoo
+// CHECK: %[[#V2:]] = cir.get_global @foo : !cir.ptr<!cir.array<!s32i x 0>>
+// CHECK: %[[#V3:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CHECK: %[[#V4:]] = cir.get_element %[[#V2]][%[[#V3]]] : (!cir.ptr<!cir.array<!s32i x 0>>, !s32i) -> !cir.ptr<!s32i>
+// CHECK: cir.store{{.*}} %{{.+}}, %[[#V4]] : !s32i, !cir.ptr<!s32i>
diff --git a/clang/test/CIR/Incubator/CodeGen/array.cpp b/clang/test/CIR/Incubator/CodeGen/array.cpp
new file mode 100644
index 0000000000000..f56386b719179
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/array.cpp
@@ -0,0 +1,95 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -Wno-return-stack-address -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+void a0() {
+  int a[10];
+}
+
+// CHECK: cir.func {{.*}} @_Z2a0v()
+// CHECK-NEXT:   %0 = cir.alloca !cir.array<!s32i x 10>, !cir.ptr<!cir.array<!s32i x 10>>, ["a"] {alignment = 16 : i64}
+
+void a1() {
+  int a[10];
+  a[0] = 1;
+}
+
+// CHECK: cir.func {{.*}} @_Z2a1v()
+// CHECK-NEXT:  %0 = cir.alloca !cir.array<!s32i x 10>, !cir.ptr<!cir.array<!s32i x 10>>, ["a"] {alignment = 16 : i64}
+// CHECK-NEXT:  %1 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:  %2 = cir.const #cir.int<0> : !s32i
+// CHECK-NEXT:  %3 = cir.get_element %0[%2] : (!cir.ptr<!cir.array<!s32i x 10>>, !s32i) -> !cir.ptr<!s32i>
+// CHECK-NEXT:  cir.store{{.*}} %1, %3 : !s32i, !cir.ptr<!s32i>
+
+int *a2() {
+  int a[4];
+  return &a[0];
+}
+
+// CHECK: cir.func {{.*}} @_Z2a2v() -> !cir.ptr<!s32i>
+// CHECK-NEXT:   %0 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["__retval"] {alignment = 8 : i64}
+// CHECK-NEXT:   %1 = cir.alloca !cir.array<!s32i x 4>, !cir.ptr<!cir.array<!s32i x 4>>, ["a"] {alignment = 16 : i64}
+// CHECK-NEXT:   %2 = cir.const #cir.int<0> : !s32i
+// CHECK-NEXT:   %3 = cir.get_element %1[%2] : (!cir.ptr<!cir.array<!s32i x 4>>, !s32i) -> !cir.ptr<!s32i>
+// CHECK-NEXT:   cir.store{{.*}} %3, %0 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CHECK-NEXT:   %4 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK-NEXT:   cir.return %4 : !cir.ptr<!s32i>
+
+void local_stringlit() {
+  const char *s = "whatnow";
+}
+
+// CHECK: cir.global "private" constant cir_private dso_local @".str" = #cir.const_array<"whatnow\00" : !cir.array<!s8i x 8>> : !cir.array<!s8i x 8> {alignment = 1 : i64}
+// CHECK: cir.func {{.*}} @_Z15local_stringlitv()
+// CHECK-NEXT:  %0 = cir.alloca !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>, ["s", init] {alignment = 8 : i64}
+// CHECK-NEXT:  %1 = cir.get_global @".str" : !cir.ptr<!cir.array<!s8i x 8>>
+// CHECK-NEXT:  %2 = cir.cast array_to_ptrdecay %1 : !cir.ptr<!cir.array<!s8i x 8>> -> !cir.ptr<!s8i>
+// CHECK-NEXT:  cir.store{{.*}} %2, %0 : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+
+int multidim(int i, int j) {
+  int arr[2][2];
+  return arr[i][j];
+}
+
+// CHECK: %3 = cir.alloca !cir.array<!cir.array<!s32i x 2> x 2>, !cir.ptr<!cir.array<!cir.array<!s32i x 2> x 2>>
+// Index first dimension with the loaded value of 'i'
+// CHECK: %4 = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CHECK: %5 = cir.get_element %3[%4] : (!cir.ptr<!cir.array<!cir.array<!s32i x 2> x 2>>, !s32i) -> !cir.ptr<!cir.array<!s32i x 2>>
+// Index second dimension with the loaded value of 'j'
+// CHECK: %6 = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CHECK: %7 = cir.get_element %5[%6] : (!cir.ptr<!cir.array<!s32i x 2>>, !s32i) -> !cir.ptr<!s32i>
+
+// Should globally zero-initialize null arrays.
+int globalNullArr[] = {0, 0};
+// CHECK: cir.global external @globalNullArr = #cir.zero : !cir.array<!s32i x 2>
+
+// Should implicitly zero-initialize global array elements.
+struct S {
+  int i;
+} arr[3] = {{1}};
+// CHECK: cir.global external @arr = #cir.const_array<[#cir.const_record<{#cir.int<1> : !s32i}> : !rec_S, #cir.zero : !rec_S, #cir.zero : !rec_S]> : !cir.array<!rec_S x 3>
+
+void testPointerDecaySubscriptAccess(int arr[]) {
+// CHECK: cir.func {{.*}} @{{.+}}testPointerDecaySubscriptAccess
+  arr[1] = 2;
+  // CHECK: %[[#TWO:]] = cir.const #cir.int<2> : !s32i
+  // CHECK: %[[#BASE:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+  // CHECK: %[[#DIM1:]] = cir.const #cir.int<1> : !s32i
+  // CHECK: %[[#ELEM:]] = cir.ptr_stride %[[#BASE]], %[[#DIM1]] : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+  // CHECK: cir.store{{.*}} %[[#TWO]], %[[#ELEM]] : !s32i, !cir.ptr<!s32i>
+}
+
+void testPointerDecayedArrayMultiDimSubscriptAccess(int arr[][3]) {
+// CHECK: cir.func {{.*}} @{{.+}}testPointerDecayedArrayMultiDimSubscriptAccess
+  arr[1][2] = 3;
+  // CHECK: %[[#THREE:]] = cir.const #cir.int<3> : !s32i
+  // CHECK: %[[#ARRAY:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!cir.array<!s32i x 3>>>, !cir.ptr<!cir.array<!s32i x 3>>
+  // CHECK: %[[#ONE:]] = cir.const #cir.int<1> : !s32i
+  // CHECK: %[[#OUTER:]] = cir.ptr_stride %[[#ARRAY]], %[[#ONE]] : (!cir.ptr<!cir.array<!s32i x 3>>, !s32i) -> !cir.ptr<!cir.array<!s32i x 3>>
+  // CHECK: %[[#TWO:]] = cir.const #cir.int<2> : !s32i
+  // CHECK: %[[#INNER:]] = cir.get_element %[[#OUTER]][%[[#TWO]]] : (!cir.ptr<!cir.array<!s32i x 3>>, !s32i) -> !cir.ptr<!s32i>
+  // CHECK: cir.store{{.*}} %[[#THREE]], %[[#INNER]] : !s32i, !cir.ptr<!s32i>
+}
+
+void testArrayOfComplexType() { int _Complex a[4]; }
+
+// CHECK: %[[ARR:.*]] = cir.alloca !cir.array<!cir.complex<!s32i> x 4>, !cir.ptr<!cir.array<!cir.complex<!s32i> x 4>>, ["a"]
diff --git a/clang/test/CIR/Incubator/CodeGen/asm.c b/clang/test/CIR/Incubator/CodeGen/asm.c
new file mode 100644
index 0000000000000..8b5d03c7c754b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/asm.c
@@ -0,0 +1,349 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+
+// CHECK: cir.asm(x86_att, 
+// CHECK:   out = [],
+// CHECK:   in = [],
+// CHECK:   in_out = [],
+// CHECK:   {"" "~{dirflag},~{fpsr},~{flags}"}) side_effects
+void empty1() {
+  __asm__ volatile("" : : : );
+}
+
+// CHECK: cir.asm(x86_att, 
+// CHECK:   out = [],
+// CHECK:   in = [],
+// CHECK:   in_out = [],
+// CHECK:   {"xyz" "~{dirflag},~{fpsr},~{flags}"}) side_effects
+void empty2() {
+  __asm__ volatile("xyz" : : : );
+}
+
+// CHECK: cir.asm(x86_att, 
+// CHECK:   out = [%0 : !cir.ptr<!s32i> (maybe_memory)],
+// CHECK:   in = [],
+// CHECK:   in_out = [%0 : !cir.ptr<!s32i> (maybe_memory)],
+// CHECK:   {"" "=*m,*m,~{dirflag},~{fpsr},~{flags}"}) side_effects
+void empty3(int x) {
+  __asm__ volatile("" : "+m"(x));
+}
+
+// CHECK: cir.asm(x86_att, 
+// CHECK:   out = [],
+// CHECK:   in = [%0 : !cir.ptr<!s32i> (maybe_memory)],
+// CHECK:   in_out = [],
+// CHECK:   {"" "*m,~{dirflag},~{fpsr},~{flags}"}) side_effects
+void empty4(int x) {
+  __asm__ volatile("" : : "m"(x));
+}
+
+// CHECK: cir.asm(x86_att, 
+// CHECK:   out = [%0 : !cir.ptr<!s32i> (maybe_memory)],
+// CHECK:   in = [],
+// CHECK:   in_out = [],
+// CHECK:   {"" "=*m,~{dirflag},~{fpsr},~{flags}"}) side_effects
+void empty5(int x) {
+  __asm__ volatile("" : "=m"(x));
+}
+
+// CHECK: %3 = cir.asm(x86_att, 
+// CHECK:   out = [],
+// CHECK:   in = [],
+// CHECK:   in_out = [%2 : !s32i],
+// CHECK:   {"" "=&r,=&r,1,~{dirflag},~{fpsr},~{flags}"}) side_effects -> !rec_anon2E0
+void empty6(int x) {
+  __asm__ volatile("" : "=&r"(x), "+&r"(x));
+}
+
+// CHECK: [[TMP0:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a"] 
+// CHECK: [[TMP1:%.*]] = cir.load{{.*}} %0 : !cir.ptr<!u32i>, !u32i
+// CHECK: [[TMP2:%.*]] = cir.asm(x86_att, 
+// CHECK:       out = [],
+// CHECK:       in = [%3 : !u32i],
+// CHECK:       in_out = [],
+// CHECK:       {"addl $$42, $1" "=r,r,~{dirflag},~{fpsr},~{flags}"}) -> !s32i
+// CHECK: cir.store{{.*}} [[TMP2]], [[TMP0]] : !s32i, !cir.ptr<!s32i> loc(#loc42)
+unsigned add1(unsigned int x) {
+  int a;
+  __asm__("addl $42, %[val]"
+      : "=r" (a)
+      : [val] "r" (x)
+      );
+  
+  return a;
+}
+
+// CHECK: [[TMP0:%.*]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["x", init] {alignment = 4 : i64}
+// CHECK: cir.store{{.*}} %arg0, [[TMP0]] : !u32i, !cir.ptr<!u32i>
+// CHECK: [[TMP1:%.*]] = cir.load{{.*}} [[TMP0]] : !cir.ptr<!u32i>, !u32i
+// CHECK: [[TMP2:%.*]] = cir.asm(x86_att, 
+// CHECK:       out = [],
+// CHECK:       in = [],
+// CHECK:       in_out = [%2 : !u32i],
+// CHECK:       {"addl $$42, $0" "=r,0,~{dirflag},~{fpsr},~{flags}"}) -> !u32i
+// CHECK: cir.store{{.*}} [[TMP2]], [[TMP0]] : !u32i, !cir.ptr<!u32i>
+unsigned add2(unsigned int x) {
+  __asm__("addl $42, %[val]"
+      : [val] "+r" (x)
+      );
+  return x;
+}
+
+
+// CHECK: [[TMP0:%.*]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["x", init]
+// CHECK: [[TMP1:%.*]] = cir.load{{.*}} [[TMP0]] : !cir.ptr<!u32i>, !u32i
+// CHECK: [[TMP2:%.*]] = cir.asm(x86_att, 
+// CHECK:       out = [],
+// CHECK:       in = [],
+// CHECK:       in_out = [%2 : !u32i],
+// CHECK:       {"addl $$42, $0  \0A\09          subl $$1, $0    \0A\09          imul $$2, $0" "=r,0,~{dirflag},~{fpsr},~{flags}"}) -> !u32i
+// CHECK: cir.store{{.*}} [[TMP2]], [[TMP0]]  : !u32i, !cir.ptr<!u32i>
+unsigned add3(unsigned int x) { // ((42 + x) - 1) * 2
+  __asm__("addl $42, %[val]  \n\t\
+          subl $1, %[val]    \n\t\
+          imul $2, %[val]"
+      : [val] "+r" (x)
+      );  
+  return x;
+}
+
+// CHECK: [[TMP0:%.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["x", init] 
+// CHECK: cir.store{{.*}} %arg0, [[TMP0]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CHECK: [[TMP1:%.*]] = cir.load deref{{.*}}  [[TMP0]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK: cir.asm(x86_att, 
+// CHECK:       out = [%1 : !cir.ptr<!s32i> (maybe_memory)],
+// CHECK:       in = [],
+// CHECK:       in_out = [],
+// CHECK:       {"addl $$42, $0" "=*m,~{dirflag},~{fpsr},~{flags}"}) 
+// CHECK-NEXT: cir.return
+void add4(int *x) {    
+  __asm__("addl $42, %[addr]" : [addr] "=m" (*x));
+}
+
+
+// CHECK: [[TMP0:%.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["x", init]
+// CHECK: [[TMP1:%.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["y", init]
+// CHECK: [[TMP2:%.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["r"]
+// CHECK: cir.store{{.*}} %arg0, [[TMP0]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK: cir.store{{.*}} %arg1, [[TMP1]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK: [[TMP3:%.*]] = cir.load{{.*}} [[TMP0]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK: [[TMP4:%.*]] = cir.load{{.*}} [[TMP1]] : !cir.ptr<!cir.float>, !cir.float
+// CHECK: [[TMP5:%.*]] = cir.asm(x86_att, 
+// CHECK:       out = [],
+// CHECK:       in = [%4 : !cir.float, %5 : !cir.float],
+// CHECK:       in_out = [],
+// CHECK:       {"flds $1; flds $2; faddp" "=&{st},imr,imr,~{dirflag},~{fpsr},~{flags}"}) -> !cir.float
+// CHECK: cir.store{{.*}} [[TMP5]], [[TMP2]] : !cir.float, !cir.ptr<!cir.float>
+float add5(float x, float y) {
+   float r;
+  __asm__("flds %[x]; flds %[y]; faddp"
+          : "=&t" (r)
+          : [x] "g" (x), [y] "g" (y));
+  return r;
+}
+
+/*
+These tests are copied from clang/test/CodeGen/asm.c. No checks for now -- we
+just make sure no crashes happen.
+*/
+
+
+void t1(int len) {
+  __asm__ volatile("" : "=&r"(len), "+&r"(len));
+}
+
+void t2(unsigned long long t)  {
+  __asm__ volatile("" : "+m"(t));
+}
+
+void t3(unsigned char *src, unsigned long long temp) {
+  __asm__ volatile("" : "+m"(temp), "+r"(src));
+}
+
+void t4(void) {
+  unsigned long long a;
+  struct reg { unsigned long long a, b; } b;
+
+  __asm__ volatile ("":: "m"(a), "m"(b));
+}
+
+void t5(int i) {
+  asm("nop" : "=r"(i) : "0"(t5));
+}
+
+void t6(void) {
+  __asm__ volatile("" : : "i" (t6));
+}
+
+void t7(int a) {
+  __asm__ volatile("T7 NAMED: %[input]" : "+r"(a): [input] "i" (4));  
+}
+
+void t8(void) {
+  __asm__ volatile("T8 NAMED MODIFIER: %c[input]" :: [input] "i" (4));  
+}
+
+unsigned t9(unsigned int a) {
+  asm("bswap %0 %1" : "+r" (a));
+  return a;
+}
+
+void t10(int r) {
+  __asm__("PR3908 %[lf] %[xx] %[li] %[r]" : [r] "+r" (r) : [lf] "mx" (0), [li] "mr" (0), [xx] "x" ((double)(0)));
+}
+
+unsigned t11(signed char input) {
+  unsigned  output;
+  __asm__("xyz"
+          : "=a" (output)
+          : "0" (input));
+  return output;
+}
+
+unsigned char t12(unsigned input) {
+  unsigned char output;
+  __asm__("xyz"
+          : "=a" (output)
+          : "0" (input));
+  return output;
+}
+
+unsigned char t13(unsigned input) {
+  unsigned char output;
+  __asm__("xyz %1"
+          : "=a" (output)
+          : "0" (input));
+  return output;
+}
+
+struct large {
+  int x[1000];
+};
+
+unsigned long t15(int x, struct large *P) {
+  __asm__("xyz "
+          : "=r" (x)
+          : "m" (*P), "0" (x));
+  return x;
+}
+
+// bitfield destination of an asm.
+struct S {
+  int a : 4;
+};
+
+void t14(struct S *P) {
+  __asm__("abc %0" : "=r"(P->a) );
+}
+
+int t16(void) {
+  int a,b;
+  asm ( "nop;"
+       :"=%c" (a)
+       : "r" (b)
+       );
+  return 0;
+}
+
+void t17(void) {
+  int i;
+  __asm__ ( "nop": "=m"(i));
+}
+
+int t18(unsigned data) {
+  int a, b;
+
+  asm("xyz" :"=a"(a), "=d"(b) : "a"(data));
+  return a + b;
+}
+
+int t19(unsigned data) {
+  int a, b;
+
+  asm("x{abc|def|ghi}z" :"=r"(a): "r"(data));
+  return a + b;
+}
+
+// skip t20 and t21: long double is not supported
+
+// accept 'l' constraint
+unsigned char t22(unsigned char a, unsigned char b) {
+  unsigned int la = a;
+  unsigned int lb = b;
+  unsigned int bigres;
+  unsigned char res;
+  __asm__ ("0:\n1:\n" : [bigres] "=la"(bigres) : [la] "0"(la), [lb] "c"(lb) :
+                        "edx", "cc");
+  res = bigres;
+  return res;
+}
+
+// accept 'l' constraint
+unsigned char t23(unsigned char a, unsigned char b) {
+  unsigned int la = a;
+  unsigned int lb = b;
+  unsigned char res;
+  __asm__ ("0:\n1:\n" : [res] "=la"(res) : [la] "0"(la), [lb] "c"(lb) :
+                        "edx", "cc");
+  return res;
+}
+
+void *t24(char c) {
+  void *addr;
+  __asm__ ("foobar" : "=a" (addr) : "0" (c));
+  return addr;
+}
+
+void t25(void)
+{
+  __asm__ __volatile__(					   \
+		       "finit"				   \
+		       :				   \
+		       :				   \
+		       :"st","st(1)","st(2)","st(3)",	   \
+			"st(4)","st(5)","st(6)","st(7)",   \
+			"fpsr","fpcr"			   \
+							   );
+}
+
+//t26 skipped - no vector type support
+
+// Check to make sure the inline asm non-standard dialect attribute is _not_
+// emitted.
+void t27(void) {
+  asm volatile("nop");
+}
+
+// Check handling of '*' and '#' constraint modifiers.
+void t28(void)
+{
+  asm volatile ("/* %0 */" : : "i#*X,*r" (1));
+}
+
+static unsigned t29_var[1];
+
+void t29(void) {
+  asm volatile("movl %%eax, %0"
+               :
+               : "m"(t29_var));
+}
+
+void t30(int len) {
+  __asm__ volatile(""
+                   : "+&&rm"(len));
+}
+
+void t31(int len) {
+  __asm__ volatile(""
+                   : "+%%rm"(len), "+rm"(len));
+}
+
+//t32 skipped: no goto
+
+void *t33(void *ptr)
+{
+  void *ret;
+  asm ("lea %1, %0" : "=r" (ret) : "p" (ptr));
+  return ret;  
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/assign-operator.cpp b/clang/test/CIR/Incubator/CodeGen/assign-operator.cpp
new file mode 100644
index 0000000000000..c633556a91078
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/assign-operator.cpp
@@ -0,0 +1,216 @@
+// RUN: %clang_cc1 -std=c++17 -mconstructor-aliases -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+int strlen(char const *);
+
+struct String {
+  long size;
+  long capacity;
+
+  String() : size{0}, capacity{0} {}
+  String(char const *s) : size{strlen(s)}, capacity{size} {}
+  // StringView::StringView(String const&)
+  //
+  // CHECK: cir.func {{.*}} @_ZN10StringViewC2ERK6String
+  // CHECK:   %0 = cir.alloca !cir.ptr<!rec_StringView>, !cir.ptr<!cir.ptr<!rec_StringView>>, ["this", init] {alignment = 8 : i64}
+  // CHECK:   %1 = cir.alloca !cir.ptr<!rec_String>, !cir.ptr<!cir.ptr<!rec_String>>, ["s", init, const] {alignment = 8 : i64}
+  // CHECK:   cir.store{{.*}} %arg0, %0 : !cir.ptr<!rec_StringView>
+  // CHECK:   cir.store{{.*}} %arg1, %1 : !cir.ptr<!rec_String>
+  // CHECK:   %2 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!rec_StringView>>
+
+  // Get address of `this->size`
+
+  // CHECK:   %3 = cir.get_member %2[0] {name = "size"}
+
+  // Get address of `s`
+
+  // CHECK:   %4 = cir.load{{.*}} %1 : !cir.ptr<!cir.ptr<!rec_String>>
+
+  // Get the address of s.size
+
+  // CHECK:   %5 = cir.get_member %4[0] {name = "size"}
+
+  // Load value from s.size and store in this->size
+
+  // CHECK:   %6 = cir.load{{.*}} %5 : !cir.ptr<!s64i>, !s64i
+  // CHECK:   cir.store{{.*}} %6, %3 : !s64i, !cir.ptr<!s64i>
+  // CHECK:   cir.return
+  // CHECK: }
+
+  // StringView::operator=(StringView&&)
+  //
+  // CHECK: cir.func {{.*}} @_ZN10StringViewaSEOS_
+  // CHECK-SAME:                  special_member<#cir.cxx_assign<!rec_StringView, move>>
+  // CHECK:   %0 = cir.alloca !cir.ptr<!rec_StringView>, !cir.ptr<!cir.ptr<!rec_StringView>>, ["this", init] {alignment = 8 : i64}
+  // CHECK:   %1 = cir.alloca !cir.ptr<!rec_StringView>, !cir.ptr<!cir.ptr<!rec_StringView>>, ["", init, const] {alignment = 8 : i64}
+  // CHECK:   %2 = cir.alloca !cir.ptr<!rec_StringView>, !cir.ptr<!cir.ptr<!rec_StringView>>, ["__retval"] {alignment = 8 : i64}
+  // CHECK:   cir.store{{.*}} %arg0, %0 : !cir.ptr<!rec_StringView>
+  // CHECK:   cir.store{{.*}} %arg1, %1 : !cir.ptr<!rec_StringView>
+  // CHECK:   %3 = cir.load{{.*}} deref %0 : !cir.ptr<!cir.ptr<!rec_StringView>>
+  // CHECK:   %4 = cir.load{{.*}} %1 : !cir.ptr<!cir.ptr<!rec_StringView>>
+  // CHECK:   %5 = cir.get_member %4[0] {name = "size"}
+  // CHECK:   %6 = cir.load{{.*}} %5 : !cir.ptr<!s64i>, !s64i
+  // CHECK:   %7 = cir.get_member %3[0] {name = "size"}
+  // CHECK:   cir.store{{.*}} %6, %7 : !s64i, !cir.ptr<!s64i>
+  // CHECK:   cir.store{{.*}} %3, %2 : !cir.ptr<!rec_StringView>
+  // CHECK:   %8 = cir.load{{.*}} %2 : !cir.ptr<!cir.ptr<!rec_StringView>>
+  // CHECK:   cir.return %8 : !cir.ptr<!rec_StringView>
+  // CHECK: }
+};
+
+struct StringView {
+  long size;
+
+  StringView(const String &s) : size{s.size} {}
+  StringView() : size{0} {}
+};
+
+int main() {
+  StringView sv;
+  {
+    String s = "Hi";
+    sv = s;
+  }
+}
+
+// CHECK: cir.func {{.*}} @main() -> !s32i
+// CHECK:     %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CHECK:     %1 = cir.alloca !rec_StringView, !cir.ptr<!rec_StringView>, ["sv", init] {alignment = 8 : i64}
+// CHECK:     cir.call @_ZN10StringViewC2Ev(%1) : (!cir.ptr<!rec_StringView>) -> ()
+// CHECK:     cir.scope {
+// CHECK:       %3 = cir.alloca !rec_String, !cir.ptr<!rec_String>, ["s", init] {alignment = 8 : i64}
+// CHECK:       %4 = cir.get_global @".str" : !cir.ptr<!cir.array<!s8i x 3>>
+// CHECK:       %5 = cir.cast array_to_ptrdecay %4 : !cir.ptr<!cir.array<!s8i x 3>> -> !cir.ptr<!s8i>
+// CHECK:       cir.call @_ZN6StringC2EPKc(%3, %5) : (!cir.ptr<!rec_String>, !cir.ptr<!s8i>) -> ()
+// CHECK:       cir.scope {
+// CHECK:         %6 = cir.alloca !rec_StringView, !cir.ptr<!rec_StringView>, ["ref.tmp0"] {alignment = 8 : i64}
+// CHECK:         cir.call @_ZN10StringViewC2ERK6String(%6, %3) : (!cir.ptr<!rec_StringView>, !cir.ptr<!rec_String>) -> ()
+// CHECK:         %7 = cir.call @_ZN10StringViewaSEOS_(%1, %6) : (!cir.ptr<!rec_StringView>, !cir.ptr<!rec_StringView>) -> !cir.ptr<!rec_StringView>
+// CHECK:       }
+// CHECK:     }
+// CHECK:     %2 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CHECK:     cir.return %2 : !s32i
+// CHECK: }
+
+struct HasNonTrivialAssignOp {
+  HasNonTrivialAssignOp &operator=(const HasNonTrivialAssignOp &);
+};
+
+struct ContainsNonTrivial {
+  HasNonTrivialAssignOp start;
+  int i;
+  int *j;
+  HasNonTrivialAssignOp middle;
+  int k : 4;
+  int l : 4;
+  int m : 4;
+  HasNonTrivialAssignOp end;
+  ContainsNonTrivial &operator=(const ContainsNonTrivial &);
+};
+
+// CHECK-LABEL: cir.func {{.*}} @_ZN18ContainsNonTrivialaSERKS_(
+// CHECK-SAME:    special_member<#cir.cxx_assign<!rec_ContainsNonTrivial, copy>>
+// CHECK-NEXT:    %[[#THIS:]] = cir.alloca !cir.ptr<!rec_ContainsNonTrivial>
+// CHECK-NEXT:    %[[#OTHER:]] = cir.alloca !cir.ptr<!rec_ContainsNonTrivial>
+// CHECK-NEXT:    %[[#RETVAL:]] = cir.alloca !cir.ptr<!rec_ContainsNonTrivial>
+// CHECK-NEXT:    cir.store{{.*}} %arg0, %[[#THIS]]
+// CHECK-NEXT:    cir.store{{.*}} %arg1, %[[#OTHER]]
+// CHECK-NEXT:    %[[#THIS_LOAD:]] = cir.load{{.*}} deref %[[#THIS]]
+// CHECK-NEXT:    %[[#THIS_START:]] = cir.get_member %[[#THIS_LOAD]][0] {name = "start"}
+// CHECK-NEXT:    %[[#OTHER_LOAD:]] = cir.load{{.*}} %[[#OTHER]]
+// CHECK-NEXT:    %[[#OTHER_START:]] = cir.get_member %[[#OTHER_LOAD]][0] {name = "start"}
+// CHECK-NEXT:    cir.call @_ZN21HasNonTrivialAssignOpaSERKS_(%[[#THIS_START]], %[[#OTHER_START]])
+// CHECK-NEXT:    %[[#THIS_I:]] = cir.get_member %[[#THIS_LOAD]][2] {name = "i"}
+// CHECK-NEXT:    %[[#OTHER_LOAD:]] = cir.load{{.*}} %[[#OTHER]]
+// CHECK-NEXT:    %[[#OTHER_I:]] = cir.get_member %[[#OTHER_LOAD]][2] {name = "i"}
+// CHECK-NEXT:    %[[#MEMCPY_SIZE:]] = cir.const #cir.int<12> : !u64i
+// CHECK-NEXT:    %[[#THIS_I_CAST:]] = cir.cast bitcast %[[#THIS_I]] : !cir.ptr<!s32i> -> !cir.ptr<!void>
+// CHECK-NEXT:    %[[#OTHER_I_CAST:]] = cir.cast bitcast %[[#OTHER_I]] : !cir.ptr<!s32i> -> !cir.ptr<!void>
+// CHECK-NEXT:    cir.libc.memcpy %[[#MEMCPY_SIZE]] bytes from %[[#OTHER_I_CAST]] to %[[#THIS_I_CAST]]
+// CHECK-NEXT:    %[[#THIS_MIDDLE:]] = cir.get_member %[[#THIS_LOAD]][4] {name = "middle"}
+// CHECK-NEXT:    %[[#OTHER_LOAD:]] = cir.load{{.*}} %[[#OTHER]]
+// CHECK-NEXT:    %[[#OTHER_MIDDLE:]] = cir.get_member %[[#OTHER_LOAD]][4] {name = "middle"}
+// CHECK-NEXT:    cir.call @_ZN21HasNonTrivialAssignOpaSERKS_(%[[#THIS_MIDDLE]], %[[#OTHER_MIDDLE]])
+// CHECK-NEXT:    %[[#THIS_K:]] = cir.get_member %[[#THIS_LOAD]][5] {name = "k"}
+// CHECK-NEXT:    %[[#OTHER_LOAD:]] = cir.load{{.*}} %[[#OTHER]]
+// CHECK-NEXT:    %[[#OTHER_K:]] = cir.get_member %[[#OTHER_LOAD]][5] {name = "k"}
+// CHECK-NEXT:    %[[#MEMCPY_SIZE:]] = cir.const #cir.int<2> : !u64i
+// CHECK-NEXT:    %[[#THIS_K_CAST:]] = cir.cast bitcast %[[#THIS_K]] : !cir.ptr<!u16i> -> !cir.ptr<!void>
+// CHECK-NEXT:    %[[#OTHER_K_CAST:]] = cir.cast bitcast %[[#OTHER_K]] : !cir.ptr<!u16i> -> !cir.ptr<!void>
+// CHECK-NEXT:    cir.libc.memcpy %[[#MEMCPY_SIZE]] bytes from %[[#OTHER_K_CAST]] to %[[#THIS_K_CAST]]
+// CHECK-NEXT:    %[[#THIS_END:]] = cir.get_member %[[#THIS_LOAD]][6] {name = "end"}
+// CHECK-NEXT:    %[[#OTHER_LOAD:]] = cir.load{{.*}} %[[#OTHER]]
+// CHECK-NEXT:    %[[#OTHER_END:]] = cir.get_member %[[#OTHER_LOAD]][6] {name = "end"}
+// CHECK-NEXT:    cir.call @_ZN21HasNonTrivialAssignOpaSERKS_(%[[#THIS_END]], %[[#OTHER_END]])
+// CHECK-NEXT:    cir.store{{.*}} %[[#THIS_LOAD]], %[[#RETVAL]]
+// CHECK-NEXT:    %[[#RETVAL_LOAD:]] = cir.load{{.*}} %[[#RETVAL]]
+// CHECK-NEXT:    cir.return %[[#RETVAL_LOAD]]
+// CHECK-NEXT:  }
+ContainsNonTrivial &
+ContainsNonTrivial::operator=(const ContainsNonTrivial &) = default;
+
+struct Trivial {
+  int i;
+  int *j;
+  double k;
+  int l[3];
+};
+
+// CHECK-LABEL: cir.func {{.*}} @_ZN7TrivialaSERKS_(
+// CHECK-NEXT:    %[[#THIS:]] = cir.alloca !cir.ptr<!rec_Trivial>
+// CHECK-NEXT:    %[[#OTHER:]] = cir.alloca !cir.ptr<!rec_Trivial>
+// CHECK-NEXT:    %[[#RETVAL:]] = cir.alloca !cir.ptr<!rec_Trivial>
+// CHECK-NEXT:    cir.store{{.*}} %arg0, %[[#THIS]]
+// CHECK-NEXT:    cir.store{{.*}} %arg1, %[[#OTHER]]
+// CHECK-NEXT:    %[[#THIS_LOAD:]] = cir.load{{.*}} deref %[[#THIS]]
+// CHECK-NEXT:    %[[#THIS_I:]] = cir.get_member %[[#THIS_LOAD]][0] {name = "i"}
+// CHECK-NEXT:    %[[#OTHER_LOAD:]] = cir.load{{.*}} %[[#OTHER]]
+// CHECK-NEXT:    %[[#OTHER_I:]] = cir.get_member %[[#OTHER_LOAD]][0] {name = "i"}
+// Note that tail padding bytes are not included.
+// CHECK-NEXT:    %[[#MEMCPY_SIZE:]] = cir.const #cir.int<36> : !u64i
+// CHECK-NEXT:    %[[#THIS_I_CAST:]] = cir.cast bitcast %[[#THIS_I]] : !cir.ptr<!s32i> -> !cir.ptr<!void>
+// CHECK-NEXT:    %[[#OTHER_I_CAST:]] = cir.cast bitcast %[[#OTHER_I]] : !cir.ptr<!s32i> -> !cir.ptr<!void>
+// CHECK-NEXT:    cir.libc.memcpy %[[#MEMCPY_SIZE]] bytes from %[[#OTHER_I_CAST]] to %[[#THIS_I_CAST]]
+// CHECK-NEXT:    cir.store{{.*}} %[[#THIS_LOAD]], %[[#RETVAL]]
+// CHECK-NEXT:    %[[#RETVAL_LOAD:]] = cir.load{{.*}} %[[#RETVAL]]
+// CHECK-NEXT:    cir.return %[[#RETVAL_LOAD]]
+// CHECK-NEXT:  }
+
+// We should explicitly call operator= even for trivial types.
+// CHECK-LABEL: cir.func {{.*}} @_Z11copyTrivialR7TrivialS0_(
+// CHECK:         cir.call @_ZN7TrivialaSERKS_(
+void copyTrivial(Trivial &a, Trivial &b) {
+  a = b;
+}
+
+struct ContainsTrivial {
+  Trivial t1;
+  Trivial t2;
+  ContainsTrivial &operator=(const ContainsTrivial &);
+};
+
+// We should explicitly call operator= even for trivial types.
+// CHECK-LABEL: cir.func {{.*}} @_ZN15ContainsTrivialaSERKS_(
+// CHECK-SAME:    special_member<#cir.cxx_assign<!rec_ContainsTrivial, copy>>
+// CHECK:         cir.call @_ZN7TrivialaSERKS_(
+// CHECK:         cir.call @_ZN7TrivialaSERKS_(
+ContainsTrivial &ContainsTrivial::operator=(const ContainsTrivial &) = default;
+
+struct ContainsTrivialArray {
+  Trivial arr[2];
+  ContainsTrivialArray &operator=(const ContainsTrivialArray &);
+};
+
+// We should be calling operator= here but don't currently.
+// CHECK-LABEL: cir.func {{.*}} @_ZN20ContainsTrivialArrayaSERKS_(
+// CHECK-SAME:    special_member<#cir.cxx_assign<!rec_ContainsTrivialArray, copy>>
+// CHECK:         %[[#THIS_LOAD:]] = cir.load{{.*}} deref %[[#]]
+// CHECK-NEXT:    %[[#THIS_ARR:]] = cir.get_member %[[#THIS_LOAD]][0] {name = "arr"}
+// CHECK-NEXT:    %[[#THIS_ARR_CAST:]] = cir.cast bitcast %[[#THIS_ARR]] : !cir.ptr<!cir.array<!rec_Trivial x 2>> -> !cir.ptr<!void>
+// CHECK-NEXT:    %[[#OTHER_LOAD:]] = cir.load{{.*}} %[[#]]
+// CHECK-NEXT:    %[[#OTHER_ARR:]] = cir.get_member %[[#OTHER_LOAD]][0] {name = "arr"}
+// CHECK-NEXT:    %[[#OTHER_ARR_CAST:]] = cir.cast bitcast %[[#OTHER_ARR]] : !cir.ptr<!cir.array<!rec_Trivial x 2>> -> !cir.ptr<!void>
+// CHECK-NEXT:    %[[#MEMCPY_SIZE:]] = cir.const #cir.int<80> : !u64i
+// CHECK-NEXT:    cir.libc.memcpy %[[#MEMCPY_SIZE]] bytes from %[[#OTHER_ARR_CAST]] to %[[#THIS_ARR_CAST]]
+ContainsTrivialArray &
+ContainsTrivialArray::operator=(const ContainsTrivialArray &) = default;
diff --git a/clang/test/CIR/Incubator/CodeGen/atomic-runtime.cpp b/clang/test/CIR/Incubator/CodeGen/atomic-runtime.cpp
new file mode 100644
index 0000000000000..2e5e4cd15df88
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/atomic-runtime.cpp
@@ -0,0 +1,309 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Test __atomic_* built-ins that have a memory order parameter with a runtime
+// value.  This requires generating a switch statement, so the amount of
+// generated code is surprisingly large.
+//
+// Only a representative sample of atomic operations are tested: one read-only
+// operation (atomic_load), one write-only operation (atomic_store), one
+// read-write operation (atomic_exchange), and the most complex operation
+// (atomic_compare_exchange).
+
+int runtime_load(int *ptr, int order) {
+  return __atomic_load_n(ptr, order);
+}
+
+// CHECK: %[[ptr:.*]] = cir.load{{.*}} %[[ptr_var:.*]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK: %[[order:.*]] = cir.load{{.*}} %[[order_var:.*]] : !cir.ptr<!s32i>, !s32i
+// CHECK: cir.switch (%[[order]] : !s32i) {
+// CHECK: cir.case(default, []) {
+// CHECK:   %[[T8:.*]] = cir.load{{.*}} atomic(relaxed) %[[ptr]] : !cir.ptr<!s32i>, !s32i
+// CHECK:   cir.store{{.*}} %[[T8]], %[[temp_var:.*]] : !s32i, !cir.ptr<!s32i>
+// CHECK:   cir.break
+// CHECK: }
+// CHECK: cir.case(anyof, [#cir.int<1> : !s32i, #cir.int<2> : !s32i]) {
+// CHECK:   %[[T8:.*]] = cir.load{{.*}} atomic(acquire) %[[ptr]] : !cir.ptr<!s32i>, !s32i
+// CHECK:   cir.store{{.*}} %[[T8]], %[[temp_var]] : !s32i, !cir.ptr<!s32i>
+// CHECK:   cir.break
+// CHECK: }
+// CHECK: cir.case(equal, [#cir.int<5> : !s32i]) {
+// CHECK:   %[[T8:.*]] = cir.load{{.*}} atomic(seq_cst) %[[ptr]] : !cir.ptr<!s32i>, !s32i
+// CHECK:   cir.store{{.*}} %[[T8]], %[[temp_var]] : !s32i, !cir.ptr<!s32i>
+// CHECK:   cir.break
+// CHECK: }
+// CHECK: }
+
+void atomic_store_n(int* ptr, int val, int order) {
+  __atomic_store_n(ptr, val, order);
+}
+
+// CHECK: %[[ptr:.*]] = cir.load{{.*}} %[[ptr_var:.*]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK: %[[order:.*]] = cir.load{{.*}} %[[order_var:.*]] : !cir.ptr<!s32i>, !s32i
+// CHECK: %[[val:.*]] = cir.load{{.*}} %[[val_var:.*]] : !cir.ptr<!s32i>, !s32i
+// CHECK: cir.store{{.*}} %[[val]], %[[temp_var:.*]] : !s32i, !cir.ptr<!s32i>
+// CHECK: cir.switch (%[[order]] : !s32i) {
+// CHECK: cir.case(default, []) {
+// CHECK:   %[[T7:.*]] = cir.load{{.*}} %[[temp_var:.*]] : !cir.ptr<!s32i>, !s32i
+// CHECK:   cir.store{{.*}} atomic(relaxed) %[[T7]], %[[ptr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:   cir.break
+// CHECK: }
+// CHECK: cir.case(equal, [#cir.int<3> : !s32i]) {
+// CHECK:   %[[T7:.*]] = cir.load{{.*}} %[[temp_var:.*]] : !cir.ptr<!s32i>, !s32i
+// CHECK:   cir.store{{.*}} atomic(release) %[[T7]], %[[ptr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:   cir.break
+// CHECK: }
+// CHECK: cir.case(equal, [#cir.int<5> : !s32i]) {
+// CHECK:   %[[T7:.*]] = cir.load{{.*}} %[[temp_var:.*]] : !cir.ptr<!s32i>, !s32i
+// CHECK:   cir.store{{.*}} atomic(seq_cst) %[[T7]], %[[ptr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:   cir.break
+// CHECK: }
+// CHECK: }
+
+int atomic_exchange_n(int* ptr, int val, int order) {
+  return __atomic_exchange_n(ptr, val, order);
+}
+
+// CHECK: %[[ptr:.*]] = cir.load{{.*}} %[[ptr_var:.*]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK: %[[order:.*]] = cir.load{{.*}} %[[order_var:.*]] : !cir.ptr<!s32i>, !s32i
+// CHECK: %[[val:.*]] = cir.load{{.*}} %[[val_var:.*]] : !cir.ptr<!s32i>, !s32i
+// CHECK: cir.store{{.*}} %[[val]], %[[temp_var:.*]] : !s32i, !cir.ptr<!s32i>
+// CHECK: cir.switch (%[[order]] : !s32i) {
+// CHECK: cir.case(default, []) {
+// CHECK:   %[[T11:.*]] = cir.load{{.*}} %[[temp_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:   %[[T12:.*]] = cir.atomic.xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[T11]] : !s32i, relaxed) : !s32i
+// CHECK:   cir.store{{.*}} %[[T12]], %[[result:.*]] : !s32i, !cir.ptr<!s32i>
+// CHECK:   cir.break
+// CHECK: }
+// CHECK: cir.case(anyof, [#cir.int<1> : !s32i, #cir.int<2> : !s32i]) {
+// CHECK:   %[[T11:.*]] = cir.load{{.*}} %[[temp_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:   %[[T12:.*]] = cir.atomic.xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[T11]] : !s32i, acquire) : !s32i
+// CHECK:   cir.store{{.*}} %[[T12]], %[[result]] : !s32i, !cir.ptr<!s32i>
+// CHECK:   cir.break
+// CHECK: }
+// CHECK: cir.case(equal, [#cir.int<3> : !s32i]) {
+// CHECK:   %[[T11:.*]] = cir.load{{.*}} %[[temp_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:   %[[T12:.*]] = cir.atomic.xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[T11]] : !s32i, release) : !s32i
+// CHECK:   cir.store{{.*}} %[[T12]], %[[result]] : !s32i, !cir.ptr<!s32i>
+// CHECK:   cir.break
+// CHECK: }
+// CHECK: cir.case(equal, [#cir.int<4> : !s32i]) {
+// CHECK:   %[[T11:.*]] = cir.load{{.*}} %[[temp_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:   %[[T12:.*]] = cir.atomic.xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[T11]] : !s32i, acq_rel) : !s32i
+// CHECK:   cir.store{{.*}} %[[T12]], %[[result]] : !s32i, !cir.ptr<!s32i>
+// CHECK:   cir.break
+// CHECK: }
+// CHECK: cir.case(equal, [#cir.int<5> : !s32i]) {
+// CHECK:   %[[T11:.*]] = cir.load{{.*}} %[[temp_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:   %[[T12:.*]] = cir.atomic.xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[T11]] : !s32i, seq_cst) : !s32i
+// CHECK:   cir.store{{.*}} %[[T12]], %[[result]] : !s32i, !cir.ptr<!s32i>
+// CHECK:   cir.break
+// CHECK: }
+// CHECK: }
+
+bool atomic_compare_exchange_n(int* ptr, int* expected,
+                               int desired, int success, int failure) {
+  return __atomic_compare_exchange_n(ptr, expected, desired, false,
+                                     success, failure);
+}
+
+// CHECK: %[[ptr:.*]] = cir.load{{.*}} %[[T0:.*]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK: %[[success:.*]] = cir.load{{.*}} %[[T3:.*]] : !cir.ptr<!s32i>, !s32i
+// CHECK: %[[expected_addr:.*]] = cir.load{{.*}} %[[T1:.*]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK: %[[T11:.*]] = cir.load{{.*}} %[[T2:.*]] : !cir.ptr<!s32i>, !s32i
+// CHECK: cir.store{{.*}} %[[T11]], %[[desired_var:.*]] : !s32i, !cir.ptr<!s32i>
+// CHECK: %[[failure:.*]] = cir.load{{.*}} %[[T4:.*]] : !cir.ptr<!s32i>, !s32i
+// CHECK: %[[T13:.*]] = cir.const #false
+// CHECK: cir.switch (%[[success]] : !s32i) {
+// CHECK: cir.case(default, []) {
+// CHECK:   cir.switch (%[[failure]] : !s32i) {
+// CHECK:   cir.case(default, []) {
+// CHECK:     %[[expected:.*]] = cir.load{{.*}} %[[expected_addr]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %[[desired:.*]] = cir.load{{.*}} %[[desired_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[expected]] : !s32i, %[[desired]] : !s32i, success = relaxed, failure = relaxed) syncscope(system) align(4) : (!s32i, !cir.bool)
+// CHECK:     %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool
+// CHECK:     cir.if %[[succeeded]] {
+// CHECK:       cir.store{{.*}} %old, %[[expected_addr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:     }
+// CHECK:     cir.store{{.*}} %cmp, %[[result_var:.*]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:     cir.break
+// CHECK:   }
+// CHECK:   cir.case(anyof, [#cir.int<1> : !s32i, #cir.int<2> : !s32i]) {
+// CHECK:     %[[expected:.*]] = cir.load{{.*}} %[[expected_addr]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %[[desired:.*]] = cir.load{{.*}} %[[desired_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[expected]] : !s32i, %[[desired]] : !s32i, success = relaxed, failure = acquire) syncscope(system) align(4) : (!s32i, !cir.bool)
+// CHECK:     %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool
+// CHECK:     cir.if %[[succeeded]] {
+// CHECK:       cir.store{{.*}} %old, %[[expected_addr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:     }
+// CHECK:     cir.store{{.*}} %cmp, %[[result_var]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:     cir.break
+// CHECK:   }
+// CHECK:   cir.case(equal, [#cir.int<5> : !s32i]) {
+// CHECK:     %[[expected:.*]] = cir.load{{.*}} %[[expected_addr]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %[[desired:.*]] = cir.load{{.*}} %[[desired_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[expected]] : !s32i, %[[desired]] : !s32i, success = relaxed, failure = seq_cst) syncscope(system) align(4) : (!s32i, !cir.bool)
+// CHECK:     %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool
+// CHECK:     cir.if %[[succeeded]] {
+// CHECK:       cir.store{{.*}} %old, %[[expected_addr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:     }
+// CHECK:     cir.store{{.*}} %cmp, %[[result_var]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:     cir.break
+// CHECK:   }
+// CHECK:   }
+// CHECK:   cir.break
+// CHECK: }
+// CHECK: cir.case(anyof, [#cir.int<1> : !s32i, #cir.int<2> : !s32i]) {
+// CHECK:   cir.switch (%[[failure]] : !s32i) {
+// CHECK:   cir.case(default, []) {
+// CHECK:     %[[expected:.*]] = cir.load{{.*}} %[[expected_addr]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %[[desired:.*]] = cir.load{{.*}} %[[desired_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[expected]] : !s32i, %[[desired]] : !s32i, success = acquire, failure = relaxed) syncscope(system) align(4) : (!s32i, !cir.bool)
+// CHECK:     %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool
+// CHECK:     cir.if %[[succeeded]] {
+// CHECK:       cir.store{{.*}} %old, %[[expected_addr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:     }
+// CHECK:     cir.store{{.*}} %cmp, %[[result_var]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:     cir.break
+// CHECK:   }
+// CHECK:   cir.case(anyof, [#cir.int<1> : !s32i, #cir.int<2> : !s32i]) {
+// CHECK:     %[[expected:.*]] = cir.load{{.*}} %[[expected_addr]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %[[desired:.*]] = cir.load{{.*}} %[[desired_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[expected]] : !s32i, %[[desired]] : !s32i, success = acquire, failure = acquire) syncscope(system) align(4) : (!s32i, !cir.bool)
+// CHECK:     %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool
+// CHECK:     cir.if %[[succeeded]] {
+// CHECK:       cir.store{{.*}} %old, %[[expected_addr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:     }
+// CHECK:     cir.store{{.*}} %cmp, %[[result_var]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:     cir.break
+// CHECK:   }
+// CHECK:   cir.case(equal, [#cir.int<5> : !s32i]) {
+// CHECK:     %[[expected:.*]] = cir.load{{.*}} %[[expected_addr]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %[[desired:.*]] = cir.load{{.*}} %[[desired_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[expected]] : !s32i, %[[desired]] : !s32i, success = acquire, failure = seq_cst) syncscope(system) align(4) : (!s32i, !cir.bool)
+// CHECK:     %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool
+// CHECK:     cir.if %[[succeeded]] {
+// CHECK:       cir.store{{.*}} %old, %[[expected_addr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:     }
+// CHECK:     cir.store{{.*}} %cmp, %[[result_var]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:     cir.break
+// CHECK:   }
+// CHECK:   }
+// CHECK:   cir.break
+// CHECK: }
+// CHECK: cir.case(equal, [#cir.int<3> : !s32i])
+// CHECK:   cir.switch (%[[failure]] : !s32i) {
+// CHECK:   cir.case(default, []) {
+// CHECK:     %[[expected:.*]] = cir.load{{.*}} %[[expected_addr]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %[[desired:.*]] = cir.load{{.*}} %[[desired_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[expected]] : !s32i, %[[desired]] : !s32i, success = release, failure = relaxed) syncscope(system) align(4) : (!s32i, !cir.bool)
+// CHECK:     %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool
+// CHECK:     cir.if %[[succeeded]] {
+// CHECK:       cir.store{{.*}} %old, %[[expected_addr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:     }
+// CHECK:     cir.store{{.*}} %cmp, %[[result_var]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:     cir.break
+// CHECK:   }
+// CHECK:   cir.case(anyof, [#cir.int<1> : !s32i, #cir.int<2> : !s32i]) {
+// CHECK:     %[[expected:.*]] = cir.load{{.*}} %[[expected_addr]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %[[desired:.*]] = cir.load{{.*}} %[[desired_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[expected]] : !s32i, %[[desired]] : !s32i, success = release, failure = acquire) syncscope(system) align(4) : (!s32i, !cir.bool)
+// CHECK:     %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool
+// CHECK:     cir.if %[[succeeded]] {
+// CHECK:       cir.store{{.*}} %old, %[[expected_addr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:     }
+// CHECK:     cir.store{{.*}} %cmp, %[[result_var]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:     cir.break
+// CHECK:   }
+// CHECK:   cir.case(equal, [#cir.int<5> : !s32i]) {
+// CHECK:     %[[expected:.*]] = cir.load{{.*}} %[[expected_addr]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %[[desired:.*]] = cir.load{{.*}} %[[desired_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[expected]] : !s32i, %[[desired]] : !s32i, success = release, failure = seq_cst) syncscope(system) align(4) : (!s32i, !cir.bool)
+// CHECK:     %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool
+// CHECK:     cir.if %[[succeeded]] {
+// CHECK:       cir.store{{.*}} %old, %[[expected_addr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:     }
+// CHECK:     cir.store{{.*}} %cmp, %[[result_var]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:     cir.break
+// CHECK:   }
+// CHECK:   }
+// CHECK:   cir.break
+// CHECK: }
+// CHECK: cir.case(equal, [#cir.int<4> : !s32i]) {
+// CHECK:   cir.switch (%[[failure]] : !s32i) {
+// CHECK:   cir.case(default, []) {
+// CHECK:     %[[expected:.*]] = cir.load{{.*}} %[[expected_addr]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %[[desired:.*]] = cir.load{{.*}} %[[desired_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[expected]] : !s32i, %[[desired]] : !s32i, success = acq_rel, failure = relaxed) syncscope(system) align(4) : (!s32i, !cir.bool)
+// CHECK:     %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool
+// CHECK:     cir.if %[[succeeded]] {
+// CHECK:       cir.store{{.*}} %old, %[[expected_addr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:     }
+// CHECK:     cir.store{{.*}} %cmp, %[[result_var]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:     cir.break
+// CHECK:   }
+// CHECK:   cir.case(anyof, [#cir.int<1> : !s32i, #cir.int<2> : !s32i]) {
+// CHECK:     %[[expected:.*]] = cir.load{{.*}} %[[expected_addr]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %[[desired:.*]] = cir.load{{.*}} %[[desired_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[expected]] : !s32i, %[[desired]] : !s32i, success = acq_rel, failure = acquire) syncscope(system) align(4) : (!s32i, !cir.bool)
+// CHECK:     %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool
+// CHECK:     cir.if %[[succeeded]] {
+// CHECK:       cir.store{{.*}} %old, %[[expected_addr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:     }
+// CHECK:     cir.store{{.*}} %cmp, %[[result_var]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:     cir.break
+// CHECK:   }
+// CHECK:   cir.case(equal, [#cir.int<5> : !s32i]) {
+// CHECK:     %[[expected:.*]] = cir.load{{.*}} %[[expected_addr]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %[[desired:.*]] = cir.load{{.*}} %[[desired_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[expected]] : !s32i, %[[desired]] : !s32i, success = acq_rel, failure = seq_cst) syncscope(system) align(4) : (!s32i, !cir.bool)
+// CHECK:     %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool
+// CHECK:     cir.if %[[succeeded]] {
+// CHECK:       cir.store{{.*}} %old, %[[expected_addr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:     }
+// CHECK:     cir.store{{.*}} %cmp, %[[result_var]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:     cir.break
+// CHECK:   }
+// CHECK:   }
+// CHECK:   cir.break
+// CHECK: }
+// CHECK: cir.case(equal, [#cir.int<5> : !s32i]) {
+// CHECK:   cir.switch (%[[failure]] : !s32i) {
+// CHECK:   cir.case(default, []) {
+// CHECK:     %[[expected:.*]] = cir.load{{.*}} %[[expected_addr]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %[[desired:.*]] = cir.load{{.*}} %[[desired_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[expected]] : !s32i, %[[desired]] : !s32i, success = seq_cst, failure = relaxed) syncscope(system) align(4) : (!s32i, !cir.bool)
+// CHECK:     %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool
+// CHECK:     cir.if %[[succeeded]] {
+// CHECK:       cir.store{{.*}} %old, %[[expected_addr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:     }
+// CHECK:     cir.store{{.*}} %cmp, %[[result_var]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:     cir.break
+// CHECK:   }
+// CHECK:   cir.case(anyof, [#cir.int<1> : !s32i, #cir.int<2> : !s32i]) {
+// CHECK:     %[[expected:.*]] = cir.load{{.*}} %[[expected_addr]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %[[desired:.*]] = cir.load{{.*}} %[[desired_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[expected]] : !s32i, %[[desired]] : !s32i, success = seq_cst, failure = acquire) syncscope(system) align(4) : (!s32i, !cir.bool)
+// CHECK:     %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool
+// CHECK:     cir.if %[[succeeded]] {
+// CHECK:       cir.store{{.*}} %old, %[[expected_addr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:     }
+// CHECK:     cir.store{{.*}} %cmp, %[[result_var]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:     cir.break
+// CHECK:   }
+// CHECK:   cir.case(equal, [#cir.int<5> : !s32i]) {
+// CHECK:     %[[expected:.*]] = cir.load{{.*}} %[[expected_addr]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %[[desired:.*]] = cir.load{{.*}} %[[desired_var]] : !cir.ptr<!s32i>, !s32i
+// CHECK:     %old, %cmp = cir.atomic.cmp_xchg(%[[ptr]] : !cir.ptr<!s32i>, %[[expected]] : !s32i, %[[desired]] : !s32i, success = seq_cst, failure = seq_cst) syncscope(system) align(4) : (!s32i, !cir.bool)
+// CHECK:     %[[succeeded:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool
+// CHECK:     cir.if %[[succeeded]] {
+// CHECK:       cir.store{{.*}} %old, %[[expected_addr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:     }
+// CHECK:     cir.store{{.*}} %cmp, %[[result_var]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:     cir.break
+// CHECK:   }
+// CHECK:   }
+// CHECK:   cir.break
+// CHECK: }
+// CHECK: }
+
diff --git a/clang/test/CIR/Incubator/CodeGen/atomic-thread-fence.c b/clang/test/CIR/Incubator/CodeGen/atomic-thread-fence.c
new file mode 100644
index 0000000000000..372b2133bf3d2
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/atomic-thread-fence.c
@@ -0,0 +1,184 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t-og.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t-og.ll %s
+
+
+struct Data {
+  int value;
+  void *ptr;
+};
+
+typedef struct Data *DataPtr;
+
+void applyThreadFence() {
+  __atomic_thread_fence(__ATOMIC_SEQ_CST);
+}
+
+// CIR-LABEL: @applyThreadFence
+// CIR:   cir.atomic.fence syncscope(system) seq_cst
+// CIR:   cir.return
+
+// LLVM-LABEL: @applyThreadFence
+// LLVM:    fence seq_cst
+// LLVM:    ret void
+
+void applySignalFence() {
+  __atomic_signal_fence(__ATOMIC_SEQ_CST);
+}
+// CIR-LABEL: @applySignalFence
+// CIR:    cir.atomic.fence syncscope(single_thread) seq_cst
+// CIR:    cir.return
+
+// LLVM-LABEL: @applySignalFence
+// LLVM:    fence syncscope("singlethread") seq_cst
+// LLVM:    ret void
+
+void modifyWithThreadFence(DataPtr d) {
+  __atomic_thread_fence(__ATOMIC_SEQ_CST);
+  d->value = 42;
+}
+// CIR-LABEL: @modifyWithThreadFence
+// CIR:    %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
+// CIR:    cir.atomic.fence syncscope(system) seq_cst
+// CIR:    %[[VAL_42:.*]] = cir.const #cir.int<42> : !s32i
+// CIR:    %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
+// CIR:    %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][0] {name = "value"} : !cir.ptr<!rec_Data> -> !cir.ptr<!s32i>
+// CIR:    cir.store{{.*}} %[[VAL_42]], %[[DATA_VALUE]] : !s32i, !cir.ptr<!s32i>
+// CIR:    cir.return
+
+// LLVM-LABEL: @modifyWithThreadFence
+// LLVM:    %[[DATA:.*]] = alloca ptr, i64 1, align 8
+// LLVM:    fence seq_cst
+// LLVM:    %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
+// LLVM:    %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
+// LLVM:    store i32 42, ptr %[[DATA_VALUE]], align 8
+// LLVM:    ret void
+
+void modifyWithSignalFence(DataPtr d) {
+  __atomic_signal_fence(__ATOMIC_SEQ_CST);
+  d->value = 24;
+}
+// CIR-LABEL: @modifyWithSignalFence
+// CIR:    %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
+// CIR:    cir.atomic.fence syncscope(single_thread) seq_cst
+// CIR:    %[[VAL_24:.*]] = cir.const #cir.int<24> : !s32i
+// CIR:    %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
+// CIR:    %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][0] {name = "value"} : !cir.ptr<!rec_Data> -> !cir.ptr<!s32i>
+// CIR:    cir.store{{.*}} %[[VAL_24]], %[[DATA_VALUE]] : !s32i, !cir.ptr<!s32i>
+// CIR:    cir.return
+
+// LLVM-LABEL: @modifyWithSignalFence
+// LLVM:    %[[DATA:.*]] = alloca ptr, i64 1, align 8
+// LLVM:    fence syncscope("singlethread") seq_cst
+// LLVM:    %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
+// LLVM:    %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
+// LLVM:    store i32 24, ptr %[[DATA_VALUE]], align 8
+// LLVM:    ret void
+
+void loadWithThreadFence(DataPtr d) {
+  __atomic_thread_fence(__ATOMIC_SEQ_CST);
+  __atomic_load_n(&d->ptr, __ATOMIC_SEQ_CST);
+}
+// CIR-LABEL: @loadWithThreadFence
+// CIR:    %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
+// CIR:    %[[ATOMIC_TEMP:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["atomic-temp"] {alignment = 8 : i64}
+// CIR:    cir.atomic.fence syncscope(system) seq_cst
+// CIR:    %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
+// CIR:    %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][1] {name = "ptr"} : !cir.ptr<!rec_Data> -> !cir.ptr<!cir.ptr<!void>>
+// CIR:    %[[CASTED_DATA_VALUE:.*]] = cir.cast bitcast %[[DATA_VALUE]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
+// CIR:    %[[CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
+// CIR:    %[[ATOMIC_LOAD:.*]] = cir.load{{.*}} atomic(seq_cst) %[[CASTED_DATA_VALUE]] : !cir.ptr<!u64i>, !u64i
+// CIR:    cir.store{{.*}} %[[ATOMIC_LOAD]], %[[CASTED_ATOMIC_TEMP]] : !u64i, !cir.ptr<!u64i>
+// CIR:    %[[DOUBLE_CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[CASTED_ATOMIC_TEMP]] : !cir.ptr<!u64i> -> !cir.ptr<!cir.ptr<!void>>
+// CIR:    %[[ATOMIC_LOAD_PTR:.*]] = cir.load{{.*}} %[[DOUBLE_CASTED_ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR:    cir.return
+
+// LLVM-LABEL: @loadWithThreadFence
+// LLVM:    %[[DATA:.*]] = alloca ptr, i64 1, align 8
+// LLVM:    %[[DATA_TEMP:.*]] = alloca ptr, i64 1, align 8
+// LLVM:    fence seq_cst
+// LLVM:    %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
+// LLVM:    %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
+// LLVM:    %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
+// LLVM:    store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
+// LLVM:    %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
+// LLVM:    ret void
+
+void loadWithSignalFence(DataPtr d) {
+  __atomic_signal_fence(__ATOMIC_SEQ_CST);
+  __atomic_load_n(&d->ptr, __ATOMIC_SEQ_CST);
+}
+// CIR-LABEL: @loadWithSignalFence
+// CIR:    %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
+// CIR:    %[[ATOMIC_TEMP:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["atomic-temp"] {alignment = 8 : i64}
+// CIR:    cir.atomic.fence syncscope(single_thread) seq_cst
+// CIR:    %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
+// CIR:    %[[DATA_PTR:.*]] = cir.get_member %[[LOAD_DATA]][1] {name = "ptr"} : !cir.ptr<!rec_Data> -> !cir.ptr<!cir.ptr<!void>>
+// CIR:    %[[CASTED_DATA_PTR:.*]] = cir.cast bitcast %[[DATA_PTR]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
+// CIR:    %[[CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
+// CIR:    %[[ATOMIC_LOAD:.*]] = cir.load{{.*}} atomic(seq_cst) %[[CASTED_DATA_PTR]] : !cir.ptr<!u64i>, !u64i
+// CIR:    cir.store{{.*}} %[[ATOMIC_LOAD]], %[[CASTED_ATOMIC_TEMP]] : !u64i, !cir.ptr<!u64i>
+// CIR:    %[[DOUBLE_CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[CASTED_ATOMIC_TEMP]] : !cir.ptr<!u64i> -> !cir.ptr<!cir.ptr<!void>>
+// CIR:    %[[LOAD_ATOMIC_TEMP:.*]] = cir.load{{.*}} %[[DOUBLE_CASTED_ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR:    cir.return
+
+// LLVM-LABEL: @loadWithSignalFence
+// LLVM:    %[[DATA:.*]] = alloca ptr, i64 1, align 8
+// LLVM:    %[[DATA_TEMP:.*]] = alloca ptr, i64 1, align 8
+// LLVM:    fence syncscope("singlethread") seq_cst
+// LLVM:    %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
+// LLVM:    %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
+// LLVM:    %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
+// LLVM:    store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
+// LLVM:    %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
+// LLVM:    ret void
+
+// Test C11 atomic fence variants
+void applyC11ThreadFence() {
+  __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
+}
+// CIR-LABEL: @applyC11ThreadFence
+// CIR:   cir.atomic.fence syncscope(system) seq_cst
+// CIR:   cir.return
+
+// LLVM-LABEL: @applyC11ThreadFence
+// LLVM:    fence seq_cst
+// LLVM:    ret void
+
+// OGCG-LABEL: @applyC11ThreadFence
+// OGCG:    fence seq_cst
+// OGCG:    ret void
+
+void applyC11SignalFence() {
+  __c11_atomic_signal_fence(__ATOMIC_SEQ_CST);
+}
+// CIR-LABEL: @applyC11SignalFence
+// CIR:   cir.atomic.fence syncscope(single_thread) seq_cst
+// CIR:   cir.return
+
+// LLVM-LABEL: @applyC11SignalFence
+// LLVM:    fence syncscope("singlethread") seq_cst
+// LLVM:    ret void
+
+// OGCG-LABEL: @applyC11SignalFence
+// OGCG:    fence syncscope("singlethread") seq_cst
+// OGCG:    ret void
+
+// Test __sync_synchronize (GCC builtin)
+void applySyncSynchronize() {
+  __sync_synchronize();
+}
+// CIR-LABEL: @applySyncSynchronize
+// CIR:   cir.atomic.fence syncscope(system) seq_cst
+// CIR:   cir.return
+
+// LLVM-LABEL: @applySyncSynchronize
+// LLVM:    fence seq_cst
+// LLVM:    ret void
+
+// OGCG-LABEL: @applySyncSynchronize
+// OGCG:    fence seq_cst
+// OGCG:    ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/atomic-type-casts.cpp b/clang/test/CIR/Incubator/CodeGen/atomic-type-casts.cpp
new file mode 100644
index 0000000000000..58266a60bbe62
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/atomic-type-casts.cpp
@@ -0,0 +1,85 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.og.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.og.ll %s
+
+// Test CK_AtomicToNonAtomic and CK_NonAtomicToAtomic casts
+// Note: Full atomic load/store support is NYI - this tests just the casts
+
+// Test NonAtomicToAtomic cast (assigning non-atomic to atomic)
+void test_non_atomic_to_atomic() {
+  int x = 50;
+  _Atomic int y = x;  // Implicit NonAtomicToAtomic cast
+  // CIR: cir.func{{.*}}test_non_atomic_to_atomicv
+  // CIR: cir.alloca !s32i, !cir.ptr<!s32i>, ["x"
+  // CIR: cir.alloca !s32i, !cir.ptr<!s32i>, ["y"
+  // CIR: cir.load
+  // CIR: cir.store
+  // LLVM-LABEL: @_Z25test_non_atomic_to_atomicv
+  // LLVM: alloca i32
+  // LLVM: alloca i32
+  // LLVM: store i32 50
+  // LLVM: load i32
+  // LLVM: store i32
+  // OGCG-LABEL: @_Z25test_non_atomic_to_atomicv
+  // OGCG: %x = alloca i32
+  // OGCG: %y = alloca i32
+  // OGCG: store i32 50
+}
+
+// Test that atomic type casts don't crash the compiler
+void test_atomic_cast_exists() {
+  int regular = 42;
+  _Atomic int atomic_val = regular;
+  // Just verify this compiles - the cast infrastructure exists
+  // CIR: cir.func{{.*}}test_atomic_cast_existsv
+  // CIR: cir.alloca !s32i, !cir.ptr<!s32i>, ["regular"
+  // CIR: cir.alloca !s32i, !cir.ptr<!s32i>, ["atomic_val"
+  // LLVM-LABEL: @_Z23test_atomic_cast_existsv
+  // LLVM: alloca i32
+  // LLVM: alloca i32
+  // LLVM: store i32 42
+  // OGCG-LABEL: @_Z23test_atomic_cast_existsv
+  // OGCG: %regular = alloca i32
+  // OGCG: %atomic_val = alloca i32
+  // OGCG: store i32 42
+}
+
+// Test with different types
+void test_atomic_float_cast() {
+  float f = 3.14f;
+  _Atomic float g = f;
+  // CIR: cir.func{{.*}}test_atomic_float_castv
+  // CIR: cir.alloca !cir.float
+  // CIR: cir.alloca !cir.float
+  // LLVM-LABEL: @_Z22test_atomic_float_castv
+  // LLVM: alloca float
+  // LLVM: alloca float
+  // LLVM: store float
+  // OGCG-LABEL: @_Z22test_atomic_float_castv
+  // OGCG: %f = alloca float
+  // OGCG: %g = alloca float
+  // OGCG: store float
+}
+
+// Test that cast infrastructure is in place for pointers
+void test_atomic_pointer_cast() {
+  int val = 42;
+  int* ptr = &val;
+  _Atomic(int*) atomic_ptr = ptr;
+  // CIR: cir.func{{.*}}test_atomic_pointer_castv
+  // CIR: cir.alloca !cir.ptr<!s32i>
+  // CIR: cir.alloca !cir.ptr<!s32i>
+  // LLVM-LABEL: @_Z24test_atomic_pointer_castv
+  // LLVM: alloca i32
+  // LLVM: alloca ptr
+  // LLVM: alloca ptr
+  // LLVM: store i32 42
+  // OGCG-LABEL: @_Z24test_atomic_pointer_castv
+  // OGCG: %val = alloca i32
+  // OGCG: %ptr = alloca ptr
+  // OGCG: %atomic_ptr = alloca ptr
+  // OGCG: store i32 42
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/atomic-xchg-field.c b/clang/test/CIR/Incubator/CodeGen/atomic-xchg-field.c
new file mode 100644
index 0000000000000..aca99ec0c3df1
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/atomic-xchg-field.c
@@ -0,0 +1,85 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+typedef struct __Base {
+  unsigned long id;
+  unsigned int a;
+  unsigned int n;
+  unsigned char x;
+  unsigned short u;
+} Base;
+
+struct w {
+  Base _base;
+  const void * ref;
+};
+
+typedef struct w *wPtr;
+
+void field_access(wPtr item) {
+  __atomic_exchange_n((&item->ref), (((void*)0)), 5);
+}
+
+// CHECK: ![[W:.*]] = !cir.record<struct "w"
+// CHECK-LABEL: @field_access
+// CHECK-NEXT: %[[WADDR:.*]] = cir.alloca !cir.ptr<![[W]]>, {{.*}} {alignment = 8 : i64}
+// CHECK: %[[FIELD:.*]] = cir.load{{.*}} %[[WADDR]]
+// CHECK: %[[MEMBER:.*]] = cir.get_member %[[FIELD]][1] {name = "ref"}
+// CHECK: %[[CASTED_MEMBER:.*]] = cir.cast bitcast %[[MEMBER]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
+// CHECK: cir.atomic.xchg(%[[CASTED_MEMBER]] : !cir.ptr<!u64i>, {{.*}} : !u64i, seq_cst)
+
+// LLVM-LABEL: @field_access
+// LLVM: = alloca ptr, i64 1, align 8
+// LLVM: %[[VAL_ADDR:.*]] = alloca ptr, i64 1, align 8
+// LLVM: %[[RES_ADDR:.*]] = alloca ptr, i64 1, align 8
+
+// LLVM: %[[MEMBER:.*]] = getelementptr %struct.w, ptr {{.*}}, i32 0, i32 1
+// LLVM: store ptr null, ptr %[[VAL_ADDR]], align 8
+// LLVM: %[[VAL:.*]] = load i64, ptr %[[VAL_ADDR]], align 8
+// LLVM: %[[RES:.*]] = atomicrmw xchg ptr %[[MEMBER]], i64 %[[VAL]] seq_cst, align 8
+// LLVM: store i64 %[[RES]], ptr {{.*}}, align 8
+// LLVM: load ptr, ptr %[[RES_ADDR]], align 8
+// LLVM: ret void
+
+void structAtomicExchange(unsigned referenceCount, wPtr item) {
+  __atomic_compare_exchange_n((&item->_base.a), (&referenceCount), (referenceCount + 1), 1 , 5, 5);
+}
+
+// CHECK-LABEL: @structAtomicExchange
+// CHECK: %old, %cmp = cir.atomic.cmp_xchg({{.*}} : !cir.ptr<!u32i>, {{.*}} : !u32i, {{.*}} : !u32i, success = seq_cst, failure = seq_cst) syncscope(system) align(8) weak : (!u32i, !cir.bool)
+
+// LLVM-LABEL: @structAtomicExchange
+// LLVM:   load i32
+// LLVM:   add i32
+// LLVM:   store i32
+// LLVM:   %[[EXP:.*]] = load i32
+// LLVM:   %[[DES:.*]] = load i32
+// LLVM:   %[[RES:.*]] = cmpxchg weak ptr {{.*}}, i32 %[[EXP]], i32 %[[DES]] seq_cst seq_cst, align 8
+// LLVM:   %[[OLD:.*]] = extractvalue { i32, i1 } %[[RES]], 0
+// LLVM:   %[[CMP:.*]] = extractvalue { i32, i1 } %[[RES]], 1
+// LLVM:   %[[FAIL:.*]] = xor i1 %[[CMP]], true
+// LLVM:   br i1 %[[FAIL]], label %[[STORE_OLD:.*]], label %[[CONTINUE:.*]]
+// LLVM: [[STORE_OLD]]:
+// LLVM:   store i32 %[[OLD]], ptr
+// LLVM:   br label %[[CONTINUE]]
+
+// LLVM: [[CONTINUE]]:
+// LLVM:   %[[Z:.*]] = zext i1 %[[CMP]] to i8
+// LLVM:   store i8 %[[Z]], ptr {{.*}}, align 1
+// LLVM:   ret void
+
+void f2(const void *cf);
+
+void structLoad(unsigned referenceCount, wPtr item) {
+  f2(__atomic_load_n(&item->ref, 5));
+}
+
+// CHECK-LABEL: @structLoad
+// CHECK:    %[[ATOMIC_TEMP:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["atomic-temp"]
+// CHECK:    %[[RES:.*]] = cir.cast bitcast %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
+// CHECK:    %[[ATOMIC_LOAD:.*]] = cir.load{{.*}} atomic(seq_cst) {{.*}} : !cir.ptr<!u64i>, !u64i
+// CHECK:    cir.store{{.*}} %[[ATOMIC_LOAD]], %[[RES]] : !u64i, !cir.ptr<!u64i>
+
+// No LLVM tests needed for this one, already covered elsewhere.
diff --git a/clang/test/CIR/Incubator/CodeGen/atomic.cpp b/clang/test/CIR/Incubator/CodeGen/atomic.cpp
new file mode 100644
index 0000000000000..47340ae0acc9a
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/atomic.cpp
@@ -0,0 +1,1464 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// Available on resource dir.
+#include <stdatomic.h>
+
+typedef struct _a {
+  _Atomic(int) d;
+} at;
+
+void m() { at y; }
+
+signed char sc;
+unsigned char uc;
+signed short ss;
+unsigned short us;
+signed int si;
+unsigned int ui;
+signed long long sll;
+unsigned long long ull;
+
+// CHECK: ![[A:.*]] = !cir.record<struct "_a" {!s32i}>
+
+int basic_binop_fetch(int *i) {
+  return __atomic_add_fetch(i, 1, memory_order_seq_cst);
+}
+
+// CHECK: cir.func {{.*}} @_Z17basic_binop_fetchPi
+// CHECK:  %[[ARGI:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["i", init] {alignment = 8 : i64}
+// CHECK:  %[[ONE_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, [".atomictmp"] {alignment = 4 : i64}
+// CHECK:  cir.store{{.*}} %arg0, %[[ARGI]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CHECK:  %[[I:.*]] = cir.load{{.*}} %[[ARGI]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK:  %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+// CHECK:  cir.store{{.*}} %[[ONE]], %[[ONE_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CHECK:  %[[VAL:.*]] = cir.load{{.*}} %[[ONE_ADDR]] : !cir.ptr<!s32i>, !s32i
+// CHECK:  cir.atomic.fetch(add, %[[I]] : !cir.ptr<!s32i>, %[[VAL]] : !s32i, seq_cst) : !s32i
+
+// LLVM: define dso_local i32 @_Z17basic_binop_fetchPi
+// LLVM: %[[RMW:.*]] = atomicrmw add ptr {{.*}}, i32 %[[VAL:.*]] seq_cst, align 4
+// LLVM: add i32 %[[RMW]], %[[VAL]]
+
+int other_binop_fetch(int *i) {
+  __atomic_sub_fetch(i, 1, memory_order_relaxed);
+  __atomic_and_fetch(i, 1, memory_order_consume);
+  __atomic_or_fetch(i, 1, memory_order_acquire);
+  return __atomic_xor_fetch(i, 1, memory_order_release);
+}
+
+// CHECK: cir.func {{.*}} @_Z17other_binop_fetchPi
+// CHECK: cir.atomic.fetch(sub, {{.*}}, relaxed
+// CHECK: cir.atomic.fetch(and, {{.*}}, acquire
+// CHECK: cir.atomic.fetch(or, {{.*}}, acquire
+// CHECK: cir.atomic.fetch(xor, {{.*}}, release
+
+// LLVM: define dso_local i32 @_Z17other_binop_fetchPi
+// LLVM: %[[RMW_SUB:.*]] = atomicrmw sub ptr {{.*}} monotonic
+// LLVM: sub i32 %[[RMW_SUB]], {{.*}}
+// LLVM: %[[RMW_AND:.*]] = atomicrmw and ptr {{.*}} acquire
+// LLVM: and i32 %[[RMW_AND]], {{.*}}
+// LLVM: %[[RMW_OR:.*]] = atomicrmw or ptr {{.*}} acquire
+// LLVM: or i32 %[[RMW_OR]], {{.*}}
+// LLVM: %[[RMW_XOR:.*]] = atomicrmw xor ptr {{.*}} release
+// LLVM: xor i32 %[[RMW_XOR]], {{.*}}
+
+int nand_binop_fetch(int *i) {
+  return __atomic_nand_fetch(i, 1, memory_order_acq_rel);
+}
+
+// CHECK: cir.func {{.*}} @_Z16nand_binop_fetchPi
+// CHECK: cir.atomic.fetch(nand, {{.*}}, acq_rel
+
+// LLVM: define dso_local i32 @_Z16nand_binop_fetchPi
+// LLVM: %[[RMW_NAND:.*]] = atomicrmw nand ptr {{.*}} acq_rel
+// LLVM: %[[AND:.*]] = and i32 %[[RMW_NAND]]
+// LLVM: = xor i32 %[[AND]], -1
+
+int fp_binop_fetch(float *i) {
+  __atomic_add_fetch(i, 1, memory_order_seq_cst);
+  return __atomic_sub_fetch(i, 1, memory_order_seq_cst);
+}
+
+// CHECK: cir.func {{.*}} @_Z14fp_binop_fetchPf
+// CHECK: cir.atomic.fetch(add,
+// CHECK: cir.atomic.fetch(sub,
+
+// LLVM: define dso_local i32 @_Z14fp_binop_fetchPf
+// LLVM: %[[RMW_FADD:.*]] = atomicrmw fadd ptr
+// LLVM: fadd float %[[RMW_FADD]]
+// LLVM: %[[RMW_FSUB:.*]] = atomicrmw fsub ptr
+// LLVM: fsub float %[[RMW_FSUB]]
+
+int fetch_binop(int *i) {
+  __atomic_fetch_add(i, 1, memory_order_seq_cst);
+  __atomic_fetch_sub(i, 1, memory_order_seq_cst);
+  __atomic_fetch_and(i, 1, memory_order_seq_cst);
+  __atomic_fetch_or(i, 1, memory_order_seq_cst);
+  __atomic_fetch_xor(i, 1, memory_order_seq_cst);
+  return __atomic_fetch_nand(i, 1, memory_order_seq_cst);
+}
+
+// CHECK: cir.func {{.*}} @_Z11fetch_binopPi
+// CHECK: cir.atomic.fetch(add, {{.*}}) fetch_first
+// CHECK: cir.atomic.fetch(sub, {{.*}}) fetch_first
+// CHECK: cir.atomic.fetch(and, {{.*}}) fetch_first
+// CHECK: cir.atomic.fetch(or, {{.*}}) fetch_first
+// CHECK: cir.atomic.fetch(xor, {{.*}}) fetch_first
+// CHECK: cir.atomic.fetch(nand, {{.*}}) fetch_first
+
+// LLVM: define dso_local i32 @_Z11fetch_binopPi
+// LLVM: atomicrmw add ptr
+// LLVM-NOT: add {{.*}}
+// LLVM: atomicrmw sub ptr
+// LLVM-NOT: sub {{.*}}
+// LLVM: atomicrmw and ptr
+// LLVM-NOT: and {{.*}}
+// LLVM: atomicrmw or ptr
+// LLVM-NOT: or {{.*}}
+// LLVM: atomicrmw xor ptr
+// LLVM-NOT: xor {{.*}}
+// LLVM: atomicrmw nand ptr
+// LLVM-NOT: nand {{.*}}
+
+void min_max_fetch(int *i) {
+  __atomic_fetch_max(i, 1, memory_order_seq_cst);
+  __atomic_fetch_min(i, 1, memory_order_seq_cst);
+  __atomic_max_fetch(i, 1, memory_order_seq_cst);
+  __atomic_min_fetch(i, 1, memory_order_seq_cst);
+}
+
+// CHECK: cir.func {{.*}} @_Z13min_max_fetchPi
+// CHECK: = cir.atomic.fetch(max, {{.*}}) fetch_first
+// CHECK: = cir.atomic.fetch(min, {{.*}}) fetch_first
+// CHECK: = cir.atomic.fetch(max, {{.*}}) : !s32i
+// CHECK: = cir.atomic.fetch(min, {{.*}}) : !s32i
+
+// LLVM: define dso_local void @_Z13min_max_fetchPi
+// LLVM: atomicrmw max ptr
+// LLVM-NOT: icmp {{.*}}
+// LLVM: atomicrmw min ptr
+// LLVM-NOT: icmp {{.*}}
+// LLVM: %[[MAX:.*]] = atomicrmw max ptr
+// LLVM: %[[ICMP_MAX:.*]] = icmp sgt i32 %[[MAX]]
+// LLVM: select i1 %[[ICMP_MAX]], i32 %[[MAX]]
+// LLVM: %[[MIN:.*]] = atomicrmw min ptr
+// LLVM: %[[ICMP_MIN:.*]] = icmp slt i32 %[[MIN]]
+// LLVM: select i1 %[[ICMP_MIN]], i32 %[[MIN]]
+
+int fi1(_Atomic(int) *i) {
+  return __c11_atomic_load(i, memory_order_seq_cst);
+}
+
+// CHECK: cir.func {{.*}} @_Z3fi1PU7_Atomici
+// CHECK: cir.load{{.*}} atomic(seq_cst)
+
+// LLVM-LABEL: @_Z3fi1PU7_Atomici
+// LLVM: load atomic i32, ptr {{.*}} seq_cst, align 4
+
+int fi1a(int *i) {
+  int v;
+  __atomic_load(i, &v, memory_order_seq_cst);
+  return v;
+}
+
+// CHECK-LABEL: @_Z4fi1aPi
+// CHECK: cir.load{{.*}} atomic(seq_cst)
+
+// LLVM-LABEL: @_Z4fi1aPi
+// LLVM: load atomic i32, ptr {{.*}} seq_cst, align 4
+
+int fi1b(int *i) {
+  return __atomic_load_n(i, memory_order_seq_cst);
+}
+
+// CHECK-LABEL: @_Z4fi1bPi
+// CHECK: cir.load{{.*}} atomic(seq_cst)
+
+// LLVM-LABEL: @_Z4fi1bPi
+// LLVM: load atomic i32, ptr {{.*}} seq_cst, align 4
+
+int fi1c(atomic_int *i) {
+  return atomic_load(i);
+}
+
+// CHECK-LABEL: @_Z4fi1cPU7_Atomici
+// CHECK: cir.load{{.*}} atomic(seq_cst)
+
+// LLVM-LABEL: @_Z4fi1cPU7_Atomici
+// LLVM: load atomic i32, ptr {{.*}} seq_cst, align 4
+
+void fi2(_Atomic(int) *i) {
+  __c11_atomic_store(i, 1, memory_order_seq_cst);
+}
+
+// CHECK-LABEL: @_Z3fi2PU7_Atomici
+// CHECK: cir.store{{.*}} atomic(seq_cst)
+
+// LLVM-LABEL: @_Z3fi2PU7_Atomici
+// LLVM: store atomic i32 {{.*}} seq_cst, align 4
+
+void fi2a(int *i) {
+  int v = 1;
+  __atomic_store(i, &v, memory_order_seq_cst);
+}
+
+// CHECK-LABEL: @_Z4fi2aPi
+// CHECK: cir.store{{.*}} atomic(seq_cst)
+
+// LLVM-LABEL: @_Z4fi2aPi
+// LLVM: store atomic i32 {{.*}} seq_cst, align 4
+
+void fi2b(int *i) {
+  __atomic_store_n(i, 1, memory_order_seq_cst);
+}
+
+// CHECK-LABEL: @_Z4fi2bPi
+// CHECK: cir.store{{.*}} atomic(seq_cst)
+
+// LLVM-LABEL: @_Z4fi2bPi
+// LLVM: store atomic i32 {{.*}} seq_cst, align 4
+
+void fi2c(atomic_int *i) {
+  atomic_store(i, 1);
+}
+
+struct S {
+  double x;
+};
+
+// CHECK-LABEL: @_Z4fi2cPU7_Atomici
+// CHECK: cir.store{{.*}} atomic(seq_cst)
+
+// LLVM-LABEL: @_Z4fi2cPU7_Atomici
+// LLVM: store atomic i32 {{.*}} seq_cst, align 4
+
+void fd3(struct S *a, struct S *b, struct S *c) {
+  __atomic_exchange(a, b, c, memory_order_seq_cst);
+}
+
+// CHECK-LABEL: @_Z3fd3P1SS0_S0_
+// CHECK: cir.atomic.xchg({{.*}} : !cir.ptr<!u64i>, {{.*}} : !u64i, seq_cst) : !u64i
+
+// FIXME: CIR is producing an over alignment of 8, only 4 needed.
+// LLVM-LABEL: @_Z3fd3P1SS0_S0_
+// LLVM:      [[A_ADDR:%.*]] = alloca ptr
+// LLVM-NEXT: [[B_ADDR:%.*]] = alloca ptr
+// LLVM-NEXT: [[C_ADDR:%.*]] = alloca ptr
+// LLVM-NEXT: store ptr {{.*}}, ptr [[A_ADDR]]
+// LLVM-NEXT: store ptr {{.*}}, ptr [[B_ADDR]]
+// LLVM-NEXT: store ptr {{.*}}, ptr [[C_ADDR]]
+// LLVM-NEXT: [[LOAD_A_PTR:%.*]] = load ptr, ptr [[A_ADDR]]
+// LLVM-NEXT: [[LOAD_B_PTR:%.*]] = load ptr, ptr [[B_ADDR]]
+// LLVM-NEXT: [[LOAD_C_PTR:%.*]] = load ptr, ptr [[C_ADDR]]
+// LLVM-NEXT: [[LOAD_B:%.*]] = load i64, ptr [[LOAD_B_PTR]]
+// LLVM-NEXT: [[RESULT:%.*]] = atomicrmw xchg ptr [[LOAD_A_PTR]], i64 [[LOAD_B]] seq_cst
+// LLVM-NEXT: store i64 [[RESULT]], ptr [[LOAD_C_PTR]]
+
+bool fd4(struct S *a, struct S *b, struct S *c) {
+  return __atomic_compare_exchange(a, b, c, 1, 5, 5);
+}
+
+// CHECK-LABEL: @_Z3fd4P1SS0_S0_
+// CHECK: %old, %cmp = cir.atomic.cmp_xchg({{.*}} : !cir.ptr<!u64i>, {{.*}} : !u64i, {{.*}} : !u64i, success = seq_cst, failure = seq_cst) syncscope(system) align(8) weak : (!u64i, !cir.bool)
+
+// LLVM-LABEL: @_Z3fd4P1SS0_S0_
+// LLVM: cmpxchg weak ptr {{.*}}, i64 {{.*}}, i64 {{.*}} seq_cst seq_cst, align 8
+
+bool fi4a(int *i) {
+  int cmp = 0;
+  int desired = 1;
+  return __atomic_compare_exchange(i, &cmp, &desired, false, memory_order_acquire, memory_order_acquire);
+}
+
+// CHECK-LABEL: @_Z4fi4aPi
+// CHECK: %old, %cmp = cir.atomic.cmp_xchg({{.*}} : !cir.ptr<!s32i>, {{.*}} : !s32i, {{.*}} : !s32i, success = acquire, failure = acquire) syncscope(system) align(4) : (!s32i, !cir.bool)
+
+// LLVM-LABEL: @_Z4fi4aPi
+// LLVM: %[[RES:.*]] = cmpxchg ptr {{.*}}, i32 {{.*}}, i32 {{.*}} acquire acquire, align 4
+// LLVM: extractvalue { i32, i1 } %[[RES]], 0
+// LLVM: extractvalue { i32, i1 } %[[RES]], 1
+
+bool fi4b(int *i) {
+  int cmp = 0;
+  return __atomic_compare_exchange_n(i, &cmp, 1, true, memory_order_acquire, memory_order_acquire);
+}
+
+// CHECK-LABEL: @_Z4fi4bPi
+// CHECK: %old, %cmp = cir.atomic.cmp_xchg({{.*}} : !cir.ptr<!s32i>, {{.*}} : !s32i, {{.*}} : !s32i, success = acquire, failure = acquire) syncscope(system) align(4) weak : (!s32i, !cir.bool)
+
+// LLVM-LABEL: @_Z4fi4bPi
+// LLVM: %[[R:.*]] = cmpxchg weak ptr {{.*}}, i32 {{.*}}, i32 {{.*}} acquire acquire, align 4
+// LLVM: extractvalue { i32, i1 } %[[R]], 0
+// LLVM: extractvalue { i32, i1 } %[[R]], 1
+
+bool fi4c(atomic_int *i) {
+  int cmp = 0;
+  return atomic_compare_exchange_strong(i, &cmp, 1);
+}
+
+// CHECK-LABEL: @_Z4fi4cPU7_Atomici
+// CHECK: %old, %cmp = cir.atomic.cmp_xchg({{.*}} : !cir.ptr<!s32i>, {{.*}} : !s32i, {{.*}} : !s32i, success = seq_cst, failure = seq_cst) syncscope(system) align(4) : (!s32i, !cir.bool)
+// CHECK: %[[CMP:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool
+// CHECK: cir.if %[[CMP:.*]] {
+// CHECK:   cir.store{{.*}} %old, {{.*}} : !s32i, !cir.ptr<!s32i>
+// CHECK: }
+
+// LLVM-LABEL: @_Z4fi4cPU7_Atomici
+// LLVM: cmpxchg ptr {{.*}}, i32 {{.*}}, i32 {{.*}} seq_cst seq_cst, align 4
+
+bool fi4d(atomic_int *i) {
+  int cmp = 0;
+  return atomic_compare_exchange_weak(i, &cmp, 1);
+}
+
+// CHECK-LABEL: @_Z4fi4dPU7_Atomici
+// CHECK: %old, %cmp = cir.atomic.cmp_xchg({{.*}} : !cir.ptr<!s32i>, {{.*}} : !s32i, {{.*}} : !s32i, success = seq_cst, failure = seq_cst) syncscope(system) align(4) weak : (!s32i, !cir.bool)
+// CHECK: %[[CMP:.*]] = cir.unary(not, %cmp) : !cir.bool, !cir.bool
+// CHECK: cir.if %[[CMP:.*]] {
+// CHECK:   cir.store{{.*}} %old, {{.*}} : !s32i, !cir.ptr<!s32i>
+// CHECK: }
+
+// LLVM-LABEL: @_Z4fi4dPU7_Atomici
+// LLVM: cmpxchg weak ptr {{.*}}, i32 {{.*}}, i32 {{.*}} seq_cst seq_cst, align 4
+
+bool fsb(bool *c) {
+  return __atomic_exchange_n(c, 1, memory_order_seq_cst);
+}
+
+// CHECK-LABEL: @_Z3fsbPb
+// CHECK: cir.atomic.xchg({{.*}} : !cir.ptr<!u8i>, {{.*}} : !u8i, seq_cst) : !u8i
+
+// LLVM-LABEL: @_Z3fsbPb
+// LLVM: atomicrmw xchg ptr {{.*}}, i8 {{.*}} seq_cst, align 1
+
+void atomicinit(void)
+{
+  _Atomic(unsigned int) j = 12;
+  __c11_atomic_init(&j, 1);
+}
+
+// CHECK-LABEL: @_Z10atomicinitv
+// CHECK: %[[ADDR:.*]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["j"
+// CHECK: cir.store{{.*}} {{.*}}, %[[ADDR]] : !u32i, !cir.ptr<!u32i>
+// CHECK: cir.store{{.*}} {{.*}}, %[[ADDR]] : !u32i, !cir.ptr<!u32i>
+
+// LLVM-LABEL: @_Z10atomicinitv
+// LLVM: %[[ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: store i32 12, ptr %[[ADDR]], align 4
+// LLVM: store i32 1, ptr %[[ADDR]], align 4
+
+void incdec() {
+  _Atomic(unsigned int) j = 12;
+  __c11_atomic_fetch_add(&j, 1, 0);
+  __c11_atomic_fetch_sub(&j, 1, 0);
+}
+
+// CHECK-LABEL: @_Z6incdecv
+// CHECK: cir.atomic.fetch(add, {{.*}} : !cir.ptr<!u32i>, {{.*}} : !u32i, relaxed) fetch_first
+// CHECK: cir.atomic.fetch(sub, {{.*}} : !cir.ptr<!u32i>, {{.*}} : !u32i, relaxed) fetch_first
+
+// LLVM-LABEL: @_Z6incdecv
+// LLVM: atomicrmw add ptr {{.*}}, i32 {{.*}} monotonic, align 4
+// LLVM: atomicrmw sub ptr {{.*}}, i32 {{.*}} monotonic, align 4
+
+void inc_int(int* a, int b) {
+  int c = __sync_fetch_and_add(a, b);
+}
+// CHECK-LABEL: @_Z7inc_int
+// CHECK: %[[PTR:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK: %[[VAL:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CHECK: %[[RES:.*]] = cir.atomic.fetch(add, %[[PTR]] : !cir.ptr<!s32i>, %[[VAL]] : !s32i, seq_cst) fetch_first : !s32i
+// CHECK: cir.store{{.*}} %[[RES]], {{.*}} : !s32i, !cir.ptr<!s32i>
+
+// LLVM-LABEL: @_Z7inc_int
+// LLVM: atomicrmw add ptr {{.*}}, i32 {{.*}} seq_cst, align 4
+
+void sub_int(int* a, int b) {
+  int c = __sync_fetch_and_sub(a, b);
+}
+
+// CHECK-LABEL: _Z7sub_int
+// CHECK: %[[PTR:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK: %[[VAL:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CHECK: %[[RES:.*]] = cir.atomic.fetch(sub, %[[PTR]] : !cir.ptr<!s32i>, %[[VAL]] : !s32i, seq_cst) fetch_first : !s32i
+// CHECK: cir.store{{.*}} %[[RES]], {{.*}} : !s32i, !cir.ptr<!s32i>
+
+// LLVM-LABEL: _Z7sub_int
+// LLVM: atomicrmw sub ptr {{.*}}, i32 {{.*}} seq_cst, align 4
+
+
+// CHECK-LABEL: @_Z8inc_long
+// CHECK: cir.atomic.fetch(add, {{.*}} : !cir.ptr<!s64i>, {{.*}} : !s64i, seq_cst) fetch_first : !s64i
+
+// LLVM-LABEL: @_Z8inc_long
+// LLVM: atomicrmw add ptr {{.*}}, i64 {{.*}} seq_cst, align 8
+
+void inc_long(long* a, long b) {
+  long c = __sync_fetch_and_add(a, 2);
+}
+
+// CHECK-LABEL: @_Z8sub_long
+// CHECK: cir.atomic.fetch(sub, {{.*}} : !cir.ptr<!s64i>, {{.*}} : !s64i, seq_cst) fetch_first : !s64i
+
+// LLVM-LABEL: @_Z8sub_long
+// LLVM: atomicrmw sub ptr {{.*}}, i64 {{.*}} seq_cst, align 8
+
+void sub_long(long* a, long b) {
+  long c = __sync_fetch_and_sub(a, 2);
+}
+
+
+// CHECK-LABEL: @_Z9inc_short
+// CHECK: cir.atomic.fetch(add, {{.*}} : !cir.ptr<!s16i>, {{.*}} : !s16i, seq_cst) fetch_first : !s16i
+
+// LLVM-LABEL: @_Z9inc_short
+// LLVM: atomicrmw add ptr {{.*}}, i16 {{.*}} seq_cst, align 2
+void inc_short(short* a, short b) {
+  short c = __sync_fetch_and_add(a, 2);
+}
+
+// CHECK-LABEL: @_Z9sub_short
+// CHECK: cir.atomic.fetch(sub, {{.*}} : !cir.ptr<!s16i>, {{.*}} : !s16i, seq_cst) fetch_first : !s16i
+
+// LLVM-LABEL: @_Z9sub_short
+// LLVM: atomicrmw sub ptr {{.*}}, i16 {{.*}} seq_cst, align 2
+void sub_short(short* a, short b) {
+  short c = __sync_fetch_and_sub(a, 2);
+}
+
+
+// CHECK-LABEL: @_Z8inc_byte
+// CHECK: cir.atomic.fetch(add, {{.*}} : !cir.ptr<!s8i>, {{.*}} : !s8i, seq_cst) fetch_first : !s8i
+
+// LLVM-LABEL: @_Z8inc_byte
+// LLVM: atomicrmw add ptr {{.*}}, i8 {{.*}} seq_cst, align 1
+void inc_byte(char* a, char b) {
+  char c = __sync_fetch_and_add(a, b);
+}
+
+// CHECK-LABEL: @_Z8sub_byte
+// CHECK: cir.atomic.fetch(sub, {{.*}} : !cir.ptr<!s8i>, {{.*}} : !s8i, seq_cst) fetch_first : !s8i
+
+// LLVM-LABEL: @_Z8sub_byte
+// LLVM: atomicrmw sub ptr {{.*}}, i8 {{.*}} seq_cst, align 1
+void sub_byte(char* a, char b) {
+  char c = __sync_fetch_and_sub(a, b);
+}
+// __sync_bool_compare_and_swap on int: cir.atomic.cmp_xchg yields (old, bool);
+// the success flag (extractvalue index 1) is zext'd to i8 and stored.
+// CHECK-LABEL: @_Z12cmp_bool_int
+// CHECK: %[[PTR:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK: %[[CMP:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CHECK: %[[UPD:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CHECK: %[[OLD:.*]], %[[RES:.*]] = cir.atomic.cmp_xchg(%[[PTR]] : !cir.ptr<!s32i>, %[[CMP]] : !s32i, %[[UPD]] : !s32i, success = seq_cst, failure = seq_cst) syncscope(system) align(4) : (!s32i, !cir.bool)
+// CHECK: cir.store{{.*}} %[[RES]], {{.*}} : !cir.bool, !cir.ptr<!cir.bool>
+
+// LLVM-LABEL: @_Z12cmp_bool_int
+// LLVM: %[[PTR:.*]] = load ptr
+// LLVM: %[[CMP:.*]] = load i32
+// LLVM: %[[UPD:.*]] = load i32
+// LLVM: %[[RES:.*]] = cmpxchg ptr %[[PTR]], i32 %[[CMP]], i32 %[[UPD]] seq_cst seq_cst, align 4
+// LLVM: %[[TMP:.*]] = extractvalue { i32, i1 } %[[RES]], 1
+// LLVM: %[[EXT:.*]] = zext i1 %[[TMP]] to i8
+// LLVM: store i8 %[[EXT]], ptr {{.*}}
+void cmp_bool_int(int* p, int x, int u) {
+  bool r = __sync_bool_compare_and_swap(p, x, u);
+}
+
+
+// __sync_bool_compare_and_swap on 64-bit long: seq_cst/seq_cst cmpxchg, align 8.
+// CHECK-LABEL: @_Z13cmp_bool_long
+// CHECK: cir.atomic.cmp_xchg({{.*}} : !cir.ptr<!s64i>, {{.*}} : !s64i, {{.*}} : !s64i, success = seq_cst, failure = seq_cst) syncscope(system) align(8) : (!s64i, !cir.bool)
+
+// LLVM-LABEL: @_Z13cmp_bool_long
+// LLVM: cmpxchg ptr {{.*}}, i64 {{.*}}, i64 {{.*}} seq_cst seq_cst, align 8
+void cmp_bool_long(long* p, long x, long u) {
+  bool r = __sync_bool_compare_and_swap(p, x, u);
+}
+
+// __sync_bool_compare_and_swap on 16-bit short: seq_cst/seq_cst cmpxchg, align 2.
+// CHECK-LABEL: @_Z14cmp_bool_short
+// CHECK: cir.atomic.cmp_xchg({{.*}} : !cir.ptr<!s16i>, {{.*}} : !s16i, {{.*}} : !s16i, success = seq_cst, failure = seq_cst) syncscope(system) align(2) : (!s16i, !cir.bool)
+
+// LLVM-LABEL: @_Z14cmp_bool_short
+// LLVM: cmpxchg ptr {{.*}}, i16 {{.*}}, i16 {{.*}} seq_cst seq_cst, align 2
+void cmp_bool_short(short* p, short x, short u) {
+  bool r = __sync_bool_compare_and_swap(p, x, u);
+}
+
+// __sync_bool_compare_and_swap on 8-bit char: seq_cst/seq_cst cmpxchg, align 1.
+// CHECK-LABEL: @_Z13cmp_bool_byte
+// CHECK: cir.atomic.cmp_xchg({{.*}} : !cir.ptr<!s8i>, {{.*}} : !s8i, {{.*}} : !s8i, success = seq_cst, failure = seq_cst) syncscope(system) align(1) : (!s8i, !cir.bool)
+
+// LLVM-LABEL: @_Z13cmp_bool_byte
+// LLVM: cmpxchg ptr {{.*}}, i8 {{.*}}, i8 {{.*}} seq_cst seq_cst, align 1
+void cmp_bool_byte(char* p, char x, char u) {
+  bool r = __sync_bool_compare_and_swap(p, x, u);
+}
+
+// __sync_val_compare_and_swap on int: unlike the bool variant, the previous
+// value (extractvalue index 0) is stored rather than the success flag.
+// CHECK-LABEL: @_Z11cmp_val_int
+// CHECK: %[[PTR:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK: %[[CMP:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CHECK: %[[UPD:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CHECK: %[[OLD:.*]], %[[RES:.*]] = cir.atomic.cmp_xchg(%[[PTR]] : !cir.ptr<!s32i>, %[[CMP]] : !s32i, %[[UPD]] : !s32i, success = seq_cst, failure = seq_cst) syncscope(system) align(4) : (!s32i, !cir.bool)
+// CHECK: cir.store{{.*}} %[[OLD]], {{.*}} : !s32i, !cir.ptr<!s32i>
+
+// LLVM-LABEL: @_Z11cmp_val_int
+// LLVM: %[[PTR:.*]] = load ptr
+// LLVM: %[[CMP:.*]] = load i32
+// LLVM: %[[UPD:.*]] = load i32
+// LLVM: %[[RES:.*]] = cmpxchg ptr %[[PTR]], i32 %[[CMP]], i32 %[[UPD]] seq_cst seq_cst, align 4
+// LLVM: %[[TMP:.*]] = extractvalue { i32, i1 } %[[RES]], 0
+// LLVM: store i32 %[[TMP]], ptr {{.*}}
+void cmp_val_int(int* p, int x, int u) {
+  int r = __sync_val_compare_and_swap(p, x, u);
+}
+
+// __sync_val_compare_and_swap on 64-bit long: seq_cst/seq_cst cmpxchg, align 8.
+// CHECK-LABEL: @_Z12cmp_val_long
+// CHECK: cir.atomic.cmp_xchg({{.*}} : !cir.ptr<!s64i>, {{.*}} : !s64i, {{.*}} : !s64i, success = seq_cst, failure = seq_cst) syncscope(system) align(8) : (!s64i, !cir.bool)
+
+// LLVM-LABEL: @_Z12cmp_val_long
+// LLVM: cmpxchg ptr {{.*}}, i64 {{.*}}, i64 {{.*}} seq_cst seq_cst, align 8
+void cmp_val_long(long* p, long x, long u) {
+  long r = __sync_val_compare_and_swap(p, x, u);
+}
+
+// __sync_val_compare_and_swap on 16-bit short: seq_cst/seq_cst cmpxchg, align 2.
+// CHECK-LABEL: @_Z13cmp_val_short
+// CHECK: cir.atomic.cmp_xchg({{.*}} : !cir.ptr<!s16i>, {{.*}} : !s16i, {{.*}} : !s16i, success = seq_cst, failure = seq_cst) syncscope(system) align(2) : (!s16i, !cir.bool)
+
+// LLVM-LABEL: @_Z13cmp_val_short
+// LLVM: cmpxchg ptr {{.*}}, i16 {{.*}}, i16 {{.*}} seq_cst seq_cst, align 2
+void cmp_val_short(short* p, short x, short u) {
+  short r = __sync_val_compare_and_swap(p, x, u);
+}
+
+// __sync_val_compare_and_swap on 8-bit char: seq_cst/seq_cst cmpxchg, align 1.
+// CHECK-LABEL: @_Z12cmp_val_byte
+// CHECK: cir.atomic.cmp_xchg({{.*}} : !cir.ptr<!s8i>, {{.*}} : !s8i, {{.*}} : !s8i, success = seq_cst, failure = seq_cst) syncscope(system) align(1) : (!s8i, !cir.bool)
+
+// LLVM-LABEL: @_Z12cmp_val_byte
+// LLVM: cmpxchg ptr {{.*}}, i8 {{.*}}, i8 {{.*}} seq_cst seq_cst, align 1
+void cmp_val_byte(char* p, char x, char u) {
+  char r = __sync_val_compare_and_swap(p, x, u);
+}
+
+// Unsigned 32-bit __sync_fetch_and_add: CIR keeps the unsigned type (!u32i);
+// LLVM IR is identical to the signed case (atomicrmw add on i32).
+// CHECK-LABEL: @_Z8inc_uint
+// CHECK: cir.atomic.fetch(add, {{.*}} : !cir.ptr<!u32i>, {{.*}} : !u32i, seq_cst) fetch_first : !u32i
+
+// LLVM-LABEL: @_Z8inc_uint
+// LLVM: atomicrmw add ptr {{.*}}, i32 {{.*}} seq_cst, align 4
+void inc_uint(unsigned int* a, int b) {
+  unsigned int c = __sync_fetch_and_add(a, b);
+}
+
+// Unsigned 32-bit __sync_fetch_and_sub: !u32i fetch-first sub in CIR,
+// atomicrmw sub on i32 in LLVM IR.
+// CHECK-LABEL: @_Z8sub_uint
+// CHECK: cir.atomic.fetch(sub, {{.*}} : !cir.ptr<!u32i>, {{.*}} : !u32i, seq_cst) fetch_first : !u32i
+
+// LLVM-LABEL: @_Z8sub_uint
+// LLVM: atomicrmw sub ptr {{.*}}, i32 {{.*}} seq_cst, align 4
+void sub_uint(unsigned int* a, int b) {
+  unsigned int c = __sync_fetch_and_sub(a, b);
+}
+
+// Unsigned 64-bit __sync_fetch_and_add: !u64i fetch-first add in CIR,
+// atomicrmw add on i64 (align 8) in LLVM IR.
+// CHECK-LABEL: @_Z9inc_ulong
+// CHECK: cir.atomic.fetch(add, {{.*}} : !cir.ptr<!u64i>, {{.*}} : !u64i, seq_cst) fetch_first : !u64i
+
+// LLVM-LABEL: @_Z9inc_ulong
+// LLVM: atomicrmw add ptr {{.*}}, i64 {{.*}} seq_cst, align 8
+void inc_ulong(unsigned long* a, long b) {
+  unsigned long c = __sync_fetch_and_add(a, b);
+}
+
+// Unsigned 64-bit __sync_fetch_and_sub: !u64i fetch-first sub in CIR,
+// atomicrmw sub on i64 (align 8) in LLVM IR.
+// CHECK-LABEL: @_Z9sub_ulong
+// CHECK: cir.atomic.fetch(sub, {{.*}} : !cir.ptr<!u64i>, {{.*}} : !u64i, seq_cst) fetch_first : !u64i
+
+// LLVM-LABEL: @_Z9sub_ulong
+// LLVM: atomicrmw sub ptr {{.*}}, i64 {{.*}} seq_cst, align 8
+void sub_ulong(unsigned long* a, long b) {
+  unsigned long c = __sync_fetch_and_sub(a, b);
+}
+
+
+// Unsigned 8-bit __sync_fetch_and_add: !u8i fetch-first add in CIR,
+// atomicrmw add on i8 (align 1) in LLVM IR.
+// CHECK-LABEL: @_Z9inc_uchar
+// CHECK: cir.atomic.fetch(add, {{.*}} : !cir.ptr<!u8i>, {{.*}} : !u8i, seq_cst) fetch_first : !u8i
+
+// LLVM-LABEL: @_Z9inc_uchar
+// LLVM: atomicrmw add ptr {{.*}}, i8 {{.*}} seq_cst, align 1
+void inc_uchar(unsigned char* a, char b) {
+  unsigned char c = __sync_fetch_and_add(a, b);
+}
+
+// Unsigned 8-bit __sync_fetch_and_sub: !u8i fetch-first sub in CIR,
+// atomicrmw sub on i8 (align 1) in LLVM IR.
+// CHECK-LABEL: @_Z9sub_uchar
+// CHECK: cir.atomic.fetch(sub, {{.*}} : !cir.ptr<!u8i>, {{.*}} : !u8i, seq_cst) fetch_first : !u8i
+
+// LLVM-LABEL: @_Z9sub_uchar
+// LLVM: atomicrmw sub ptr {{.*}}, i8 {{.*}} seq_cst, align 1
+void sub_uchar(unsigned char* a, char b) {
+  unsigned char c = __sync_fetch_and_sub(a, b);
+}
+
+// __sync_bool_compare_and_swap on unsigned int with signed int arguments:
+// CIR inserts explicit s32i->u32i integral casts on both the expected and
+// desired values before the cmp_xchg; these casts are no-ops in LLVM IR,
+// where the cmpxchg is identical to the signed case.
+// CHECK-LABEL: @_Z13cmp_bool_uint
+// CHECK: %[[PTR:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.ptr<!u32i>>, !cir.ptr<!u32i>
+// CHECK: %[[CMP:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CHECK: %[[CMP_U:.*]] = cir.cast integral %[[CMP]] : !s32i -> !u32i
+// CHECK: %[[UPD:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CHECK: %[[UPD_U:.*]] = cir.cast integral %[[UPD]] : !s32i -> !u32i
+// CHECK: %[[OLD:.*]], %[[RES:.*]] = cir.atomic.cmp_xchg(%[[PTR]] : !cir.ptr<!u32i>, %[[CMP_U]] :
+// CHECK-SAME: !u32i, %[[UPD_U]] : !u32i, success = seq_cst, failure = seq_cst) syncscope(system) align(4) : (!u32i, !cir.bool)
+// CHECK: cir.store{{.*}} %[[RES]], {{.*}} : !cir.bool, !cir.ptr<!cir.bool>
+
+// LLVM-LABEL: @_Z13cmp_bool_uint
+// LLVM: %[[PTR:.*]] = load ptr
+// LLVM: %[[CMP:.*]] = load i32
+// LLVM: %[[UPD:.*]] = load i32
+// LLVM: %[[RES:.*]] = cmpxchg ptr %[[PTR]], i32 %[[CMP]], i32 %[[UPD]] seq_cst seq_cst, align 4
+// LLVM: %[[TMP:.*]] = extractvalue { i32, i1 } %[[RES]], 1
+// LLVM: %[[EXT:.*]] = zext i1 %[[TMP]] to i8
+// LLVM: store i8 %[[EXT]], ptr {{.*}}
+void cmp_bool_uint(unsigned int* p, int x, int u) {
+  bool r = __sync_bool_compare_and_swap(p, x, u);
+}
+
+// __sync_bool_compare_and_swap on unsigned short: !u16i cmp_xchg, align 2.
+// CHECK-LABEL: @_Z15cmp_bool_ushort
+// CHECK: cir.atomic.cmp_xchg({{.*}} : !cir.ptr<!u16i>, {{.*}} : !u16i, {{.*}} : !u16i, success = seq_cst, failure = seq_cst) syncscope(system) align(2) : (!u16i, !cir.bool)
+
+// LLVM-LABEL: @_Z15cmp_bool_ushort
+// LLVM: cmpxchg ptr {{.*}}, i16 {{.*}}, i16 {{.*}} seq_cst seq_cst, align 2
+void cmp_bool_ushort(unsigned short* p, short x, short u) {
+  bool r = __sync_bool_compare_and_swap(p, x, u);
+}
+
+// __sync_bool_compare_and_swap on unsigned long: !u64i cmp_xchg, align 8.
+// CHECK-LABEL: @_Z14cmp_bool_ulong
+// CHECK: cir.atomic.cmp_xchg({{.*}} : !cir.ptr<!u64i>, {{.*}} : !u64i, {{.*}} : !u64i, success = seq_cst, failure = seq_cst) syncscope(system) align(8) : (!u64i, !cir.bool)
+
+// LLVM-LABEL: @_Z14cmp_bool_ulong
+// LLVM: cmpxchg ptr {{.*}}, i64 {{.*}}, i64 {{.*}} seq_cst seq_cst, align 8
+void cmp_bool_ulong(unsigned long* p, long x, long u) {
+  bool r = __sync_bool_compare_and_swap(p, x, u);
+}
+
+// __sync_val_compare_and_swap on unsigned int with signed arguments: CIR
+// casts the inputs s32i->u32i for the cmp_xchg and casts the returned old
+// value back u32i->s32i before storing into the signed result variable.
+// All casts are no-ops at the LLVM IR level.
+// CHECK-LABEL: @_Z12cmp_val_uint
+// CHECK: %[[PTR:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.ptr<!u32i>>, !cir.ptr<!u32i>
+// CHECK: %[[CMP:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CHECK: %[[CMP_U:.*]] = cir.cast integral %[[CMP]] : !s32i -> !u32i
+// CHECK: %[[UPD:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CHECK: %[[UPD_U:.*]] = cir.cast integral %[[UPD]] : !s32i -> !u32i
+// CHECK: %[[OLD:.*]], %[[RES:.*]] = cir.atomic.cmp_xchg(%[[PTR]] : !cir.ptr<!u32i>, %[[CMP_U]] :
+// CHECK-SAME: !u32i, %[[UPD_U]] : !u32i, success = seq_cst, failure = seq_cst) syncscope(system) align(4) : (!u32i, !cir.bool)
+// CHECK: %[[R:.*]] = cir.cast integral %[[OLD]] : !u32i -> !s32i
+// CHECK: cir.store{{.*}} %[[R]], {{.*}} : !s32i, !cir.ptr<!s32i>
+
+// LLVM-LABEL: @_Z12cmp_val_uint
+// LLVM: %[[PTR:.*]] = load ptr
+// LLVM: %[[CMP:.*]] = load i32
+// LLVM: %[[UPD:.*]] = load i32
+// LLVM: %[[RES:.*]] = cmpxchg ptr %[[PTR]], i32 %[[CMP]], i32 %[[UPD]] seq_cst seq_cst, align 4
+// LLVM: %[[TMP:.*]] = extractvalue { i32, i1 } %[[RES]], 0
+// LLVM: store i32 %[[TMP]], ptr {{.*}}
+void cmp_val_uint(unsigned int* p, int x, int u) {
+  int r = __sync_val_compare_and_swap(p, x, u);
+}
+
+// __sync_val_compare_and_swap on unsigned short: !u16i cmp_xchg, align 2.
+// CHECK-LABEL: @_Z14cmp_val_ushort
+// CHECK: cir.atomic.cmp_xchg({{.*}} : !cir.ptr<!u16i>, {{.*}} : !u16i, {{.*}} : !u16i, success = seq_cst, failure = seq_cst) syncscope(system) align(2) : (!u16i, !cir.bool)
+
+// LLVM-LABEL: @_Z14cmp_val_ushort
+// LLVM: cmpxchg ptr {{.*}}, i16 {{.*}}, i16 {{.*}} seq_cst seq_cst, align 2
+void cmp_val_ushort(unsigned short* p, short x, short u) {
+  short r = __sync_val_compare_and_swap(p, x, u);
+}
+
+// __sync_val_compare_and_swap on unsigned long: !u64i cmp_xchg, align 8.
+// CHECK-LABEL: @_Z13cmp_val_ulong
+// CHECK: cir.atomic.cmp_xchg({{.*}} : !cir.ptr<!u64i>, {{.*}} : !u64i, {{.*}} : !u64i, success = seq_cst, failure = seq_cst) syncscope(system) align(8) : (!u64i, !cir.bool)
+
+// LLVM-LABEL: @_Z13cmp_val_ulong
+// LLVM: cmpxchg ptr {{.*}}, i64 {{.*}}, i64 {{.*}} seq_cst seq_cst, align 8
+void cmp_val_ulong(unsigned long* p, long x, long u) {
+  long r = __sync_val_compare_and_swap(p, x, u);
+}
+
+// CHECK-LABEL: @test_op_and_fetch
+// LLVM-LABEL: @test_op_and_fetch
+extern "C" void test_op_and_fetch(void)
+{
+  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
+  // CHECK: [[RET0:%.*]] = cir.binop(add, [[RES0]], [[VAL0]]) : !s8i
+  // LLVM:  [[VAL0:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[RES0:%.*]] = atomicrmw add ptr @sc, i8 [[VAL0]] seq_cst, align 1
+  // LLVM:  [[RET0:%.*]] = add i8 [[RES0]], [[VAL0]]
+  // LLVM:  store i8 [[RET0]], ptr @sc, align 1
+  sc = __sync_add_and_fetch(&sc, uc);
+
+  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
+  // CHECK: [[RET1:%.*]] = cir.binop(add, [[RES1]], [[VAL1]]) : !u8i
+  // LLVM:  [[VAL1:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[RES1:%.*]] = atomicrmw add ptr @uc, i8 [[VAL1]] seq_cst, align 1
+  // LLVM:  [[RET1:%.*]] = add i8 [[RES1]], [[VAL1]]
+  // LLVM:  store i8 [[RET1]], ptr @uc, align 1
+  uc = __sync_add_and_fetch(&uc, uc);
+
+  // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
+  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
+  // CHECK: [[RET2:%.*]] = cir.binop(add, [[RES2]], [[VAL2]]) : !s16i
+  // LLVM:  [[VAL2:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // LLVM:  [[RES2:%.*]] = atomicrmw add ptr @ss, i16 [[CONV2]] seq_cst, align 2
+  // LLVM:  [[RET2:%.*]] = add i16 [[RES2]], [[CONV2]]
+  // LLVM:  store i16 [[RET2]], ptr @ss, align 2
+  ss = __sync_add_and_fetch(&ss, uc);
+
+  // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
+  // CHECK: [[RET3:%.*]] = cir.binop(add, [[RES3]], [[VAL3]]) : !u16i
+  // LLVM:  [[VAL3:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // LLVM:  [[RES3:%.*]] = atomicrmw add ptr @us, i16 [[CONV3]] seq_cst, align 2
+  // LLVM:  [[RET3:%.*]] = add i16 [[RES3]], [[CONV3]]
+  // LLVM:  store i16 [[RET3]], ptr @us
+  us = __sync_add_and_fetch(&us, uc);
+
+  // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
+  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
+  // CHECK: [[RET4:%.*]] = cir.binop(add, [[RES4]], [[VAL4]]) : !s32i
+  // LLVM:  [[VAL4:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // LLVM:  [[RES4:%.*]] = atomicrmw add ptr @si, i32 [[CONV4]] seq_cst, align 4
+  // LLVM:  [[RET4:%.*]] = add i32 [[RES4]], [[CONV4]]
+  // LLVM:  store i32 [[RET4]], ptr @si, align 4
+  si = __sync_add_and_fetch(&si, uc);
+
+  // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
+  // CHECK: [[RET5:%.*]] = cir.binop(add, [[RES5]], [[VAL5]]) : !u32i
+  // LLVM:  [[VAL5:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // LLVM:  [[RES5:%.*]] = atomicrmw add ptr @ui, i32 [[CONV5]] seq_cst, align 4
+  // LLVM:  [[RET5:%.*]] = add i32 [[RES5]], [[CONV5]]
+  // LLVM:  store i32 [[RET5]], ptr @ui, align 4
+  ui = __sync_add_and_fetch(&ui, uc);
+
+  // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
+  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
+  // CHECK: [[RET6:%.*]] = cir.binop(add, [[RES6]], [[VAL6]]) : !s64i
+  // LLVM:  [[VAL6:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // LLVM:  [[RES6:%.*]] = atomicrmw add ptr @sll, i64 [[CONV6]] seq_cst, align 8
+  // LLVM:  [[RET6:%.*]] = add i64 [[RES6]], [[CONV6]]
+  // LLVM:  store i64 [[RET6]], ptr @sll, align 8
+  sll = __sync_add_and_fetch(&sll, uc);
+
+  // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
+  // CHECK: [[RET7:%.*]] = cir.binop(add, [[RES7]], [[VAL7]]) : !u64i
+  // LLVM:  [[VAL7:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // LLVM:  [[RES7:%.*]] = atomicrmw add ptr @ull, i64 [[CONV7]] seq_cst, align 8
+  // LLVM:  [[RET7:%.*]] = add i64 [[RES7]], [[CONV7]]
+  // LLVM:  store i64 [[RET7]], ptr @ull, align 8
+  ull = __sync_add_and_fetch(&ull, uc);
+
+  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
+  // CHECK: [[RET0:%.*]] = cir.binop(sub, [[RES0]], [[VAL0]]) : !s8i
+  // LLVM:  [[VAL0:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[RES0:%.*]] = atomicrmw sub ptr @sc, i8 [[VAL0]] seq_cst, align 1
+  // LLVM:  [[RET0:%.*]] = sub i8 [[RES0]], [[VAL0]]
+  // LLVM:  store i8 [[RET0]], ptr @sc, align 1
+  sc = __sync_sub_and_fetch(&sc, uc);
+
+  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
+  // CHECK: [[RET1:%.*]] = cir.binop(sub, [[RES1]], [[VAL1]]) : !u8i
+  // LLVM:  [[VAL1:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[RES1:%.*]] = atomicrmw sub ptr @uc, i8 [[VAL1]] seq_cst, align 1
+  // LLVM:  [[RET1:%.*]] = sub i8 [[RES1]], [[VAL1]]
+  // LLVM:  store i8 [[RET1]], ptr @uc, align 1
+  uc = __sync_sub_and_fetch(&uc, uc);
+
+  // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
+  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
+  // CHECK: [[RET2:%.*]] = cir.binop(sub, [[RES2]], [[VAL2]]) : !s16i
+  // LLVM:  [[VAL2:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // LLVM:  [[RES2:%.*]] = atomicrmw sub ptr @ss, i16 [[CONV2]] seq_cst, align 2
+  // LLVM:  [[RET2:%.*]] = sub i16 [[RES2]], [[CONV2]]
+  // LLVM:  store i16 [[RET2]], ptr @ss, align 2
+  ss = __sync_sub_and_fetch(&ss, uc);
+
+  // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
+  // CHECK: [[RET3:%.*]] = cir.binop(sub, [[RES3]], [[VAL3]]) : !u16i
+  // LLVM:  [[VAL3:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // LLVM:  [[RES3:%.*]] = atomicrmw sub ptr @us, i16 [[CONV3]] seq_cst, align 2
+  // LLVM:  [[RET3:%.*]] = sub i16 [[RES3]], [[CONV3]]
+  // LLVM:  store i16 [[RET3]], ptr @us
+  us = __sync_sub_and_fetch(&us, uc);
+
+  // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
+  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
+  // CHECK: [[RET4:%.*]] = cir.binop(sub, [[RES4]], [[VAL4]]) : !s32i
+  // LLVM:  [[VAL4:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // LLVM:  [[RES4:%.*]] = atomicrmw sub ptr @si, i32 [[CONV4]] seq_cst, align 4
+  // LLVM:  [[RET4:%.*]] = sub i32 [[RES4]], [[CONV4]]
+  // LLVM:  store i32 [[RET4]], ptr @si, align 4
+  si = __sync_sub_and_fetch(&si, uc);
+
+  // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
+  // CHECK: [[RET5:%.*]] = cir.binop(sub, [[RES5]], [[VAL5]]) : !u32i
+  // LLVM:  [[VAL5:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // LLVM:  [[RES5:%.*]] = atomicrmw sub ptr @ui, i32 [[CONV5]] seq_cst, align 4
+  // LLVM:  [[RET5:%.*]] = sub i32 [[RES5]], [[CONV5]]
+  // LLVM:  store i32 [[RET5]], ptr @ui, align 4
+  ui = __sync_sub_and_fetch(&ui, uc);
+
+  // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
+  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
+  // CHECK: [[RET6:%.*]] = cir.binop(sub, [[RES6]], [[VAL6]]) : !s64i
+  // LLVM:  [[VAL6:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // LLVM:  [[RES6:%.*]] = atomicrmw sub ptr @sll, i64 [[CONV6]] seq_cst, align 8
+  // LLVM:  [[RET6:%.*]] = sub i64 [[RES6]], [[CONV6]]
+  // LLVM:  store i64 [[RET6]], ptr @sll, align 8
+  sll = __sync_sub_and_fetch(&sll, uc);
+
+  // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
+  // CHECK: [[RET7:%.*]] = cir.binop(sub, [[RES7]], [[VAL7]]) : !u64i
+  // LLVM:  [[VAL7:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // LLVM:  [[RES7:%.*]] = atomicrmw sub ptr @ull, i64 [[CONV7]] seq_cst, align 8
+  // LLVM:  [[RET7:%.*]] = sub i64 [[RES7]], [[CONV7]]
+  // LLVM:  store i64 [[RET7]], ptr @ull, align 8
+  ull = __sync_sub_and_fetch(&ull, uc);
+
+  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
+  // CHECK: [[RET0:%.*]] = cir.binop(and, [[RES0]], [[VAL0]]) : !s8i
+  // LLVM:  [[VAL0:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[RES0:%.*]] = atomicrmw and ptr @sc, i8 [[VAL0]] seq_cst, align 1
+  // LLVM:  [[RET0:%.*]] = and i8 [[RES0]], [[VAL0]]
+  // LLVM:  store i8 [[RET0]], ptr @sc, align 1
+  sc = __sync_and_and_fetch(&sc, uc);
+
+  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
+  // CHECK: [[RET1:%.*]] = cir.binop(and, [[RES1]], [[VAL1]]) : !u8i
+  // LLVM:  [[VAL1:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[RES1:%.*]] = atomicrmw and ptr @uc, i8 [[VAL1]] seq_cst, align 1
+  // LLVM:  [[RET1:%.*]] = and i8 [[RES1]], [[VAL1]]
+  // LLVM:  store i8 [[RET1]], ptr @uc, align 1
+  uc = __sync_and_and_fetch(&uc, uc);
+
+  // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
+  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
+  // CHECK: [[RET2:%.*]] = cir.binop(and, [[RES2]], [[VAL2]]) : !s16i
+  // LLVM:  [[VAL2:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // LLVM:  [[RES2:%.*]] = atomicrmw and ptr @ss, i16 [[CONV2]] seq_cst, align 2
+  // LLVM:  [[RET2:%.*]] = and i16 [[RES2]], [[CONV2]]
+  // LLVM:  store i16 [[RET2]], ptr @ss, align 2
+  ss = __sync_and_and_fetch(&ss, uc);
+
+  // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
+  // CHECK: [[RET3:%.*]] = cir.binop(and, [[RES3]], [[VAL3]]) : !u16i
+  // LLVM:  [[VAL3:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // LLVM:  [[RES3:%.*]] = atomicrmw and ptr @us, i16 [[CONV3]] seq_cst, align 2
+  // LLVM:  [[RET3:%.*]] = and i16 [[RES3]], [[CONV3]]
+  // LLVM:  store i16 [[RET3]], ptr @us
+  us = __sync_and_and_fetch(&us, uc);
+
+  // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
+  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
+  // CHECK: [[RET4:%.*]] = cir.binop(and, [[RES4]], [[VAL4]]) : !s32i
+  // LLVM:  [[VAL4:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // LLVM:  [[RES4:%.*]] = atomicrmw and ptr @si, i32 [[CONV4]] seq_cst, align 4
+  // LLVM:  [[RET4:%.*]] = and i32 [[RES4]], [[CONV4]]
+  // LLVM:  store i32 [[RET4]], ptr @si, align 4
+  si = __sync_and_and_fetch(&si, uc);
+
+  // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
+  // CHECK: [[RET5:%.*]] = cir.binop(and, [[RES5]], [[VAL5]]) : !u32i
+  // LLVM:  [[VAL5:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // LLVM:  [[RES5:%.*]] = atomicrmw and ptr @ui, i32 [[CONV5]] seq_cst, align 4
+  // LLVM:  [[RET5:%.*]] = and i32 [[RES5]], [[CONV5]]
+  // LLVM:  store i32 [[RET5]], ptr @ui, align 4
+  ui = __sync_and_and_fetch(&ui, uc);
+
+  // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
+  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
+  // CHECK: [[RET6:%.*]] = cir.binop(and, [[RES6]], [[VAL6]]) : !s64i
+  // LLVM:  [[VAL6:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // LLVM:  [[RES6:%.*]] = atomicrmw and ptr @sll, i64 [[CONV6]] seq_cst, align 8
+  // LLVM:  [[RET6:%.*]] = and i64 [[RES6]], [[CONV6]]
+  // LLVM:  store i64 [[RET6]], ptr @sll, align 8
+  sll = __sync_and_and_fetch(&sll, uc);
+
+  // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
+  // CHECK: [[RET7:%.*]] = cir.binop(and, [[RES7]], [[VAL7]]) : !u64i
+  // LLVM:  [[VAL7:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // LLVM:  [[RES7:%.*]] = atomicrmw and ptr @ull, i64 [[CONV7]] seq_cst, align 8
+  // LLVM:  [[RET7:%.*]] = and i64 [[RES7]], [[CONV7]]
+  // LLVM:  store i64 [[RET7]], ptr @ull, align 8
+  ull = __sync_and_and_fetch(&ull, uc);
+
+  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
+  // CHECK: [[RET0:%.*]] = cir.binop(or, [[RES0]], [[VAL0]]) : !s8i
+  // LLVM:  [[VAL0:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[RES0:%.*]] = atomicrmw or ptr @sc, i8 [[VAL0]] seq_cst, align 1
+  // LLVM:  [[RET0:%.*]] = or i8 [[RES0]], [[VAL0]]
+  // LLVM:  store i8 [[RET0]], ptr @sc, align 1
+  sc = __sync_or_and_fetch(&sc, uc);
+
+  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
+  // CHECK: [[RET1:%.*]] = cir.binop(or, [[RES1]], [[VAL1]]) : !u8i
+  // LLVM:  [[VAL1:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[RES1:%.*]] = atomicrmw or ptr @uc, i8 [[VAL1]] seq_cst, align 1
+  // LLVM:  [[RET1:%.*]] = or i8 [[RES1]], [[VAL1]]
+  // LLVM:  store i8 [[RET1]], ptr @uc, align 1
+  uc = __sync_or_and_fetch(&uc, uc);
+
+  // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
+  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
+  // CHECK: [[RET2:%.*]] = cir.binop(or, [[RES2]], [[VAL2]]) : !s16i
+  // LLVM:  [[VAL2:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // LLVM:  [[RES2:%.*]] = atomicrmw or ptr @ss, i16 [[CONV2]] seq_cst, align 2
+  // LLVM:  [[RET2:%.*]] = or i16 [[RES2]], [[CONV2]]
+  // LLVM:  store i16 [[RET2]], ptr @ss, align 2
+  ss = __sync_or_and_fetch(&ss, uc);
+
+  // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
+  // CHECK: [[RET3:%.*]] = cir.binop(or, [[RES3]], [[VAL3]]) : !u16i
+  // LLVM:  [[VAL3:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // LLVM:  [[RES3:%.*]] = atomicrmw or ptr @us, i16 [[CONV3]] seq_cst, align 2
+  // LLVM:  [[RET3:%.*]] = or i16 [[RES3]], [[CONV3]]
+  // LLVM:  store i16 [[RET3]], ptr @us
+  us = __sync_or_and_fetch(&us, uc);
+
+  // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
+  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
+  // CHECK: [[RET4:%.*]] = cir.binop(or, [[RES4]], [[VAL4]]) : !s32i
+  // LLVM:  [[VAL4:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // LLVM:  [[RES4:%.*]] = atomicrmw or ptr @si, i32 [[CONV4]] seq_cst, align 4
+  // LLVM:  [[RET4:%.*]] = or i32 [[RES4]], [[CONV4]]
+  // LLVM:  store i32 [[RET4]], ptr @si, align 4
+  si = __sync_or_and_fetch(&si, uc);
+
+  // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
+  // CHECK: [[RET5:%.*]] = cir.binop(or, [[RES5]], [[VAL5]]) : !u32i
+  // LLVM:  [[VAL5:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // LLVM:  [[RES5:%.*]] = atomicrmw or ptr @ui, i32 [[CONV5]] seq_cst, align 4
+  // LLVM:  [[RET5:%.*]] = or i32 [[RES5]], [[CONV5]]
+  // LLVM:  store i32 [[RET5]], ptr @ui, align 4
+  ui = __sync_or_and_fetch(&ui, uc);
+
+  // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
+  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
+  // CHECK: [[RET6:%.*]] = cir.binop(or, [[RES6]], [[VAL6]]) : !s64i
+  // LLVM:  [[VAL6:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // LLVM:  [[RES6:%.*]] = atomicrmw or ptr @sll, i64 [[CONV6]] seq_cst, align 8
+  // LLVM:  [[RET6:%.*]] = or i64 [[RES6]], [[CONV6]]
+  // LLVM:  store i64 [[RET6]], ptr @sll, align 8
+  sll = __sync_or_and_fetch(&sll, uc);
+
+  // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
+  // CHECK: [[RET7:%.*]] = cir.binop(or, [[RES7]], [[VAL7]]) : !u64i
+  // LLVM:  [[VAL7:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // LLVM:  [[RES7:%.*]] = atomicrmw or ptr @ull, i64 [[CONV7]] seq_cst, align 8
+  // LLVM:  [[RET7:%.*]] = or i64 [[RES7]], [[CONV7]]
+  // LLVM:  store i64 [[RET7]], ptr @ull, align 8
+  ull = __sync_or_and_fetch(&ull, uc);
+
+  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
+  // CHECK: [[RET0:%.*]] = cir.binop(xor, [[RES0]], [[VAL0]]) : !s8i
+  // LLVM:  [[VAL0:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[RES0:%.*]] = atomicrmw xor ptr @sc, i8 [[VAL0]] seq_cst, align 1
+  // LLVM:  [[RET0:%.*]] = xor i8 [[RES0]], [[VAL0]]
+  // LLVM:  store i8 [[RET0]], ptr @sc, align 1
+  sc = __sync_xor_and_fetch(&sc, uc);
+
+  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
+  // CHECK: [[RET1:%.*]] = cir.binop(xor, [[RES1]], [[VAL1]]) : !u8i
+  // LLVM:  [[VAL1:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[RES1:%.*]] = atomicrmw xor ptr @uc, i8 [[VAL1]] seq_cst, align 1
+  // LLVM:  [[RET1:%.*]] = xor i8 [[RES1]], [[VAL1]]
+  // LLVM:  store i8 [[RET1]], ptr @uc, align 1
+  uc = __sync_xor_and_fetch(&uc, uc);
+
+  // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
+  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
+  // CHECK: [[RET2:%.*]] = cir.binop(xor, [[RES2]], [[VAL2]]) : !s16i
+  // LLVM:  [[VAL2:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // LLVM:  [[RES2:%.*]] = atomicrmw xor ptr @ss, i16 [[CONV2]] seq_cst, align 2
+  // LLVM:  [[RET2:%.*]] = xor i16 [[RES2]], [[CONV2]]
+  // LLVM:  store i16 [[RET2]], ptr @ss, align 2
+  ss = __sync_xor_and_fetch(&ss, uc);
+
+  // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
+  // CHECK: [[RET3:%.*]] = cir.binop(xor, [[RES3]], [[VAL3]]) : !u16i
+  // LLVM:  [[VAL3:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // LLVM:  [[RES3:%.*]] = atomicrmw xor ptr @us, i16 [[CONV3]] seq_cst, align 2
+  // LLVM:  [[RET3:%.*]] = xor i16 [[RES3]], [[CONV3]]
+  // LLVM:  store i16 [[RET3]], ptr @us
+  us = __sync_xor_and_fetch(&us, uc);
+
+  // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
+  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
+  // CHECK: [[RET4:%.*]] = cir.binop(xor, [[RES4]], [[VAL4]]) : !s32i
+  // LLVM:  [[VAL4:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // LLVM:  [[RES4:%.*]] = atomicrmw xor ptr @si, i32 [[CONV4]] seq_cst, align 4
+  // LLVM:  [[RET4:%.*]] = xor i32 [[RES4]], [[CONV4]]
+  // LLVM:  store i32 [[RET4]], ptr @si, align 4
+  si = __sync_xor_and_fetch(&si, uc);
+
+  // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
+  // CHECK: [[RET5:%.*]] = cir.binop(xor, [[RES5]], [[VAL5]]) : !u32i
+  // LLVM:  [[VAL5:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // LLVM:  [[RES5:%.*]] = atomicrmw xor ptr @ui, i32 [[CONV5]] seq_cst, align 4
+  // LLVM:  [[RET5:%.*]] = xor i32 [[RES5]], [[CONV5]]
+  // LLVM:  store i32 [[RET5]], ptr @ui, align 4
+  ui = __sync_xor_and_fetch(&ui, uc);
+
+  // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
+  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
+  // CHECK: [[RET6:%.*]] = cir.binop(xor, [[RES6]], [[VAL6]]) : !s64i
+  // LLVM:  [[VAL6:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // LLVM:  [[RES6:%.*]] = atomicrmw xor ptr @sll, i64 [[CONV6]] seq_cst, align 8
+  // LLVM:  [[RET6:%.*]] = xor i64 [[RES6]], [[CONV6]]
+  // LLVM:  store i64 [[RET6]], ptr @sll, align 8
+  sll = __sync_xor_and_fetch(&sll, uc);
+
+  // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
+  // CHECK: [[RET7:%.*]] = cir.binop(xor, [[RES7]], [[VAL7]]) : !u64i
+  // LLVM:  [[VAL7:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // LLVM:  [[RES7:%.*]] = atomicrmw xor ptr @ull, i64 [[CONV7]] seq_cst, align 8
+  // LLVM:  [[RET7:%.*]] = xor i64 [[RES7]], [[CONV7]]
+  // LLVM:  store i64 [[RET7]], ptr @ull, align 8
+  ull = __sync_xor_and_fetch(&ull, uc);
+
+  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
+  // CHECK: [[INTERM0:%.*]] = cir.binop(and, [[RES0]], [[VAL0]]) : !s8i
+  // CHECK: [[RET0:%.*]] =  cir.unary(not, [[INTERM0]]) : !s8i, !s8i
+  // LLVM:  [[VAL0:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[RES0:%.*]] = atomicrmw nand ptr @sc, i8 [[VAL0]] seq_cst, align 1
+  // LLVM:  [[INTERM0:%.*]] = and i8 [[RES0]], [[VAL0]]
+  // LLVM:  [[RET0:%.*]] = xor i8 [[INTERM0]], -1
+  // LLVM:  store i8 [[RET0]], ptr @sc, align 1
+  sc = __sync_nand_and_fetch(&sc, uc);
+
+  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
+  // CHECK: [[INTERM1:%.*]] = cir.binop(and, [[RES1]], [[VAL1]]) : !u8i
+  // CHECK: [[RET1:%.*]] = cir.unary(not, [[INTERM1]]) : !u8i, !u8i
+  // LLVM:  [[VAL1:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[RES1:%.*]] = atomicrmw nand ptr @uc, i8 [[VAL1]] seq_cst, align 1
+  // LLVM:  [[INTERM1:%.*]] = and i8 [[RES1]], [[VAL1]]
+  // LLVM:  [[RET1:%.*]] = xor i8 [[INTERM1]], -1
+  // LLVM:  store i8 [[RET1]], ptr @uc, align 1
+  uc = __sync_nand_and_fetch(&uc, uc);
+
+  // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
+  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
+  // CHECK: [[INTERM2:%.*]] = cir.binop(and, [[RES2]], [[VAL2]]) : !s16i
+  // CHECK: [[RET2:%.*]] =  cir.unary(not, [[INTERM2]]) : !s16i, !s16i
+  // LLVM:  [[VAL2:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // LLVM:  [[RES2:%.*]] = atomicrmw nand ptr @ss, i16 [[CONV2]] seq_cst, align 2
+  // LLVM:  [[INTERM2:%.*]] = and i16 [[RES2]], [[CONV2]]
+  // LLVM:  [[RET2:%.*]] = xor i16 [[INTERM2]], -1
+  // LLVM:  store i16 [[RET2]], ptr @ss, align 2
+  ss = __sync_nand_and_fetch(&ss, uc);
+
+  // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
+  // CHECK: [[INTERM3:%.*]] = cir.binop(and, [[RES3]], [[VAL3]]) : !u16i
+  // CHECK: [[RET3:%.*]] =  cir.unary(not, [[INTERM3]]) : !u16i, !u16i
+  // LLVM:  [[VAL3:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // LLVM:  [[RES3:%.*]] = atomicrmw nand ptr @us, i16 [[CONV3]] seq_cst, align 2
+  // LLVM:  [[INTERM3:%.*]] = and i16 [[RES3]], [[CONV3]]
+  // LLVM:  [[RET3:%.*]] = xor i16 [[INTERM3]], -1
+  // LLVM:  store i16 [[RET3]], ptr @us, align 2
+  us = __sync_nand_and_fetch(&us, uc);
+
+  // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
+  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
+  // CHECK: [[INTERM4:%.*]] = cir.binop(and, [[RES4]], [[VAL4]]) : !s32i
+  // CHECK: [[RET4:%.*]] =  cir.unary(not, [[INTERM4]]) : !s32i, !s32i
+  // LLVM:  [[VAL4:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // LLVM:  [[RES4:%.*]] = atomicrmw nand ptr @si, i32 [[CONV4]] seq_cst, align 4
+  // LLVM:  [[INTERM4:%.*]] = and i32 [[RES4]], [[CONV4]]
+  // LLVM:  [[RET4:%.*]] = xor i32 [[INTERM4]], -1
+  // LLVM:  store i32 [[RET4]], ptr @si, align 4
+  si = __sync_nand_and_fetch(&si, uc);
+
+  // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
+  // CHECK: [[INTERM5:%.*]] = cir.binop(and, [[RES5]], [[VAL5]]) : !u32i
+  // CHECK: [[RET5:%.*]] =  cir.unary(not, [[INTERM5]]) : !u32i, !u32i
+  // LLVM:  [[VAL5:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // LLVM:  [[RES5:%.*]] = atomicrmw nand ptr @ui, i32 [[CONV5]] seq_cst, align 4
+  // LLVM:  [[INTERM5:%.*]] = and i32 [[RES5]], [[CONV5]]
+  // LLVM:  [[RET5:%.*]] = xor i32 [[INTERM5]], -1
+  // LLVM:  store i32 [[RET5]], ptr @ui, align 4
+  ui = __sync_nand_and_fetch(&ui, uc);
+
+  // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
+  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
+  // CHECK: [[INTERM6:%.*]] = cir.binop(and, [[RES6]], [[VAL6]]) : !s64i
+  // CHECK: [[RET6:%.*]] =  cir.unary(not, [[INTERM6]]) : !s64i, !s64i
+  // LLVM:  [[VAL6:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // LLVM:  [[RES6:%.*]] = atomicrmw nand ptr @sll, i64 [[CONV6]] seq_cst, align 8
+  // LLVM:  [[INTERM6:%.*]] = and i64 [[RES6]], [[CONV6]]
+  // LLVM:  [[RET6:%.*]] = xor i64 [[INTERM6]], -1
+  // LLVM:  store i64 [[RET6]], ptr @sll, align 8
+  sll = __sync_nand_and_fetch(&sll, uc);
+
+  // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
+  // CHECK: [[INTERM7:%.*]] = cir.binop(and, [[RES7]], [[VAL7]]) : !u64i
+  // CHECK: [[RET7:%.*]] =  cir.unary(not, [[INTERM7]]) : !u64i, !u64i
+  // LLVM:  [[VAL7:%.*]] = load i8, ptr @uc, align 1
+  // LLVM:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // LLVM:  [[RES7:%.*]] = atomicrmw nand ptr @ull, i64 [[CONV7]] seq_cst, align 8
+  // LLVM:  [[INTERM7:%.*]] = and i64 [[RES7]], [[CONV7]]
+  // LLVM:  [[RET7:%.*]] = xor i64 [[INTERM7]], -1
+  // LLVM:  store i64 [[RET7]], ptr @ull, align 8
+  ull = __sync_nand_and_fetch(&ull, uc);
+}
+
+// CHECK-LABEL: @_Z12test_and_setPvPVv
+// LLVM-LABEL: @_Z12test_and_setPvPVv
+void test_and_set(void *p, volatile void *vp) {
+  bool x = __atomic_test_and_set(p, __ATOMIC_SEQ_CST);
+  // CHECK:      %[[VOID_PTR:.+]] = cir.load align(8) %{{.+}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+  // CHECK-NEXT: %[[PTR:.+]] = cir.cast bitcast %[[VOID_PTR]] : !cir.ptr<!void> -> !cir.ptr<!s8i>
+  // CHECK:      %{{.+}} = cir.atomic.test_and_set seq_cst %[[PTR]] : !cir.ptr<!s8i> -> !cir.bool
+
+  // LLVM:      %[[PTR:.+]] = load ptr, ptr %{{.+}}, align 8
+  // LLVM-NEXT: %[[RES:.+]] = atomicrmw xchg ptr %[[PTR]], i8 1 seq_cst, align 1
+  // LLVM-NEXT: %{{.+}} = icmp ne i8 1, %[[RES]]
+
+  bool y = __atomic_test_and_set(vp, __ATOMIC_SEQ_CST);
+  // CHECK:      %[[VOID_PTR:.+]] = cir.load align(8) %{{.+}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+  // CHECK-NEXT: %[[PTR:.+]] = cir.cast bitcast %[[VOID_PTR]] : !cir.ptr<!void> -> !cir.ptr<!s8i>
+  // CHECK:      %{{.+}} = cir.atomic.test_and_set seq_cst %[[PTR]] volatile : !cir.ptr<!s8i> -> !cir.bool
+
+  // LLVM:      %[[PTR:.+]] = load ptr, ptr %{{.+}}, align 8
+  // LLVM-NEXT: %[[RES:.+]] = atomicrmw volatile xchg ptr %[[PTR]], i8 1 seq_cst, align 1
+  // LLVM-NEXT: %{{.+}} = icmp ne i8 1, %[[RES]]
+}
+
+// CHECK-LABEL: @_Z5clearPvPVv
+// LLVM-LABEL: @_Z5clearPvPVv
+void clear(void *p, volatile void *vp) {
+  __atomic_clear(p, __ATOMIC_SEQ_CST);
+  // CHECK:      %[[VOID_PTR:.+]] = cir.load align(8) %{{.+}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+  // CHECK-NEXT: %[[PTR:.+]] = cir.cast bitcast %[[VOID_PTR]] : !cir.ptr<!void> -> !cir.ptr<!s8i>
+  // CHECK:      cir.atomic.clear seq_cst %[[PTR]] : !cir.ptr<!s8i>
+
+  // LLVM: store atomic i8 0, ptr %{{.+}} seq_cst, align 1
+
+  __atomic_clear(vp, __ATOMIC_SEQ_CST);
+  // CHECK:      %[[VOID_PTR:.+]] = cir.load align(8) %{{.+}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+  // CHECK-NEXT: %[[PTR:.+]] = cir.cast bitcast %[[VOID_PTR]] : !cir.ptr<!void> -> !cir.ptr<!s8i>
+  // CHECK:      cir.atomic.clear seq_cst %[[PTR]] volatile : !cir.ptr<!s8i>
+
+  // LLVM: store atomic volatile i8 0, ptr %{{.+}} seq_cst, align 1
+}
+
+// CHECK-LABEL: @_Z17lock_test_and_setPii
+// CHECK: cir.atomic.fetch(xchg, {{.*}} : !cir.ptr<!s32i>, {{.*}} : !s32i, seq_cst) fetch_first : !s32i
+
+// LLVM-LABEL: @_Z17lock_test_and_setPii
+// LLVM: atomicrmw xchg ptr {{.*}}, i32 {{.*}} seq_cst, align 4
+void lock_test_and_set(int* a, int b) {
+  int c = __sync_lock_test_and_set(a, b);
+}
+
+
+// CHECK-LABEL: @_Z17lock_test_and_setPll
+// CHECK: cir.atomic.fetch(xchg, {{.*}} : !cir.ptr<!s64i>, {{.*}} : !s64i, seq_cst) fetch_first : !s64i
+
+// LLVM-LABEL: @_Z17lock_test_and_setPll
+// LLVM: atomicrmw xchg ptr {{.*}}, i64 {{.*}} seq_cst, align 8
+void lock_test_and_set(long* a, long b) {
+  long c = __sync_lock_test_and_set(a, b);
+}
+
+// CHECK-LABEL: @_Z17lock_test_and_setPss
+// CHECK: cir.atomic.fetch(xchg, {{.*}} : !cir.ptr<!s16i>, {{.*}} : !s16i, seq_cst) fetch_first : !s16i
+
+// LLVM-LABEL: @_Z17lock_test_and_setPss
+// LLVM: atomicrmw xchg ptr {{.*}}, i16 {{.*}} seq_cst, align 2
+void lock_test_and_set(short* a, short b) {
+  short c = __sync_lock_test_and_set(a, 2);
+}
+
+
+// CHECK-LABEL: @_Z17lock_test_and_setPcc
+// CHECK: cir.atomic.fetch(xchg, {{.*}} : !cir.ptr<!s8i>, {{.*}} : !s8i, seq_cst) fetch_first : !s8i
+
+// LLVM-LABEL: @_Z17lock_test_and_setPcc
+// LLVM: atomicrmw xchg ptr {{.*}}, i8 {{.*}} seq_cst, align 1
+void lock_test_and_set(char* a, char b) {
+  char c = __sync_lock_test_and_set(a, b);
+}
+
+// CHECK-LABEL: @_Z17lock_test_and_setPji
+// CHECK: cir.atomic.fetch(xchg, {{.*}} : !cir.ptr<!u32i>, {{.*}} : !u32i, seq_cst) fetch_first : !u32i
+
+// LLVM-LABEL: @_Z17lock_test_and_setPji
+// LLVM: atomicrmw xchg ptr {{.*}}, i32 {{.*}} seq_cst, align 4
+void lock_test_and_set(unsigned int* a, int b) {
+  unsigned int c = __sync_lock_test_and_set(a, b);
+}
+
+
+// CHECK-LABEL: @_Z17lock_test_and_setPml
+// CHECK: cir.atomic.fetch(xchg, {{.*}} : !cir.ptr<!u64i>, {{.*}} : !u64i, seq_cst) fetch_first : !u64i
+
+// LLVM-LABEL: @_Z17lock_test_and_setPml
+// LLVM: atomicrmw xchg ptr {{.*}}, i64 {{.*}} seq_cst, align 8
+void lock_test_and_set(unsigned long* a, long b) {
+  unsigned long c = __sync_lock_test_and_set(a, b);
+}
+
+// CHECK-LABEL: @_Z17lock_test_and_setPts
+// CHECK: cir.atomic.fetch(xchg, {{.*}} : !cir.ptr<!u16i>, {{.*}} : !u16i, seq_cst) fetch_first : !u16i
+//
+// LLVM-LABEL: @_Z17lock_test_and_setPts
+// LLVM: atomicrmw xchg ptr {{.*}}, i16 {{.*}} seq_cst, align 2
+void lock_test_and_set(unsigned short* a, short b) {
+  unsigned short c = __sync_lock_test_and_set(a, b);
+}
+
+// CHECK-LABEL: @_Z17lock_test_and_setPhc
+// CHECK: cir.atomic.fetch(xchg, {{.*}} : !cir.ptr<!u8i>, {{.*}} : !u8i, seq_cst) fetch_first : !u8i
+
+// LLVM-LABEL: @_Z17lock_test_and_setPhc
+// LLVM: atomicrmw xchg ptr {{.*}}, i8 {{.*}} seq_cst, align 1
+void lock_test_and_set(unsigned char* a, char b) {
+  unsigned char c = __sync_lock_test_and_set(a, b);
+}
+
+// CHECK-LABEL: @_Z4swapPii
+// CHECK: cir.atomic.fetch(xchg, {{.*}} : !cir.ptr<!s32i>, {{.*}} : !s32i, seq_cst) fetch_first : !s32i
+
+// LLVM-LABEL: @_Z4swapPii
+// LLVM: atomicrmw xchg ptr {{.*}}, i32 {{.*}} seq_cst, align 4
+
+// OGCG-LABEL: @_Z4swapPii
+// OGCG: atomicrmw xchg ptr {{.*}}, i32 {{.*}} seq_cst, align 4
+void swap(int* a, int b) {
+  int c = __sync_swap(a, b);
+}
+
+// CHECK-LABEL: @_Z4swapPll
+// CHECK: cir.atomic.fetch(xchg, {{.*}} : !cir.ptr<!s64i>, {{.*}} : !s64i, seq_cst) fetch_first : !s64i
+
+// LLVM-LABEL: @_Z4swapPll
+// LLVM: atomicrmw xchg ptr {{.*}}, i64 {{.*}} seq_cst, align 8
+
+// OGCG-LABEL: @_Z4swapPll
+// OGCG: atomicrmw xchg ptr {{.*}}, i64 {{.*}} seq_cst, align 8
+void swap(long* a, long b) {
+  long c = __sync_swap(a, b);
+}
+
+// CHECK-LABEL: @_Z4swapPss
+// CHECK: cir.atomic.fetch(xchg, {{.*}} : !cir.ptr<!s16i>, {{.*}} : !s16i, seq_cst) fetch_first : !s16i
+
+// LLVM-LABEL: @_Z4swapPss
+// LLVM: atomicrmw xchg ptr {{.*}}, i16 {{.*}} seq_cst, align 2
+
+// OGCG-LABEL: @_Z4swapPss
+// OGCG: atomicrmw xchg ptr {{.*}}, i16 {{.*}} seq_cst, align 2
+void swap(short* a, short b) {
+  short c = __sync_swap(a, 2);
+}
+
+// CHECK-LABEL: @_Z4swapPcc
+// CHECK: cir.atomic.fetch(xchg, {{.*}} : !cir.ptr<!s8i>, {{.*}} : !s8i, seq_cst) fetch_first : !s8i
+
+// LLVM-LABEL: @_Z4swapPcc
+// LLVM: atomicrmw xchg ptr {{.*}}, i8 {{.*}} seq_cst, align 1
+
+// OGCG-LABEL: @_Z4swapPcc
+// OGCG: atomicrmw xchg ptr {{.*}}, i8 {{.*}} seq_cst, align 1
+void swap(char* a, char b) {
+  char c = __sync_swap(a, b);
+}
+
+// CHECK-LABEL: @_Z4swapPji
+// CHECK: cir.atomic.fetch(xchg, {{.*}} : !cir.ptr<!u32i>, {{.*}} : !u32i, seq_cst) fetch_first : !u32i
+
+// LLVM-LABEL: @_Z4swapPji
+// LLVM: atomicrmw xchg ptr {{.*}}, i32 {{.*}} seq_cst, align 4
+
+// OGCG-LABEL: @_Z4swapPji
+// OGCG: atomicrmw xchg ptr {{.*}}, i32 {{.*}} seq_cst, align 4
+void swap(unsigned int* a, int b) {
+  unsigned int c = __sync_swap(a, b);
+}
+
+// CHECK-LABEL: @_Z4swapPml
+// CHECK: cir.atomic.fetch(xchg, {{.*}} : !cir.ptr<!u64i>, {{.*}} : !u64i, seq_cst) fetch_first : !u64i
+
+// LLVM-LABEL: @_Z4swapPml
+// LLVM: atomicrmw xchg ptr {{.*}}, i64 {{.*}} seq_cst, align 8
+
+// OGCG-LABEL: @_Z4swapPml
+// OGCG: atomicrmw xchg ptr {{.*}}, i64 {{.*}} seq_cst, align 8
+void swap(unsigned long* a, long b) {
+  unsigned long c = __sync_swap(a, b);
+}
+
+// CHECK-LABEL: @_Z4swapPts
+// CHECK: cir.atomic.fetch(xchg, {{.*}} : !cir.ptr<!u16i>, {{.*}} : !u16i, seq_cst) fetch_first : !u16i
+//
+// LLVM-LABEL: @_Z4swapPts
+// LLVM: atomicrmw xchg ptr {{.*}}, i16 {{.*}} seq_cst, align 2
+
+// OGCG-LABEL: @_Z4swapPts
+// OGCG: atomicrmw xchg ptr {{.*}}, i16 {{.*}} seq_cst, align 2
+void swap(unsigned short* a, short b) {
+  unsigned short c = __sync_swap(a, b);
+}
+
+// CHECK-LABEL: @_Z4swapPhc
+// CHECK: cir.atomic.fetch(xchg, {{.*}} : !cir.ptr<!u8i>, {{.*}} : !u8i, seq_cst) fetch_first : !u8i
+
+// LLVM-LABEL: @_Z4swapPhc
+// LLVM: atomicrmw xchg ptr {{.*}}, i8 {{.*}} seq_cst, align 1
+
+// OGCG-LABEL: @_Z4swapPhc
+// OGCG: atomicrmw xchg ptr {{.*}}, i8 {{.*}} seq_cst, align 1
+void swap(unsigned char* a, char b) {
+  unsigned char c = __sync_swap(a, b);
+}
+
+// CHECK-LABEL: @_Z12lock_releasePi
+// CHECK: cir.store align(4) atomic(release) {{.*}}, {{.*}} : !s32i, !cir.ptr<!s32i>
+
+// LLVM-LABEL: @_Z12lock_releasePi
+// LLVM: store atomic i32 0, ptr {{.*}} release, align 4
+
+// OGCG-LABEL: @_Z12lock_releasePi
+// OGCG: store atomic i32 0, ptr {{.*}} release, align 4
+void lock_release(int* a) {
+  __sync_lock_release(a);
+}
+
+// CHECK-LABEL: @_Z12lock_releasePl
+// CHECK: cir.store align(8) atomic(release) {{.*}}, {{.*}} : !s64i, !cir.ptr<!s64i>
+
+// LLVM-LABEL: @_Z12lock_releasePl
+// LLVM: store atomic i64 0, ptr {{.*}} release, align 8
+
+// OGCG-LABEL: @_Z12lock_releasePl
+// OGCG: store atomic i64 0, ptr {{.*}} release, align 8
+void lock_release(long* a) {
+  __sync_lock_release(a);
+}
+
+// CHECK-LABEL: @_Z12lock_releasePs
+// CHECK: cir.store align(2) atomic(release) {{.*}}, {{.*}} : !s16i, !cir.ptr<!s16i>
+
+// LLVM-LABEL: @_Z12lock_releasePs
+// LLVM: store atomic i16 0, ptr {{.*}} release, align 2
+
+// OGCG-LABEL: @_Z12lock_releasePs
+// OGCG: store atomic i16 0, ptr {{.*}} release, align 2
+void lock_release(short* a) {
+  __sync_lock_release(a);
+}
+
+// CHECK-LABEL: @_Z12lock_releasePc
+// CHECK: cir.store align(1) atomic(release) {{.*}}, {{.*}} : !s8i, !cir.ptr<!s8i>
+
+// LLVM-LABEL: @_Z12lock_releasePc
+// LLVM: store atomic i8 0, ptr {{.*}} release, align 1
+
+// OGCG-LABEL: @_Z12lock_releasePc
+// OGCG: store atomic i8 0, ptr {{.*}} release, align 1
+void lock_release(char* a) {
+  __sync_lock_release(a);
+}
+
+// CHECK-LABEL: @_Z12lock_releasePj
+// CHECK: cir.store align(4) atomic(release) {{.*}}, {{.*}} : !u32i, !cir.ptr<!u32i>
+
+// LLVM-LABEL: @_Z12lock_releasePj
+// LLVM: store atomic i32 0, ptr {{.*}} release, align 4
+
+// OGCG-LABEL: @_Z12lock_releasePj
+// OGCG: store atomic i32 0, ptr {{.*}} release, align 4
+void lock_release(unsigned int* a) {
+  __sync_lock_release(a);
+}
+
+// CHECK-LABEL: @_Z12lock_releasePm
+// CHECK: cir.store align(8) atomic(release) {{.*}}, {{.*}} : !u64i, !cir.ptr<!u64i>
+
+// LLVM-LABEL: @_Z12lock_releasePm
+// LLVM: store atomic i64 0, ptr {{.*}} release, align 8
+
+// OGCG-LABEL: @_Z12lock_releasePm
+// OGCG: store atomic i64 0, ptr {{.*}} release, align 8
+void lock_release(unsigned long* a) {
+  __sync_lock_release(a);
+}
+
+// CHECK-LABEL: @_Z12lock_releasePt
+// CHECK: cir.store align(2) atomic(release) {{.*}}, {{.*}} : !u16i, !cir.ptr<!u16i>
+
+// LLVM-LABEL: @_Z12lock_releasePt
+// LLVM: store atomic i16 0, ptr {{.*}} release, align 2
+
+// OGCG-LABEL: @_Z12lock_releasePt
+// OGCG: store atomic i16 0, ptr {{.*}} release, align 2
+void lock_release(unsigned short* a) {
+  __sync_lock_release(a);
+}
+
+// CHECK-LABEL: @_Z12lock_releasePh
+// CHECK: cir.store align(1) atomic(release) {{.*}}, {{.*}} : !u8i, !cir.ptr<!u8i>
+
+// LLVM-LABEL: @_Z12lock_releasePh
+// LLVM: store atomic i8 0, ptr {{.*}} release, align 1
+
+// OGCG-LABEL: @_Z12lock_releasePh
+// OGCG: store atomic i8 0, ptr {{.*}} release, align 1
+void lock_release(unsigned char* a) {
+  __sync_lock_release(a);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/attribute-annotate-multiple.cpp b/clang/test/CIR/Incubator/CodeGen/attribute-annotate-multiple.cpp
new file mode 100644
index 0000000000000..b45ba0ce6d4a9
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/attribute-annotate-multiple.cpp
@@ -0,0 +1,83 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=BEFORE
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=AFTER
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+double *a __attribute__((annotate("withargs", "21", 12 )));
+int *b __attribute__((annotate("withargs", "21", 12 )));
+void *c __attribute__((annotate("noargvar")));
+
+enum : char { npu1 = 42};
+int tile __attribute__((annotate("cir.aie.device.tile", npu1))) = 7;
+
+void foo(int i) __attribute__((annotate("noargfunc"))) {
+}
+// redeclare with more annotate
+void foo(int i) __attribute__((annotate("withargfunc", "os", 23 )));
+void bar() __attribute__((annotate("withargfunc", "os", 22))) {
+}
+
+// BEFORE: module @{{.*}}attribute-annotate-multiple.cpp" attributes {{{.*}}cir.lang =
+
+// BEFORE: cir.global external @a = #cir.ptr<null> : !cir.ptr<!cir.double>
+// BEFORE-SAME: [#cir.annotation<name = "withargs", args = ["21", 12 : i32]>]
+// BEFORE: cir.global external @b = #cir.ptr<null> : !cir.ptr<!s32i>
+// BEFORE-SAME: [#cir.annotation<name = "withargs", args = ["21", 12 : i32]>]
+// BEFORE: cir.global external @c = #cir.ptr<null> : !cir.ptr<!void>
+// BEFORE-SAME: [#cir.annotation<name = "noargvar", args = []>]
+// BEFORE: cir.global external @tile = #cir.int<7> : !s32i
+// BEFORE-SAME: #cir.annotation<name = "cir.aie.device.tile", args = [42 : i8]>]
+
+// BEFORE: cir.func {{.*}} @_Z3fooi(%arg0: !s32i) [#cir.annotation<name = "noargfunc", args = []>,
+// BEFORE-SAME: #cir.annotation<name = "withargfunc", args = ["os", 23 : i32]>]
+// BEFORE: cir.func {{.*}} @_Z3barv() [#cir.annotation<name = "withargfunc", args = ["os", 22 : i32]>]
+
+
+// AFTER: module {{.*}}attribute-annotate-multiple.cpp" attributes
+// AFTER-SAME: {cir.global_annotations = #cir<global_annotations [
+// AFTER-SAME: ["a", #cir.annotation<name = "withargs", args = ["21", 12 : i32]>],
+// AFTER-SAME: ["b", #cir.annotation<name = "withargs", args = ["21", 12 : i32]>],
+// AFTER-SAME: ["c", #cir.annotation<name = "noargvar", args = []>],
+// AFTER-SAME: ["tile", #cir.annotation<name = "cir.aie.device.tile", args = [42 : i8]>],
+// AFTER-SAME: ["_Z3fooi", #cir.annotation<name = "noargfunc", args = []>],
+// AFTER-SAME: ["_Z3fooi", #cir.annotation<name = "withargfunc", args = ["os", 23 : i32]>],
+// AFTER-SAME: ["_Z3barv", #cir.annotation<name = "withargfunc", args = ["os", 22 : i32]>]]>,
+
+
+// LLVM: @a = global ptr null
+// LLVM: @b = global ptr null
+// LLVM: @c = global ptr null
+// LLVM: @tile = global i32 7
+// LLVM: @.str.annotation = private unnamed_addr constant [9 x i8] c"withargs\00", section "llvm.metadata"
+// LLVM: @.str.1.annotation = private unnamed_addr constant [{{[0-9]+}} x i8] c"{{.*}}attribute-annotate-multiple.cpp\00", section "llvm.metadata"
+// LLVM: @.str.annotation.arg = private unnamed_addr constant [3 x i8] c"21\00", align 1
+// LLVM: @.args.annotation = private unnamed_addr constant { ptr, i32 } { ptr @.str.annotation.arg, i32 12 }, section "llvm.metadata"
+// LLVM: @.str.2.annotation = private unnamed_addr constant [9 x i8] c"noargvar\00", section "llvm.metadata"
+// LLVM: @.str.3.annotation = private unnamed_addr constant [20 x i8] c"cir.aie.device.tile\00", section "llvm.metadata"
+// LLVM: @.args.1.annotation = private unnamed_addr constant { i8 } { i8 42 }, section "llvm.metadata"
+// LLVM: @.str.4.annotation = private unnamed_addr constant [10 x i8] c"noargfunc\00", section "llvm.metadata"
+// LLVM: @.str.5.annotation = private unnamed_addr constant [12 x i8] c"withargfunc\00", section "llvm.metadata"
+// LLVM: @.str.1.annotation.arg = private unnamed_addr constant [3 x i8] c"os\00", align 1
+// LLVM: @.args.2.annotation = private unnamed_addr constant { ptr, i32 }
+// LLVM-SAME: { ptr @.str.1.annotation.arg, i32 23 }, section "llvm.metadata"
+// LLVM: @.args.3.annotation = private unnamed_addr constant { ptr, i32 }
+// LLVM-SAME: { ptr @.str.1.annotation.arg, i32 22 }, section "llvm.metadata"
+
+// LLVM: @llvm.global.annotations = appending global [7 x { ptr, ptr, ptr, i32, ptr }]
+// LLVM-SAME: [{ ptr, ptr, ptr, i32, ptr }
+// LLVM-SAME: { ptr @a, ptr @.str.annotation, ptr @.str.1.annotation, i32 5, ptr @.args.annotation },
+// LLVM-SAME: { ptr, ptr, ptr, i32, ptr }
+// LLVM-SAME: { ptr @b, ptr @.str.annotation, ptr @.str.1.annotation, i32 6, ptr @.args.annotation },
+// LLVM-SAME: { ptr, ptr, ptr, i32, ptr }
+// LLVM-SAME: { ptr @c, ptr @.str.2.annotation, ptr @.str.1.annotation, i32 7, ptr null },
+// LLVM-SAME: { ptr, ptr, ptr, i32, ptr }
+// LLVM-SAME: { ptr @tile, ptr @.str.3.annotation, ptr @.str.1.annotation, i32 10, ptr @.args.1.annotation },
+// LLVM-SAME: { ptr, ptr, ptr, i32, ptr }
+// LLVM-SAME: { ptr @_Z3fooi, ptr @.str.4.annotation, ptr @.str.1.annotation, i32 12, ptr null },
+// LLVM-SAME: { ptr, ptr, ptr, i32, ptr }
+// LLVM-SAME: { ptr @_Z3fooi, ptr @.str.5.annotation, ptr @.str.1.annotation, i32 12, ptr @.args.2.annotation },
+// LLVM-SAME: { ptr, ptr, ptr, i32, ptr }
+// LLVM-SAME: { ptr @_Z3barv, ptr @.str.5.annotation, ptr @.str.1.annotation, i32 16, ptr @.args.3.annotation }],
+// LLVM-SAME: section "llvm.metadata"
+
+// LLVM: define dso_local void @_Z3fooi(i32 %0)
+// LLVM: define dso_local void @_Z3barv()
diff --git a/clang/test/CIR/Incubator/CodeGen/attributes.c b/clang/test/CIR/Incubator/CodeGen/attributes.c
new file mode 100644
index 0000000000000..67bb467a8c5d1
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/attributes.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o -  | FileCheck %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o - | FileCheck %s -check-prefix=LLVM
+
+extern int __attribute__((section(".shared"))) ext;
+int getExt() {
+  return ext;
+}
+// CIR:   cir.global "private" external @ext : !s32i {alignment = 4 : i64, section = ".shared"}
+// LLVM:  @ext = external global i32, section ".shared"
+
+int __attribute__((section(".shared"))) glob = 42;
+// CIR:   cir.global external @glob = #cir.int<42> : !s32i {alignment = 4 : i64, section = ".shared"}
+// LLVM:   @glob = global i32 42, section ".shared"
+
+
+void __attribute__((__visibility__("hidden"))) foo();
+// CIR: cir.func no_proto private hidden @foo(...)
+int bah()
+{
+  foo();
+  return 1;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/basic.c b/clang/test/CIR/Incubator/CodeGen/basic.c
new file mode 100644
index 0000000000000..55755b0e33ca8
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/basic.c
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+int foo(int i);
+
+int foo(int i) {
+  i;
+  return i;
+}
+
+//      CIR: module @"{{.*}}basic.c" attributes {{{.*}}cir.lang = #cir.lang<c>
+// CIR-NEXT: cir.func {{.*}} @foo(%arg0: !s32i loc({{.*}})) -> !s32i
+// CIR-NEXT: %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+// CIR-NEXT: %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CIR-NEXT: cir.store{{.*}} %arg0, %0 : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT: %2 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT: %3 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT: cir.store{{.*}} %3, %1 : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT: %4 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT: cir.return %4 : !s32i
+
+int f2(void) { return 3; }
+
+// CIR: cir.func {{.*}} @f2() -> !s32i
+// CIR-NEXT: %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CIR-NEXT: %1 = cir.const #cir.int<3> : !s32i
+// CIR-NEXT: cir.store{{.*}} %1, %0 : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT: %2 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT: cir.return %2 : !s32i
+
+// LLVM: define dso_local i32 @f2()
+// LLVM-NEXT:  %1 = alloca i32, i64 1, align 4
+// LLVM-NEXT:  store i32 3, ptr %1, align 4
+// LLVM-NEXT:  %2 = load i32, ptr %1, align 4
+// LLVM-NEXT:  ret i32 %2
+
+
+
+int f3(void) {
+  int i = 3;
+  return i;
+}
+
+// CIR: cir.func {{.*}} @f3() -> !s32i
+// CIR-NEXT: %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CIR-NEXT: %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+// CIR-NEXT: %2 = cir.const #cir.int<3> : !s32i
+// CIR-NEXT: cir.store{{.*}} %2, %1 : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT: %3 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT: cir.store{{.*}} %3, %0 : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT: %4 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT: cir.return %4 : !s32i
diff --git a/clang/test/CIR/Incubator/CodeGen/basic.cpp b/clang/test/CIR/Incubator/CodeGen/basic.cpp
new file mode 100644
index 0000000000000..b577cc35e4695
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/basic.cpp
@@ -0,0 +1,182 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+int *p0() {
+  int *p = nullptr;
+  return p;
+}
+
+// CHECK: cir.func {{.*}} @_Z2p0v() -> !cir.ptr<!s32i>
+// CHECK: %1 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["p", init]
+// CHECK: %2 = cir.const #cir.ptr<null> : !cir.ptr<!s32i>
+// CHECK: cir.store{{.*}} %2, %1 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+
+int *p1() {
+  int *p;
+  p = nullptr;
+  return p;
+}
+
+// CHECK: cir.func {{.*}} @_Z2p1v() -> !cir.ptr<!s32i>
+// CHECK: %1 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["p"]
+// CHECK: %2 = cir.const #cir.ptr<null> : !cir.ptr<!s32i>
+// CHECK: cir.store{{.*}} %2, %1 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+
+int *p2() {
+  int *p = nullptr;
+  {
+    int x = 0;
+    p = &x;
+    *p = 42;
+  }
+  *p = 42;
+  return p;
+}
+
+// CHECK: cir.func {{.*}} @_Z2p2v() -> !cir.ptr<!s32i>
+// CHECK-NEXT:  %0 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["__retval"] {alignment = 8 : i64}
+// CHECK-NEXT:  %1 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["p", init] {alignment = 8 : i64}
+// CHECK-NEXT:  %2 = cir.const #cir.ptr<null> : !cir.ptr<!s32i>
+// CHECK-NEXT:  cir.store{{.*}} %2, %1 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CHECK-NEXT:  cir.scope {
+// CHECK-NEXT:    %7 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+// CHECK-NEXT:    %8 = cir.const #cir.int<0> : !s32i
+// CHECK-NEXT:    cir.store{{.*}} %8, %7 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:    cir.store{{.*}} %7, %1 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CHECK-NEXT:    %9 = cir.const #cir.int<42> : !s32i
+// CHECK-NEXT:    %10 = cir.load deref{{.*}}  %1 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK-NEXT:    cir.store{{.*}} %9, %10 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:  } loc(#[[locScope:loc[0-9]+]])
+// CHECK-NEXT:  %3 = cir.const #cir.int<42> : !s32i
+// CHECK-NEXT:  %4 = cir.load deref{{.*}}  %1 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK-NEXT:  cir.store{{.*}} %3, %4 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:  %5 = cir.load{{.*}} %1 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK-NEXT:  cir.store{{.*}} %5, %0 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CHECK-NEXT:  %6 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK-NEXT:  cir.return %6 : !cir.ptr<!s32i>
+
+void b0() { bool x = true, y = false; }
+
+// CHECK: cir.func {{.*}} @_Z2b0v()
+// CHECK: %2 = cir.const #true
+// CHECK: %3 = cir.const #false
+
+void b1(int a) { bool b = a; }
+
+// CHECK: cir.func {{.*}} @_Z2b1i(%arg0: !s32i loc({{.*}}))
+// CHECK: %2 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CHECK: %3 = cir.cast int_to_bool %2 : !s32i -> !cir.bool
+// CHECK: cir.store{{.*}} %3, %1 : !cir.bool, !cir.ptr<!cir.bool>
+
+void if0(int a) {
+  int x = 0;
+  if (a) {
+    x = 3;
+  } else {
+    x = 4;
+  }
+}
+
+// CHECK: cir.func {{.*}} @_Z3if0i(%arg0: !s32i loc({{.*}}))
+// CHECK: cir.scope {
+// CHECK:   %3 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CHECK:   %4 = cir.cast int_to_bool %3 : !s32i -> !cir.bool
+// CHECK-NEXT:   cir.if %4 {
+// CHECK-NEXT:     %5 = cir.const #cir.int<3> : !s32i
+// CHECK-NEXT:     cir.store{{.*}} %5, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:   } else {
+// CHECK-NEXT:     %5 = cir.const #cir.int<4> : !s32i
+// CHECK-NEXT:     cir.store{{.*}} %5, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:   }
+// CHECK: }
+
+void if1(int a, bool b, bool c) {
+  int x = 0;
+  if (a) {
+    x = 3;
+    if (b) {
+      x = 8;
+    }
+  } else {
+    if (c) {
+      x = 14;
+    }
+    x = 4;
+  }
+}
+
+// CHECK: cir.func {{.*}} @_Z3if1ibb(%arg0: !s32i loc({{.*}}), %arg1: !cir.bool loc({{.*}}), %arg2: !cir.bool loc({{.*}}))
+// CHECK: cir.scope {
+// CHECK:   %5 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CHECK:   %6 = cir.cast int_to_bool %5 : !s32i -> !cir.bool
+// CHECK:   cir.if %6 {
+// CHECK:     %7 = cir.const #cir.int<3> : !s32i
+// CHECK:     cir.store{{.*}} %7, %3 : !s32i, !cir.ptr<!s32i>
+// CHECK:     cir.scope {
+// CHECK:       %8 = cir.load{{.*}} %1 : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT:       cir.if %8 {
+// CHECK-NEXT:         %9 = cir.const #cir.int<8> : !s32i
+// CHECK-NEXT:         cir.store{{.*}} %9, %3 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:       }
+// CHECK:     }
+// CHECK:   } else {
+// CHECK:     cir.scope {
+// CHECK:       %8 = cir.load{{.*}} %2 : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT:       cir.if %8 {
+// CHECK-NEXT:         %9 = cir.const #cir.int<14> : !s32i
+// CHECK-NEXT:         cir.store{{.*}} %9, %3 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:       }
+// CHECK:     }
+// CHECK:     %7 = cir.const #cir.int<4> : !s32i
+// CHECK:     cir.store{{.*}} %7, %3 : !s32i, !cir.ptr<!s32i>
+// CHECK:   }
+// CHECK: }
+
+enum {
+  um = 0,
+  dois = 1,
+}; // Do not crash!
+
+extern "C" {
+struct regs {
+  unsigned long sp;
+  unsigned long pc;
+};
+
+// Check it's not mangled.
+// CHECK: cir.func {{.*}} @use_regs()
+
+void use_regs() { regs r; }
+}
+
+void x() {
+  const bool b0 = true;
+  const bool b1 = false;
+}
+
+// CHECK: cir.func {{.*}} @_Z1xv()
+// CHECK:   %0 = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["b0", init, const] {alignment = 1 : i64}
+// CHECK:   %1 = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["b1", init, const] {alignment = 1 : i64}
+// CHECK:   %2 = cir.const #true
+// CHECK:   cir.store{{.*}} %2, %0 : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:   %3 = cir.const #false
+// CHECK:   cir.store{{.*}} %3, %1 : !cir.bool, !cir.ptr<!cir.bool>
+
+typedef unsigned long size_type;
+typedef unsigned long _Tp;
+
+size_type max_size() {
+  return size_type(~0) / sizeof(_Tp);
+}
+
+// CHECK: cir.func {{.*}} @_Z8max_sizev()
+// CHECK:   %0 = cir.alloca !u64i, !cir.ptr<!u64i>, ["__retval"] {alignment = 8 : i64}
+// CHECK:   %1 = cir.const #cir.int<0> : !s32i
+// CHECK:   %2 = cir.unary(not, %1) : !s32i, !s32i
+// CHECK:   %3 = cir.cast integral %2 : !s32i -> !u64i
+// CHECK:   %4 = cir.const #cir.int<8> : !u64i
+// CHECK:   %5 = cir.binop(div, %3, %4) : !u64i
+
+// CHECK-DAG: #[[locScope]] = loc(fused[#[[locScopeA:loc[0-9]+]], #[[locScopeB:loc[0-9]+]]])
+// CHECK-DAG: #[[locScopeA]] = loc("{{.*}}basic.cpp":27:3)
+// CHECK-DAG: #[[locScopeB]] = loc("{{.*}}basic.cpp":31:3)
diff --git a/clang/test/CIR/Incubator/CodeGen/bf16-ops.c b/clang/test/CIR/Incubator/CodeGen/bf16-ops.c
new file mode 100644
index 0000000000000..7aa53752d9dd2
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/bf16-ops.c
@@ -0,0 +1,1639 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir --check-prefix=NONATIVE %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16 -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir --check-prefix=NATIVE %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll --check-prefix=NONATIVE-LLVM %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16 -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll --check-prefix=NATIVE-LLVM %s
+
+volatile unsigned test;
+volatile int i0;
+volatile __bf16 h0 = 0.0, h1 = 1.0, h2;
+volatile float f0, f1, f2;
+volatile double d0;
+short s0;
+
+void foo(void) {
+  test = (h0);
+  // NONATIVE: %{{.+}} = cir.cast float_to_int %{{.+}} : !cir.bf16 -> !u32i
+  // NATIVE: %{{.+}} = cir.cast float_to_int %{{.+}} : !cir.bf16 -> !u32i
+
+  // NONATIVE-LLVM: %{{.+}} = fptoui bfloat %{{.+}} to i32
+  // NATIVE-LLVM: %{{.+}} = fptoui bfloat %{{.+}} to i32
+
+  h0 = (test);
+  // NONATIVE: %{{.+}} = cir.cast int_to_float %{{.+}} : !u32i -> !cir.bf16
+  // NATIVE: %{{.+}} = cir.cast int_to_float %{{.+}} : !u32i -> !cir.bf16
+
+  // NONATIVE-LLVM: %{{.+}} = uitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM: %{{.+}} = uitofp i32 %{{.+}} to bfloat
+
+  test = (!h1);
+  //      NONATIVE: %[[#A:]] = cir.cast float_to_bool %{{.+}} : !cir.bf16 -> !cir.bool
+  // NONATIVE-NEXT: %[[#B:]] = cir.unary(not, %[[#A]]) : !cir.bool, !cir.bool
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast float_to_bool %{{.+}} : !cir.bf16 -> !cir.bool
+  // NATIVE-NEXT: %[[#B:]] = cir.unary(not, %[[#A]]) : !cir.bool, !cir.bool
+  // NATIVE-NEXT: %[[#C:]] = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  //      NONATIVE-LLVM: %[[#A:]] = fcmp une bfloat %{{.+}}, 0xR0000
+  // NONATIVE-LLVM-NEXT: %[[#C:]] = xor i1 %[[#A]], true
+  // NONATIVE-LLVM-NEXT: %{{.+}} = zext i1 %[[#C]] to i32
+
+  //      NATIVE-LLVM: %[[#A:]] = fcmp une bfloat %{{.+}}, 0xR0000
+  // NATIVE-LLVM-NEXT: %[[#C:]] = xor i1 %[[#A]], true
+  // NATIVE-LLVM-NEXT: %{{.+}} = zext i1 %[[#C]] to i32
+
+  h1 = -h1;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+
+  //  NATIVE-NOT: %{{.+}} = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //  NATIVE-NOT: %{{.+}} = cir.cast floating %{{.+}} : !cir.float -> !cir.bf16
+  //      NATIVE: %{{.+}} = cir.unary(minus, %{{.+}}) : !cir.bf16, !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fneg float %[[#A]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat
+
+  // NATIVE-LLVM: %{{.+}} = fneg bfloat %{{.+}}
+
+  h1 = +h1;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.unary(plus, %[[#A]]) : !cir.float, !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+
+  //  NATIVE-NOT: %{{.+}} = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //  NATIVE-NOT: %{{.+}} = cir.cast floating %{{.+}} : !cir.float -> !cir.bf16
+  //      NATIVE: %{{.+}} = cir.unary(plus, %{{.+}}) : !cir.bf16, !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#A]] to bfloat
+
+  //      NATIVE-LLVM: %[[#A:]] = load volatile bfloat, ptr @h1, align 2
+  // NATIVE-LLVM-NEXT: store volatile bfloat %[[#A]], ptr @h1, align 2
+
+  h1++;
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.bf16
+  // NONATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.bf16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16
+
+  // NONATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, 0xR3F80
+
+  // NATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, 0xR3F80
+
+  ++h1;
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.bf16
+  // NONATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.bf16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16
+
+  // NONATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, 0xR3F80
+
+  // NATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, 0xR3F80
+
+  --h1;
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<-1.000000e+00> : !cir.bf16
+  // NONATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<-1.000000e+00> : !cir.bf16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16
+
+  // NONATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, 0xRBF80
+
+  // NATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, 0xRBF80
+
+  h1--;
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<-1.000000e+00> : !cir.bf16
+  // NONATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<-1.000000e+00> : !cir.bf16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16
+
+  // NONATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, 0xRBF80
+
+  // NATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, 0xRBF80
+
+  h1 = h0 * h2;
+  //      NONATIVE: %[[#LHS:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#RHS:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHS]], %[[#RHS]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#A]] : !cir.float -> !cir.bf16
+
+  // NATIVE: %{{.+}} = cir.binop(mul, %{{.+}}, %{{.+}}) : !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  // NATIVE-LLVM: %{{.+}} = fmul bfloat %{{.+}}, %{{.+}}
+
+  h1 = h0 * (__bf16) -2.0f;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.unary(minus, %[[#B]]) : !cir.float, !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: %[[#E:]] = cir.cast floating %[[#D]] : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#F:]] = cir.binop(mul, %[[#A]], %[[#E]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#F]] : !cir.float -> !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(mul, %{{.+}}, %[[#C]]) : !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fmul float %[[#A]], -2.000000e+00
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat
+
+  // NATIVE-LLVM: %{{.+}} = fmul bfloat %{{.+}}, 0xRC000
+
+  h1 = h0 * f2;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.float
+  // NATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RES:]] = fmul float %[[#LHS]], %{{.+}}
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  //      NATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  //      NATIVE-LLVM: %[[#RES:]] = fmul float %[[#LHS]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  h1 = f0 * h2;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.float
+  // NATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %{{.+}}, %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  //      NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %{{.+}}, %[[#RHS]]
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  h1 = h0 * i0;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(mul, %[[#A]], %[[#C]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#D]] : !cir.float -> !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM-NEXT: %[[#A:]] = fpext bfloat %[[#RHS]] to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %[[#A]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  //      NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM-NEXT: %{{.+}} = fmul bfloat %{{.+}}, %[[#A]]
+
+  h1 = (h0 / h2);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(div, %[[#A]], %[[#B]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#C]] : !cir.float -> !cir.bf16
+
+  // NATIVE: %{{.+}} = cir.binop(div, %{{.+}}, %{{.+}}) : !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  // NATIVE-LLVM: %{{.+}} = fdiv bfloat %{{.+}}, %{{.+}}
+
+  h1 = (h0 / (__bf16) -2.0f);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.unary(minus, %[[#B]]) : !cir.float, !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: %[[#E:]] = cir.cast floating %[[#D]] : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#F:]] = cir.binop(div, %[[#A]], %[[#E]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#F]] : !cir.float -> !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(div, %{{.+}}, %[[#C]]) : !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fdiv float %[[#A]], -2.000000e+00
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat
+
+  // NATIVE-LLVM: %{{.+}} = fdiv bfloat %{{.+}}, 0xRC000
+
+  h1 = (h0 / f2);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.float
+  // NATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RES:]] = fdiv float %[[#LHS]], %{{.+}}
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  //      NATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  //      NATIVE-LLVM: %[[#RES:]] = fdiv float %[[#LHS]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  h1 = (f0 / h2);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.binop(div, %{{.+}}, %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(div, %{{.+}}, %[[#A]]) : !cir.float
+  // NATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %{{.+}}, %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  //      NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %{{.+}}, %[[#RHS]]
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  h1 = (h0 / i0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(div, %[[#A]], %[[#C]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#D]] : !cir.float -> !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(div, %{{.+}}, %[[#A]]) : !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM-NEXT: %[[#A:]] = fpext bfloat %[[#RHS]] to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %[[#A]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  //      NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM-NEXT: %{{.+}} = fdiv bfloat %{{.+}}, %[[#A]]
+
+  h1 = (h2 + h0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(add, %[[#A]], %[[#B]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#C]] : !cir.float -> !cir.bf16
+
+  // NATIVE: %{{.+}} = cir.binop(add, %{{.+}}, %{{.+}}) : !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  // NATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, %{{.+}}
+
+  h1 = ((__bf16)-2.0 + h0);
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double
+  // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.bf16
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#E:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#F:]] = cir.binop(add, %[[#D]], %[[#E]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#F]] : !cir.float -> !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.bf16
+  //      NATIVE: %{{.+}} = cir.binop(add, %[[#C]], %{{.+}}) : !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fadd float -2.000000e+00, %[[#A]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat
+
+  // NATIVE-LLVM: %{{.+}} = fadd bfloat 0xRC000, %{{.+}}
+
+  h1 = (h2 + f0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.float
+  // NATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RHS:]] = load volatile float, ptr @f0, align 4
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  //      NATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %[[#RHS:]] = load volatile float, ptr @f0, align 4
+  // NATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#RHS]]
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  h1 = (f2 + h0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.float
+  // NATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %{{.+}}, %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  //      NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %{{.+}}, %[[#RHS]]
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  h1 = (h0 + i0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(add, %[[#A]], %[[#C]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#D]] : !cir.float -> !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RHS_INT:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM-NEXT: %[[#RHS:]] = fpext bfloat %[[#RHS_INT]] to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  //      NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM-NEXT: %{{.+}} = fadd bfloat %{{.+}}, %[[#A]]
+
+  h1 = (h2 - h0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(sub, %[[#A]], %[[#B]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#C]] : !cir.float -> !cir.bf16
+
+  // NATIVE: %{{.+}} = cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  // NATIVE-LLVM: %{{.+}} = fsub bfloat %{{.+}}, %{{.+}}
+
+  h1 = ((__bf16)-2.0f - h0);
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#E:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#F:]] = cir.binop(sub, %[[#D]], %[[#E]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#F]] : !cir.float -> !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+  //      NATIVE: %{{.+}} = cir.binop(sub, %[[#C]], %{{.+}}) : !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fsub float -2.000000e+00, %[[#A]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat
+
+  // NATIVE-LLVM: %{{.+}} = fsub bfloat 0xRC000, %{{.+}}
+
+  h1 = (h2 - f0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.float
+  // NATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RHS:]] = load volatile float, ptr @f0, align 4
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  //      NATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %[[#RHS:]] = load volatile float, ptr @f0, align 4
+  // NATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#RHS]]
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  h1 = (f2 - h0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.float
+  // NATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %{{.+}}, %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  //      NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %{{.+}}, %[[#RHS]]
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  h1 = (h0 - i0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(sub, %[[#A]], %[[#C]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#D]] : !cir.float -> !cir.bf16
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.bf16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RHS_INT:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM-NEXT: %[[#RHS:]] = fpext bfloat %[[#RHS_INT]] to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  //      NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM-NEXT: %{{.+}} = fsub bfloat %{{.+}}, %[[#A]]
+
+  test = (h2 < h0);
+  //      NONATIVE: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#A]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#A]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp olt bfloat %{{.+}}, %{{.+}}
+
+  // NATIVE-LLVM: %{{.+}} = fcmp olt bfloat %{{.+}}, %{{.+}}
+
+  test = (h2 < (__bf16)42.0);
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.bf16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#C]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.bf16
+  // NATIVE-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#C]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp olt bfloat %{{.+}}, 0xR4228
+
+  // NATIVE-LLVM: %{{.+}} = fcmp olt bfloat %{{.+}}, 0xR4228
+
+  test = (h2 < f0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM: %{{.+}} = fcmp olt float %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM: %{{.+}} = fcmp olt float %[[#LHS]], %{{.+}}
+
+  test = (f2 < h0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp olt float %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp olt float %{{.+}}, %[[#RHS]]
+
+  test = (i0 < h0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  //      NONATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  //      NATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM: %{{.+}} = fcmp olt bfloat %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM: %{{.+}} = fcmp olt bfloat %[[#LHS]], %{{.+}}
+
+  test = (h0 < i0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp olt bfloat %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp olt bfloat %{{.+}}, %[[#RHS]]
+
+  test = (h0 > h2);
+  //      NONATIVE: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#A]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#A]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp ogt bfloat %{{.+}}, %{{.+}}
+
+  // NATIVE-LLVM: %{{.+}} = fcmp ogt bfloat %{{.+}}, %{{.+}}
+
+  test = ((__bf16)42.0 > h2);
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.bf16
+  //      NONATIVE: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#C]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.bf16
+  //      NATIVE: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#C]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp ogt bfloat 0xR4228, %{{.+}}
+
+  // NATIVE-LLVM: %{{.+}} = fcmp ogt bfloat 0xR4228, %{{.+}}
+
+  test = (h0 > f2);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM: %{{.+}} = fcmp ogt float %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM: %{{.+}} = fcmp ogt float %[[#LHS]], %{{.+}}
+
+  test = (f0 > h2);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM: %{{.+}} = fcmp ogt float %{{.+}}, %[[#RHS]]
+
+  // NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM: %{{.+}} = fcmp ogt float %{{.+}}, %[[#RHS]]
+
+  test = (i0 > h0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  //      NONATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  //      NATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM: %{{.+}} = fcmp ogt bfloat %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM: %{{.+}} = fcmp ogt bfloat %[[#LHS]], %{{.+}}
+
+  test = (h0 > i0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  //      NONATIVE: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp ogt bfloat %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp ogt bfloat %{{.+}}, %[[#RHS]]
+
+  test = (h2 <= h0);
+  //      NONATIVE: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#A]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#A]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp ole bfloat %{{.+}}, %{{.+}}
+
+  // NATIVE-LLVM: %{{.+}} = fcmp ole bfloat %{{.+}}, %{{.+}}
+
+  test = (h2 <= (__bf16)42.0);
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.bf16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#C]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.bf16
+  // NATIVE-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#C]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp ole bfloat %{{.+}}, 0xR4228
+
+  // NATIVE-LLVM: %{{.+}} = fcmp ole bfloat %{{.+}}, 0xR4228
+
+  test = (h2 <= f0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM: %{{.+}} = fcmp ole float %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM: %{{.+}} = fcmp ole float %[[#LHS]], %{{.+}}
+
+  test = (f2 <= h0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp ole float %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp ole float %{{.+}}, %[[#RHS]]
+
+  test = (i0 <= h0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  //      NONATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  //      NATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM: %{{.+}} = fcmp ole bfloat %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM: %{{.+}} = fcmp ole bfloat %[[#LHS]], %{{.+}}
+
+  test = (h0 <= i0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp ole bfloat %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp ole bfloat %{{.+}}, %[[#RHS]]
+
+  test = (h0 >= h2);
+  //      NONATIVE: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast bool_to_int %[[#A]] : !cir.bool -> !s32i
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+  // NONATIVE-NEXT: %{{.+}} = cir.get_global @test : !cir.ptr<!u32i>
+
+  //      NATIVE: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#A]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp oge bfloat %{{.+}}, %{{.+}}
+
+  // NATIVE-LLVM: %{{.+}} = fcmp oge bfloat %{{.+}}, %{{.+}}
+
+  test = (h0 >= (__bf16)-2.0);
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double
+  // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.bf16
+  // NONATIVE-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#D]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.bf16
+  // NATIVE-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#D]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp oge bfloat %{{.+}}, 0xRC000
+
+  // NATIVE-LLVM: %{{.+}} = fcmp oge bfloat %{{.+}}, 0xRC000
+
+  test = (h0 >= f2);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM: %{{.+}} = fcmp oge float %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM: %{{.+}} = fcmp oge float %[[#LHS]], %{{.+}}
+
+  test = (f0 >= h2);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM: %{{.+}} = fcmp oge float %{{.+}}, %[[#RHS]]
+
+  // NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM: %{{.+}} = fcmp oge float %{{.+}}, %[[#RHS]]
+
+  test = (i0 >= h0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  //      NONATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  //      NATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM: %{{.+}} = fcmp oge bfloat %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM: %{{.+}} = fcmp oge bfloat %[[#LHS]], %{{.+}}
+
+  test = (h0 >= i0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp oge bfloat %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp oge bfloat %{{.+}}, %[[#RHS]]
+
+  test = (h1 == h2);
+  //      NONATIVE: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#A]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#A]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp oeq bfloat %{{.+}}, %{{.+}}
+
+  // NATIVE-LLVM: %{{.+}} = fcmp oeq bfloat %{{.+}}, %{{.+}}
+
+  test = (h1 == (__bf16)1.0);
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.bf16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#C]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.bf16
+  // NATIVE-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#C]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp oeq bfloat %{{.+}}, 0xR3F80
+
+  // NATIVE-LLVM: %{{.+}} = fcmp oeq bfloat %{{.+}}, 0xR3F80
+
+  test = (h1 == f1);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM: %{{.+}} = fcmp oeq float %[[#A]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM: %{{.+}} = fcmp oeq float %[[#A]], %{{.+}}
+
+  test = (f1 == h1);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp oeq float %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp oeq float %{{.+}}, %[[#RHS]]
+
+  test = (i0 == h0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  //      NONATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  //      NATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM: %{{.+}} = fcmp oeq bfloat %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM: %{{.+}} = fcmp oeq bfloat %[[#LHS]], %{{.+}}
+
+  test = (h0 == i0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp oeq bfloat %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp oeq bfloat %{{.+}}, %[[#RHS]]
+
+  test = (h1 != h2);
+  //      NONATIVE: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#A]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#A]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp une bfloat %{{.+}}, %{{.+}}
+
+  // NATIVE-LLVM: %{{.+}} = fcmp une bfloat %{{.+}}, %{{.+}}
+
+  test = (h1 != (__bf16)1.0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.double -> !cir.bf16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.bf16
+  // NATIVE-NEXT: %[[#C:]] = cir.cmp(ne, %{{.+}}, %[[#B]]) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#C]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp une bfloat %{{.+}}, 0xR3F80
+
+  // NATIVE-LLVM: %{{.+}} = fcmp une bfloat %{{.+}}, 0xR3F80
+
+  test = (h1 != f1);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM: %{{.+}} = fcmp une float %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM: %{{.+}} = fcmp une float %[[#LHS]], %{{.+}}
+
+  test = (f1 != h1);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp une float %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp une float %{{.+}}, %[[#RHS]]
+
+  test = (i0 != h0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  //      NONATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  //      NATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM: %{{.+}} = fcmp une bfloat %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM: %{{.+}} = fcmp une bfloat %[[#LHS]], %{{.+}}
+
+  test = (h0 != i0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.bf16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp une bfloat %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp une bfloat %{{.+}}, %[[#RHS]]
+
+  h1 = (h1 ? h2 : h0);
+  //      NONATIVE: %[[#A:]] = cir.cast float_to_bool %{{.+}} : !cir.bf16 -> !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.ternary(%[[#A]], true {
+  //      NONATIVE:   cir.yield %{{.+}} : !cir.bf16
+  // NONATIVE-NEXT: }, false {
+  //      NONATIVE:   cir.yield %{{.+}} : !cir.bf16
+  // NONATIVE-NEXT: }) : (!cir.bool) -> !cir.bf16
+  //      NONATIVE: %{{.+}} = cir.get_global @h1 : !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.cast float_to_bool %{{.+}} : !cir.bf16 -> !cir.bool
+  // NATIVE-NEXT: %[[#B:]] = cir.ternary(%[[#A]], true {
+  //      NATIVE:   cir.yield %{{.+}} : !cir.bf16
+  // NATIVE-NEXT: }, false {
+  //      NATIVE:   cir.yield %{{.+}} : !cir.bf16
+  // NATIVE-NEXT: }) : (!cir.bool) -> !cir.bf16
+  // NATIVE-NEXT: %[[#C:]] = cir.get_global @h1 : !cir.ptr<!cir.bf16>
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#B]], %[[#C]] : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM:   %[[#A:]] = fcmp une bfloat %{{.+}}, 0xR0000
+  // NONATIVE-LLVM-NEXT:   br i1 %[[#A]], label %[[#LABEL_A:]], label %[[#LABEL_B:]]
+  //      NONATIVE-LLVM: [[#LABEL_A]]:
+  // NONATIVE-LLVM-NEXT:   %[[#B:]] = load volatile bfloat, ptr @h2, align 2
+  // NONATIVE-LLVM-NEXT:   br label %[[#LABEL_C:]]
+  //      NONATIVE-LLVM: [[#LABEL_B]]:
+  // NONATIVE-LLVM-NEXT:   %[[#C:]] = load volatile bfloat, ptr @h0, align 2
+  // NONATIVE-LLVM-NEXT:   br label %[[#LABEL_C]]
+  //      NONATIVE-LLVM: [[#LABEL_C]]:
+  // NONATIVE-LLVM-NEXT:   %{{.+}} = phi bfloat [ %[[#C]], %[[#LABEL_B]] ], [ %[[#B]], %[[#LABEL_A]] ]
+
+  //      NATIVE-LLVM:   %[[#A:]] = fcmp une bfloat %{{.+}}, 0xR0000
+  // NATIVE-LLVM-NEXT:   br i1 %[[#A]], label %[[#LABEL_A:]], label %[[#LABEL_B:]]
+  //      NATIVE-LLVM: [[#LABEL_A]]:
+  // NATIVE-LLVM-NEXT:   %[[#B:]] = load volatile bfloat, ptr @h2, align 2
+  // NATIVE-LLVM-NEXT:   br label %[[#LABEL_C:]]
+  //      NATIVE-LLVM: [[#LABEL_B]]:
+  // NATIVE-LLVM-NEXT:   %[[#C:]] = load volatile bfloat, ptr @h0, align 2
+  // NATIVE-LLVM-NEXT:   br label %[[#LABEL_C]]
+  //      NATIVE-LLVM: [[#LABEL_C]]:
+  // NATIVE-LLVM-NEXT:   %{{.+}} = phi bfloat [ %[[#C]], %[[#LABEL_B]] ], [ %[[#B]], %[[#LABEL_A]] ]
+
+  h0 = h1;
+  //      NONATIVE: %[[#A:]] = cir.get_global @h1 : !cir.ptr<!cir.bf16>
+  // NONATIVE-NEXT: %[[#B:]] = cir.load volatile{{.*}}  %[[#A]] : !cir.ptr<!cir.bf16>, !cir.bf16
+  // NONATIVE-NEXT: %[[#C:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#B]], %[[#C]] : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.get_global @h1 : !cir.ptr<!cir.bf16>
+  // NATIVE-NEXT: %[[#B:]] = cir.load volatile{{.*}}  %[[#A]] : !cir.ptr<!cir.bf16>, !cir.bf16
+  // NATIVE-NEXT: %[[#C:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#B]], %[[#C]] : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = load volatile bfloat, ptr @h1, align 2
+  // NONATIVE-LLVM-NEXT: store volatile bfloat %[[#A]], ptr @h0, align 2
+
+  //      NATIVE-LLVM: %[[#A:]] = load volatile bfloat, ptr @h1, align 2
+  // NATIVE-LLVM-NEXT: store volatile bfloat %[[#A]], ptr @h0, align 2
+
+  h0 = (__bf16)-2.0f;
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+  // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  // NONATIVE-LLVM: store volatile bfloat 0xRC000, ptr @h0, align 2
+
+  // NATIVE-LLVM: store volatile bfloat 0xRC000, ptr @h0, align 2
+
+  h0 = f0;
+  //      NONATIVE: %[[#A:]] = cir.get_global @f0 : !cir.ptr<!cir.float>
+  // NONATIVE-NEXT: %[[#B:]] = cir.load volatile{{.*}}  %[[#A]] : !cir.ptr<!cir.float>, !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.get_global @f0 : !cir.ptr<!cir.float>
+  // NATIVE-NEXT: %[[#B:]] = cir.load volatile{{.*}}  %[[#A]] : !cir.ptr<!cir.float>, !cir.float
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+  // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = load volatile float, ptr @f0, align 4
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fptrunc float %[[#A]] to bfloat
+  // NONATIVE-LLVM-NEXT: store volatile bfloat %[[#B]], ptr @h0, align 2
+
+  //      NATIVE-LLVM: %[[#A:]] = load volatile float, ptr @f0, align 4
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fptrunc float %[[#A]] to bfloat
+  // NATIVE-LLVM-NEXT: store volatile bfloat %[[#B]], ptr @h0, align 2
+
+  h0 = i0;
+  //      NONATIVE: %[[#A:]] = cir.get_global @i0 : !cir.ptr<!s32i>
+  // NONATIVE-NEXT: %[[#B:]] = cir.load volatile{{.*}}  %[[#A]] : !cir.ptr<!s32i>, !s32i
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast int_to_float %[[#B]] : !s32i -> !cir.bf16
+  // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.get_global @i0 : !cir.ptr<!s32i>
+  // NATIVE-NEXT: %[[#B:]] = cir.load volatile{{.*}}  %[[#A]] : !cir.ptr<!s32i>, !s32i
+  // NATIVE-NEXT: %[[#C:]] = cir.cast int_to_float %[[#B]] : !s32i -> !cir.bf16
+  // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = load volatile i32, ptr @i0, align 4
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = sitofp i32 %[[#A]] to bfloat
+  // NONATIVE-LLVM-NEXT: store volatile bfloat %[[#B]], ptr @h0, align 2
+
+  //      NATIVE-LLVM: %[[#A:]] = load volatile i32, ptr @i0, align 4
+  // NATIVE-LLVM-NEXT: %[[#B:]] = sitofp i32 %[[#A]] to bfloat
+  // NATIVE-LLVM-NEXT: store volatile bfloat %[[#B]], ptr @h0, align 2
+
+  i0 = h0;
+  //      NONATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NONATIVE-NEXT: %[[#B:]] = cir.load volatile{{.*}}  %[[#A]] : !cir.ptr<!cir.bf16>, !cir.bf16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast float_to_int %[[#B]] : !cir.bf16 -> !s32i
+  // NONATIVE-NEXT: %[[#D:]] = cir.get_global @i0 : !cir.ptr<!s32i>
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %[[#D]] : !s32i, !cir.ptr<!s32i>
+
+  //      NATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NATIVE-NEXT: %[[#B:]] = cir.load volatile{{.*}}  %[[#A]] : !cir.ptr<!cir.bf16>, !cir.bf16
+  // NATIVE-NEXT: %[[#C:]] = cir.cast float_to_int %[[#B]] : !cir.bf16 -> !s32i
+  // NATIVE-NEXT: %[[#D:]] = cir.get_global @i0 : !cir.ptr<!s32i>
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %[[#D]] : !s32i, !cir.ptr<!s32i>
+
+  //      NONATIVE-LLVM: %[[#A:]] = load volatile bfloat, ptr @h0, align 2
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fptosi bfloat %[[#A]] to i32
+  // NONATIVE-LLVM-NEXT: store volatile i32 %[[#B]], ptr @i0, align 4
+
+  //      NATIVE-LLVM: %[[#A:]] = load volatile bfloat, ptr @h0, align 2
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fptosi bfloat %[[#A]] to i32
+  // NATIVE-LLVM-NEXT: store volatile i32 %[[#B]], ptr @i0, align 4
+
+  h0 += h1;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(add, %[[#B]], %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#D]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.binop(add, %{{.+}}, %{{.+}}) : !cir.bf16
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#A]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#A:]] = fadd float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#A]] to bfloat
+
+  // NATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, %{{.+}}
+
+  h0 += (__bf16)1.0f;
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#D:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#E:]] = cir.binop(add, %[[#D]], %[[#C]]) : !cir.float
+  // NONATIVE-NEXT: %[[#F:]] = cir.cast floating %[[#E]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#F]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.float -> !cir.bf16
+  //      NATIVE: %[[#C:]] = cir.binop(add, %{{.+}}, %[[#B]]) : !cir.bf16
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fadd float %[[#A]], 1.000000e+00
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat
+
+  // NATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, 0xR3F80
+
+  h0 += f2;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.float
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fadd float %[[#A]], %{{.+}}
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat
+
+  //      NATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fadd float %[[#A]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat
+
+  i0 += h0;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(add, %[[#B]], %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast float_to_int %[[#C]] : !cir.float -> !s32i
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#D]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.bf16
+  // NATIVE-NEXT: %[[#C:]] = cir.cast float_to_int %[[#B]] : !cir.bf16 -> !s32i
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptosi float %[[#RES]] to i32
+
+  //      NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM-NEXT: %[[#A:]] = fadd bfloat %[[#LHS]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptosi bfloat %[[#A]] to i32
+
+  h0 += i0;
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#C:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(add, %[[#C]], %[[#B]]) : !cir.float
+  // NONATIVE-NEXT: %[[#E:]] = cir.cast floating %[[#D]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#E]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  //      NATIVE: %[[#B:]] = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.bf16
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#B]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM-NEXT: %[[#RHS:]] = fpext bfloat %[[#A]] to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM: %{{.+}} = fadd bfloat %{{.+}}, %[[#RHS]]
+
+  h0 -= h1;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(sub, %[[#B]], %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#D]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.bf16
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#A]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#A:]] = fsub float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#A]] to bfloat
+
+  // NATIVE-LLVM: %{{.+}} = fsub bfloat %{{.+}}, %{{.+}}
+
+  h0 -= (__bf16)1.0;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.double -> !cir.bf16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#C:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(sub, %[[#C]], %[[#B]]) : !cir.float
+  // NONATIVE-NEXT: %[[#E:]] = cir.cast floating %[[#D]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#E]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.bf16
+  //      NATIVE: %[[#C:]] = cir.binop(sub, %{{.+}}, %[[#B]]) : !cir.bf16
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fsub float %[[#A]], 1.000000e+00
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat
+
+  // NATIVE-LLVM: %{{.+}} = fsub bfloat %{{.+}}, 0xR3F80
+
+  h0 -= f2;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.float
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fsub float %[[#A]], %{{.+}}
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat
+
+  //      NATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fsub float %[[#A]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat
+
+  i0 -= h0;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(sub, %[[#B]], %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast float_to_int %[[#C]] : !cir.float -> !s32i
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#D]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.bf16
+  // NATIVE-NEXT: %[[#C:]] = cir.cast float_to_int %[[#B]] : !cir.bf16 -> !s32i
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptosi float %[[#RES]] to i32
+
+  //      NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM-NEXT: %[[#A:]] = fsub bfloat %[[#LHS]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptosi bfloat %[[#A]] to i32
+
+  h0 -= i0;
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#C:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(sub, %[[#C]], %[[#B]]) : !cir.float
+  // NONATIVE-NEXT: %[[#E:]] = cir.cast floating %[[#D]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#E]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  //      NATIVE: %[[#B:]] = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.bf16
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#B]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM-NEXT: %[[#RHS:]] = fpext bfloat %[[#A]] to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM: %{{.+}} = fsub bfloat %{{.+}}, %[[#RHS]]
+
+  h0 *= h1;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(mul, %[[#B]], %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#D]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.binop(mul, %{{.+}}, %{{.+}}) : !cir.bf16
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#A]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#A:]] = fmul float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#A]] to bfloat
+
+  // NATIVE-LLVM: %{{.+}} = fmul bfloat %{{.+}}, %{{.+}}
+
+  h0 *= (__bf16)1.0;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.double -> !cir.bf16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#C:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(mul, %[[#C]], %[[#B]]) : !cir.float
+  // NONATIVE-NEXT: %[[#E:]] = cir.cast floating %[[#D]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#E]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.bf16
+  //      NATIVE: %[[#C:]] = cir.binop(mul, %{{.+}}, %[[#B]]) : !cir.bf16
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fmul float %[[#A]], 1.000000e+00
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat
+
+  // NATIVE-LLVM: %{{.+}} = fmul bfloat %{{.+}}, 0xR3F80
+
+  h0 *= f2;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.float
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fmul float %[[#A]], %{{.+}}
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat
+
+  //      NATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fmul float %[[#A]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat
+
+  i0 *= h0;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(mul, %[[#B]], %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast float_to_int %[[#C]] : !cir.float -> !s32i
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#D]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.bf16
+  // NATIVE-NEXT: %[[#C:]] = cir.cast float_to_int %[[#B]] : !cir.bf16 -> !s32i
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptosi float %[[#RES]] to i32
+
+  //      NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM-NEXT: %[[#A:]] = fmul bfloat %[[#LHS]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptosi bfloat %[[#A]] to i32
+
+  h0 *= i0;
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#C:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(mul, %[[#C]], %[[#B]]) : !cir.float
+  // NONATIVE-NEXT: %[[#E:]] = cir.cast floating %[[#D]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#E]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  //      NATIVE: %[[#B:]] = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.bf16
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#B]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM-NEXT: %[[#RHS:]] = fpext bfloat %[[#A]] to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM: %{{.+}} = fmul bfloat %{{.+}}, %[[#RHS]]
+
+  h0 /= h1;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(div, %[[#B]], %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#D]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.binop(div, %{{.+}}, %{{.+}}) : !cir.bf16
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#A]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#A:]] = fdiv float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#A]] to bfloat
+
+  // NATIVE-LLVM: %{{.+}} = fdiv bfloat %{{.+}}, %{{.+}}
+
+  h0 /= (__bf16)1.0;
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.bf16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#D:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#E:]] = cir.binop(div, %[[#D]], %[[#C]]) : !cir.float
+  // NONATIVE-NEXT: %[[#F:]] = cir.cast floating %[[#E]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#F]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.bf16
+  //      NATIVE: %[[#C:]] = cir.binop(div, %{{.+}}, %[[#B]]) : !cir.bf16
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fdiv float %[[#A]], 1.000000e+00
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat
+
+  // NATIVE-LLVM: %{{.+}} = fdiv bfloat %{{.+}}, 0xR3F80
+
+  h0 /= f2;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.float
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.bf16
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fdiv float %[[#A]], %{{.+}}
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat
+
+  //      NATIVE-LLVM: %[[#A:]] = fpext bfloat %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fdiv float %[[#A]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to bfloat
+
+  i0 /= h0;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(div, %[[#B]], %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast float_to_int %[[#C]] : !cir.float -> !s32i
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#D]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.bf16
+  // NATIVE-NEXT: %[[#C:]] = cir.cast float_to_int %[[#B]] : !cir.bf16 -> !s32i
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext bfloat %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptosi float %[[#RES]] to i32
+
+  //      NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM-NEXT: %[[#A:]] = fdiv bfloat %[[#LHS]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptosi bfloat %[[#A]] to i32
+
+  h0 /= i0;
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.bf16 -> !cir.float
+  //      NONATIVE: %[[#C:]] = cir.cast floating %{{.+}} : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(div, %[[#C]], %[[#B]]) : !cir.float
+  // NONATIVE-NEXT: %[[#E:]] = cir.cast floating %[[#D]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#E]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.bf16
+  //      NATIVE: %[[#B:]] = cir.binop(div, %{{.+}}, %[[#A]]) : !cir.bf16
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#B]], %{{.+}} : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to bfloat
+  // NONATIVE-LLVM-NEXT: %[[#RHS:]] = fpext bfloat %[[#A]] to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext bfloat %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to bfloat
+
+  // NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to bfloat
+  // NATIVE-LLVM: %{{.+}} = fdiv bfloat %{{.+}}, %[[#RHS]]
+
+  h0 = d0;
+  //      NONATIVE: %[[#A:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // NONATIVE-NEXT: %[[#B:]] = cir.load volatile{{.*}}  %[[#A]] : !cir.ptr<!cir.double>, !cir.double
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.bf16
+  // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // NATIVE-NEXT: %[[#B:]] = cir.load volatile{{.*}}  %[[#A]] : !cir.ptr<!cir.double>, !cir.double
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.bf16
+  // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = load volatile double, ptr @d0, align 8
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fptrunc double %[[#A]] to bfloat
+  // NONATIVE-LLVM-NEXT: store volatile bfloat %[[#B]], ptr @h0, align 2
+
+  //      NATIVE-LLVM: %[[#A:]] = load volatile double, ptr @d0, align 8
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fptrunc double %[[#A]] to bfloat
+  // NATIVE-LLVM-NEXT: store volatile bfloat %[[#B]], ptr @h0, align 2
+
+  h0 = (float)d0;
+  //      NONATIVE: %[[#A:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // NONATIVE-NEXT: %[[#B:]] = cir.load volatile{{.*}}  %[[#A]] : !cir.ptr<!cir.double>, !cir.double
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.bf16
+  // NONATIVE-NEXT: %[[#E:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#D]], %[[#E]] : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // NATIVE-NEXT: %[[#B:]] = cir.load volatile{{.*}}  %[[#A]] : !cir.ptr<!cir.double>, !cir.double
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.float
+  // NATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.bf16
+  // NATIVE-NEXT: %[[#E:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#D]], %[[#E]] : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = load volatile double, ptr @d0, align 8
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fptrunc double %[[#A]] to float
+  // NONATIVE-LLVM-NEXT: %[[#C:]] = fptrunc float %[[#B]] to bfloat
+  // NONATIVE-LLVM-NEXT: store volatile bfloat %[[#C]], ptr @h0, align 2
+
+  //      NATIVE-LLVM: %[[#A:]] = load volatile double, ptr @d0, align 8
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fptrunc double %[[#A]] to float
+  // NATIVE-LLVM-NEXT: %[[#C:]] = fptrunc float %[[#B]] to bfloat
+  // NATIVE-LLVM-NEXT: store volatile bfloat %[[#C]], ptr @h0, align 2
+
+  d0 = h0;
+  //      NONATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NONATIVE-NEXT: %[[#B:]] = cir.load volatile{{.*}}  %[[#A]] : !cir.ptr<!cir.bf16>, !cir.bf16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.bf16 -> !cir.double
+  // NONATIVE-NEXT: %[[#D:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %[[#D]] : !cir.double, !cir.ptr<!cir.double>
+
+  //      NATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NATIVE-NEXT: %[[#B:]] = cir.load volatile{{.*}}  %[[#A]] : !cir.ptr<!cir.bf16>, !cir.bf16
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.bf16 -> !cir.double
+  // NATIVE-NEXT: %[[#D:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %[[#D]] : !cir.double, !cir.ptr<!cir.double>
+
+  //      NONATIVE-LLVM: %[[#A:]] = load volatile bfloat, ptr @h0, align 2
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fpext bfloat %[[#A]] to double
+  // NONATIVE-LLVM-NEXT: store volatile double %[[#B]], ptr @d0, align 8
+
+  //      NATIVE-LLVM: %[[#A:]] = load volatile bfloat, ptr @h0, align 2
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fpext bfloat %[[#A]] to double
+  // NATIVE-LLVM-NEXT: store volatile double %[[#B]], ptr @d0, align 8
+
+  d0 = (float)h0;
+  //      NONATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NONATIVE-NEXT: %[[#B:]] = cir.load volatile{{.*}}  %[[#A]] : !cir.ptr<!cir.bf16>, !cir.bf16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.bf16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.double
+  // NONATIVE-NEXT: %[[#E:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#D]], %[[#E]] : !cir.double, !cir.ptr<!cir.double>
+
+  //      NATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NATIVE-NEXT: %[[#B:]] = cir.load volatile{{.*}}  %[[#A]] : !cir.ptr<!cir.bf16>, !cir.bf16
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.bf16 -> !cir.float
+  // NATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.double
+  // NATIVE-NEXT: %[[#E:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#D]], %[[#E]] : !cir.double, !cir.ptr<!cir.double>
+
+  //      NONATIVE-LLVM: %[[#A:]] = load volatile bfloat, ptr @h0, align 2
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fpext bfloat %[[#A]] to float
+  // NONATIVE-LLVM-NEXT: %[[#C:]] = fpext float %[[#B]] to double
+  // NONATIVE-LLVM-NEXT: store volatile double %[[#C]], ptr @d0, align 8
+
+  //      NATIVE-LLVM: %[[#A:]] = load volatile bfloat, ptr @h0, align 2
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fpext bfloat %[[#A]] to float
+  // NATIVE-LLVM-NEXT: %[[#C:]] = fpext float %[[#B]] to double
+  // NATIVE-LLVM-NEXT: store volatile double %[[#C]], ptr @d0, align 8
+
+  h0 = s0;
+  //      NONATIVE: %[[#A:]] = cir.get_global @s0 : !cir.ptr<!s16i>
+  // NONATIVE-NEXT: %[[#B:]] = cir.load{{.*}} %[[#A]] : !cir.ptr<!s16i>, !s16i
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast int_to_float %[[#B]] : !s16i -> !cir.bf16
+  // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NONATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NATIVE: %[[#A:]] = cir.get_global @s0 : !cir.ptr<!s16i>
+  // NATIVE-NEXT: %[[#B:]] = cir.load{{.*}} %[[#A]] : !cir.ptr<!s16i>, !s16i
+  // NATIVE-NEXT: %[[#C:]] = cir.cast int_to_float %[[#B]] : !s16i -> !cir.bf16
+  // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.bf16>
+  // NATIVE-NEXT: cir.store volatile{{.*}} %[[#C]], %[[#D]] : !cir.bf16, !cir.ptr<!cir.bf16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = load i16, ptr @s0, align 2
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = sitofp i16 %[[#A]] to bfloat
+  // NONATIVE-LLVM-NEXT: store volatile bfloat %[[#B]], ptr @h0, align 2
+
+  //      NATIVE-LLVM: %[[#A:]] = load i16, ptr @s0, align 2
+  // NATIVE-LLVM-NEXT: %[[#B:]] = sitofp i16 %[[#A]] to bfloat
+  // NATIVE-LLVM-NEXT: store volatile bfloat %[[#B]], ptr @h0, align 2
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/binassign.cpp b/clang/test/CIR/Incubator/CodeGen/binassign.cpp
new file mode 100644
index 0000000000000..f62119a2cca70
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/binassign.cpp
@@ -0,0 +1,75 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+int foo(int a, int b) {
+  int x = a * b;
+  x *= b;
+  x /= b;
+  x %= b;
+  x += b;
+  x -= b;
+  x >>= b;
+  x <<= b;
+  x &= b;
+  x ^= b;
+  x |= b;
+  return x;
+}
+
+// CHECK: [[Value:%[0-9]+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+// CHECK: = cir.binop(mul,
+// CHECK: = cir.load {{.*}}[[Value]]
+// CHECK: = cir.binop(mul,
+// CHECK: cir.store{{.*}} {{.*}}[[Value]]
+// CHECK: = cir.load {{.*}}[[Value]]
+// CHECK: cir.binop(div,
+// CHECK: cir.store{{.*}} {{.*}}[[Value]]
+// CHECK: = cir.load {{.*}}[[Value]]
+// CHECK: = cir.binop(rem,  {{.*}} loc([[SourceLocation:#loc[0-9]+]])
+// CHECK: cir.store{{.*}} {{.*}}[[Value]]
+// CHECK: = cir.load {{.*}}[[Value]]
+// CHECK: = cir.binop(add,
+// CHECK: cir.store{{.*}} {{.*}}[[Value]]
+// CHECK: = cir.load {{.*}}[[Value]]
+// CHECK: = cir.binop(sub,
+// CHECK: cir.store{{.*}} {{.*}}[[Value]]
+// CHECK: = cir.load {{.*}}[[Value]]
+// CHECK: = cir.shift(right
+// CHECK: cir.store{{.*}} {{.*}}[[Value]]
+// CHECK: = cir.load {{.*}}[[Value]]
+// CHECK: = cir.shift(left
+// CHECK: cir.store{{.*}} {{.*}}[[Value]]
+// CHECK: = cir.load {{.*}}[[Value]]
+// CHECK: = cir.binop(and,
+// CHECK: cir.store{{.*}} {{.*}}[[Value]]
+// CHECK: = cir.load {{.*}}[[Value]]
+// CHECK: = cir.binop(xor,
+// CHECK: cir.store{{.*}} {{.*}}[[Value]]
+// CHECK: = cir.load {{.*}}[[Value]]
+// CHECK: = cir.binop(or,
+// CHECK: cir.store{{.*}} {{.*}}[[Value]]
+
+typedef enum {
+  A = 3,
+} enumy;
+
+enumy getty();
+
+void exec() {
+  enumy r;
+  if ((r = getty()) < 0) {}
+}
+
+// CHECK: cir.func {{.*}} @_Z4execv()
+// CHECK:   %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["r"] {alignment = 4 : i64}
+// CHECK:   cir.scope {
+// CHECK:     %1 = cir.call @_Z5gettyv() : () -> !u32i
+// CHECK:     cir.store{{.*}} %1, %0 : !u32i, !cir.ptr<!u32i>
+// CHECK:     %2 = cir.cast integral %1 : !u32i -> !s32i
+// CHECK:     %3 = cir.const #cir.int<0> : !s32i
+// CHECK:     %4 = cir.cmp(lt, %2, %3) : !s32i, !cir.bool
+// CHECK:     cir.if %4 {
+
+// CHECK: [[SourceLocationB:#loc[0-9]+]] = loc("{{.*}}binassign.cpp":8:8)
+// CHECK: [[SourceLocationA:#loc[0-9]+]] = loc("{{.*}}binassign.cpp":8:3)
+// CHECK: [[SourceLocation]] = loc(fused[[[SourceLocationA]], [[SourceLocationB]]])
diff --git a/clang/test/CIR/Incubator/CodeGen/binop.c b/clang/test/CIR/Incubator/CodeGen/binop.c
new file mode 100644
index 0000000000000..4427e4b605297
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/binop.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+void conditionalResultIimplicitCast(int a, int b, float f) {
+  // Should implicitly cast back to int.
+  int x = a && b;
+  // CHECK: %[[#INT:]] = cir.ternary
+  // CHECK: %{{.+}} = cir.cast bool_to_int %[[#INT]] : !cir.bool -> !s32i
+  float y = f && f;
+  // CHECK: %[[#BOOL:]] = cir.ternary
+  // CHECK: %[[#INT:]] = cir.cast bool_to_int %[[#BOOL]] : !cir.bool -> !s32i
+  // CHECK: %{{.+}} = cir.cast int_to_float %[[#INT]] : !s32i -> !cir.float
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/binop.cpp b/clang/test/CIR/Incubator/CodeGen/binop.cpp
new file mode 100644
index 0000000000000..9609458a966d1
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/binop.cpp
@@ -0,0 +1,118 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -O1 -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+void b0(int a, int b) {
+  int x = a * b;
+  x = x / b;
+  x = x % b;
+  x = x + b;
+  x = x - b;
+  x = x >> b;
+  x = x << b;
+  x = x & b;
+  x = x ^ b;
+  x = x | b;
+}
+
+// CHECK: = cir.binop(mul, %3, %4) nsw : !s32i
+// CHECK: = cir.binop(div, %6, %7) : !s32i
+// CHECK: = cir.binop(rem, %9, %10) : !s32i
+// CHECK: = cir.binop(add, %12, %13) nsw : !s32i
+// CHECK: = cir.binop(sub, %15, %16) nsw : !s32i
+// CHECK: = cir.shift(right, %18 : !s32i, %19 : !s32i) -> !s32i
+// CHECK: = cir.shift(left, %21 : !s32i, %22 : !s32i) -> !s32i
+// CHECK: = cir.binop(and, %24, %25) : !s32i
+// CHECK: = cir.binop(xor, %27, %28) : !s32i
+// CHECK: = cir.binop(or, %30, %31) : !s32i
+
+void b1(bool a, bool b) {
+  bool x = a && b;
+  x = x || b;
+}
+
+// CHECK: cir.ternary(%3, true
+// CHECK-NEXT: %7 = cir.load{{.*}} %1
+// CHECK-NEXT: cir.yield %7
+// CHECK-NEXT: false {
+// CHECK-NEXT: cir.const #false
+// CHECK-NEXT: cir.yield
+
+// CHECK: cir.ternary(%5, true
+// CHECK-NEXT: cir.const #true
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: false {
+// CHECK-NEXT: %7 = cir.load{{.*}} %1
+// CHECK-NEXT: cir.yield
+
+void b2(bool a) {
+ bool x = 0 && a;
+ x = 1 && a;
+ x = 0 || a;
+ x = 1 || a;
+}
+
+// CHECK: %0 = cir.alloca {{.*}} ["a", init]
+// CHECK: %1 = cir.alloca {{.*}} ["x", init]
+// CHECK: %2 = cir.const #false
+// CHECK-NEXT: cir.store{{.*}} %2, %1
+// CHECK-NEXT: %3 = cir.load{{.*}} %0
+// CHECK-NEXT: cir.store{{.*}} %3, %1
+// CHECK-NEXT: %4 = cir.load{{.*}} %0
+// CHECK-NEXT: cir.store{{.*}} %4, %1
+// CHECK-NEXT: %5 = cir.const #true
+// CHECK-NEXT: cir.store{{.*}} %5, %1
+
+void b3(int a, int b, int c, int d) {
+  bool x = (a == b) && (c == d);
+  x = (a == b) || (c == d);
+}
+
+// CHECK: %0 = cir.alloca {{.*}} ["a", init]
+// CHECK-NEXT: %1 = cir.alloca {{.*}} ["b", init]
+// CHECK-NEXT: %2 = cir.alloca {{.*}} ["c", init]
+// CHECK-NEXT: %3 = cir.alloca {{.*}} ["d", init]
+// CHECK-NEXT: %4 = cir.alloca {{.*}} ["x", init]
+// CHECK: %5 = cir.load{{.*}} %0
+// CHECK-NEXT: %6 = cir.load{{.*}} %1
+// CHECK-NEXT: %7 = cir.cmp(eq, %5, %6)
+// CHECK-NEXT: cir.ternary(%7, true
+// CHECK-NEXT: %13 = cir.load{{.*}} %2
+// CHECK-NEXT: %14 = cir.load{{.*}} %3
+// CHECK-NEXT: %15 = cir.cmp(eq, %13, %14)
+// CHECK-NEXT: cir.yield %15
+// CHECK-NEXT: }, false {
+// CHECK-NEXT: %13 = cir.const #false
+// CHECK-NEXT: cir.yield %13
+
+void testFloatingPointBinOps(float a, float b) {
+  a * b;
+  // CHECK: cir.binop(mul, %{{.+}}, %{{.+}}) : !cir.float
+  a / b;
+  // CHECK: cir.binop(div, %{{.+}}, %{{.+}}) : !cir.float
+  a + b;
+  // CHECK: cir.binop(add, %{{.+}}, %{{.+}}) : !cir.float
+  a - b;
+  // CHECK: cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.float
+}
+
+struct S {};
+
+struct HasOpEq
+{
+  bool operator==(const S& other);
+};
+
+void rewritten_binop()
+{
+  HasOpEq s1;
+  S s2;
+  if (s1 != s2)
+    return;
+}
+
+// CHECK-LABEL: _Z15rewritten_binopv
+// CHECK:   cir.scope {
+// CHECK:     cir.call @_ZN7HasOpEqeqERK1S
+// CHECK:     %[[COND:.*]] = cir.unary(not
+// CHECK:     cir.if %[[COND]]
+// CHECK:       cir.return
diff --git a/clang/test/CIR/Incubator/CodeGen/bitfield-union.c b/clang/test/CIR/Incubator/CodeGen/bitfield-union.c
new file mode 100644
index 0000000000000..51d9175878a7d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/bitfield-union.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+typedef union {
+  int x;
+  int y : 4;
+  int z : 8;
+} demo;
+
+// CIR:  !rec_demo = !cir.record<union "demo" {!s32i, !u8i, !u8i}>
+// LLVM: %union.demo = type { i32 }
+// OGCG: %union.demo = type { i32 }
+
+typedef union {
+  int x;
+  int y : 3;
+  int   : 0;
+  int z : 2;
+} zero_bit;
+
+// CIR:  !rec_zero_bit = !cir.record<union "zero_bit" {!s32i, !u8i, !u8i} #cir.record.decl.ast>
+// LLVM: %union.zero_bit = type { i32 }
+// OGCG: %union.zero_bit = type { i32 }
+
+demo d;
+zero_bit z;
+
+void f() {
+    demo d;
+    d.x = 1;
+    d.y = 2;
+    d.z = 0;
+}
+
+// CIR: #bfi_y = #cir.bitfield_info<name = "y", storage_type = !u8i, size = 4, offset = 0, is_signed = true>
+// CIR: #bfi_z = #cir.bitfield_info<name = "z", storage_type = !u8i, size = 8, offset = 0, is_signed = true>
+
+// CIR:   cir.func {{.*}} @f
+// CIR:    [[ALLOC:%.*]] = cir.alloca !rec_demo, !cir.ptr<!rec_demo>, ["d"] {alignment = 4 : i64}
+// CIR:    [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR:    [[X:%.*]] = cir.get_member [[ALLOC]][0] {name = "x"} : !cir.ptr<!rec_demo> -> !cir.ptr<!s32i>
+// CIR:    cir.store align(4) [[ONE]], [[X]] : !s32i, !cir.ptr<!s32i>
+// CIR:    [[TWO:%.*]] = cir.const #cir.int<2> : !s32i
+// CIR:    [[Y:%.*]] = cir.get_member [[ALLOC]][1] {name = "y"} : !cir.ptr<!rec_demo> -> !cir.ptr<!u8i>
+// CIR:    [[SET:%.*]] = cir.set_bitfield align(4) (#bfi_y, [[Y]] : !cir.ptr<!u8i>, [[TWO]] : !s32i) -> !s32i
+// CIR:    [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR:    [[Z:%.*]] = cir.get_member [[ALLOC]][2] {name = "z"} : !cir.ptr<!rec_demo> -> !cir.ptr<!u8i>
+// CIR:    [[SET2:%.*]] = cir.set_bitfield align(4) (#bfi_z, [[Z]] : !cir.ptr<!u8i>, [[ZERO]] : !s32i) -> !s32i
+// CIR:    cir.return
+
+// LLVM: define dso_local void @f
+// LLVM:   [[ALLOC:%.*]] = alloca %union.demo, i64 1, align 4
+// LLVM:   store i32 1, ptr [[ALLOC]], align 4
+// LLVM:   [[BFLOAD:%.*]] = load i8, ptr [[ALLOC]], align 4
+// LLVM:   [[CLEAR:%.*]] = and i8 [[BFLOAD]], -16
+// LLVM:   [[SET:%.*]] = or i8 [[CLEAR]], 2
+// LLVM:   store i8 [[SET]], ptr [[ALLOC]], align 4
+// LLVM:   store i8 0, ptr [[ALLOC]], align 4
+
+// OGCG: define dso_local void @f
+// OGCG:   [[ALLOC:%.*]] = alloca %union.demo, align 4
+// OGCG:   store i32 1, ptr [[ALLOC]], align 4
+// OGCG:   [[BFLOAD:%.*]] = load i8, ptr [[ALLOC]], align 4
+// OGCG:   [[CLEAR:%.*]] = and i8 [[BFLOAD]], -16
+// OGCG:   [[SET:%.*]] = or i8 [[CLEAR]], 2
+// OGCG:   store i8 [[SET]], ptr [[ALLOC]], align 4
+// OGCG:   store i8 0, ptr [[ALLOC]], align 4
diff --git a/clang/test/CIR/Incubator/CodeGen/bitfields.c b/clang/test/CIR/Incubator/CodeGen/bitfields.c
new file mode 100644
index 0000000000000..141c5681a2c37
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/bitfields.c
@@ -0,0 +1,158 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+struct __long {
+  struct __attribute__((__packed__)) {
+      unsigned __is_long_ : 1;
+      unsigned __cap_ : sizeof(unsigned) * 8 - 1;
+  };
+  unsigned __size_;
+  unsigned *__data_;
+};
+// CHECK-DAG: !rec_anon2E0 = !cir.record<struct "anon.0" {!u32i} #cir.record.decl.ast>
+// CHECK-DAG: !rec___long = !cir.record<struct "__long" {!rec_anon2E0, !u32i, !cir.ptr<!u32i>}>
+// CHECK-DAG: !rec_anon_struct = !cir.record<struct  {!u8i, !u8i, !cir.array<!u8i x 2>, !s32i}>
+void m() {
+  struct __long l;
+}
+
+typedef struct {
+  int a : 4;
+  int b : 5;
+  int c;
+} D;
+// CHECK-DAG: !rec_D = !cir.record<struct "D" {!u16i, !s32i}>
+
+typedef struct {
+  int a : 4;
+  int b : 27;
+  int c : 17;
+  int d : 2;
+  int e : 15;
+  unsigned f; // type other than int above, not a bitfield
+} S;
+// CHECK-DAG: !rec_S = !cir.record<struct "S" {!u64i, !u16i, !u32i}>
+// CHECK-DAG: #bfi_d = #cir.bitfield_info<name = "d", storage_type = !u64i, size = 2, offset = 49, is_signed = true>
+// CHECK-DAG: #bfi_e = #cir.bitfield_info<name = "e", storage_type = !u16i, size = 15, offset = 0, is_signed = true>
+typedef struct {
+  int a : 3;  // one bitfield with size < 8
+  unsigned b;
+} T;
+// CHECK-DAG: !rec_T = !cir.record<struct "T" {!u8i, !u32i} #cir.record.decl.ast>
+// CHECK-DAG: #bfi_a = #cir.bitfield_info<name = "a", storage_type = !u8i, size = 3, offset = 0, is_signed = true>
+
+typedef struct {
+    char a;
+    char b;
+    char c;
+
+    // startOffset 24 bits, new storage from here
+    int d: 2;
+    int e: 2;
+    int f: 4;
+    int g: 25;
+    int h: 3;
+    int i: 4;
+    int j: 3;
+    int k: 8;
+
+    int l: 14;
+} U;
+// CHECK-DAG: !cir.record<struct "U" packed {!s8i, !s8i, !s8i, !u8i, !u64i}>
+
+// CHECK-DAG: !rec_G = !cir.record<struct "G" {!u16i, !s32i} #cir.record.decl.ast>
+
+// CHECK: cir.func {{.*@store_field}}
+// CHECK:   [[TMP0:%.*]] = cir.alloca !rec_S, !cir.ptr<!rec_S>
+// CHECK:   [[TMP1:%.*]] = cir.const #cir.int<3> : !s32i
+// CHECK:   [[TMP2:%.*]] = cir.get_member [[TMP0]][1] {name = "e"} : !cir.ptr<!rec_S> -> !cir.ptr<!u16i>
+// CHECK:   cir.set_bitfield align(4) (#bfi_e, [[TMP2]] : !cir.ptr<!u16i>, [[TMP1]] : !s32i)
+void store_field() {
+  S s;
+  s.e = 3;
+}
+
+// CHECK: cir.func {{.*@load_field}}
+// CHECK:   [[TMP0:%.*]] = cir.alloca !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>, ["s", init]
+// CHECK:   [[TMP1:%.*]] = cir.load{{.*}} [[TMP0]] : !cir.ptr<!cir.ptr<!rec_S>>, !cir.ptr<!rec_S>
+// CHECK:   [[TMP2:%.*]] = cir.get_member [[TMP1]][0] {name = "d"} : !cir.ptr<!rec_S> -> !cir.ptr<!u64i>
+// CHECK:   [[TMP3:%.*]] = cir.get_bitfield align(4) (#bfi_d, [[TMP2]] : !cir.ptr<!u64i>) -> !s32i
+int load_field(S* s) {
+  return s->d;
+}
+
+// CHECK: cir.func {{.*@unOp}}
+// CHECK:   [[TMP0:%.*]] = cir.get_member {{.*}}[0] {name = "d"} : !cir.ptr<!rec_S> -> !cir.ptr<!u64i>
+// CHECK:   [[TMP1:%.*]] = cir.get_bitfield align(4) (#bfi_d, [[TMP0]] : !cir.ptr<!u64i>) -> !s32i
+// CHECK:   [[TMP2:%.*]] = cir.unary(inc, [[TMP1]]) nsw : !s32i, !s32i
+// CHECK:   cir.set_bitfield align(4) (#bfi_d, [[TMP0]] : !cir.ptr<!u64i>, [[TMP2]] : !s32i)
+void unOp(S* s) {
+  s->d++;
+}
+
+// CHECK: cir.func {{.*@binOp}}
+// CHECK:   [[TMP0:%.*]] = cir.const #cir.int<42> : !s32i
+// CHECK:   [[TMP1:%.*]] = cir.get_member {{.*}}[0] {name = "d"} : !cir.ptr<!rec_S> -> !cir.ptr<!u64i>
+// CHECK:   [[TMP2:%.*]] = cir.get_bitfield align(4) (#bfi_d, [[TMP1]] : !cir.ptr<!u64i>) -> !s32i
+// CHECK:   [[TMP3:%.*]] = cir.binop(or, [[TMP2]], [[TMP0]]) : !s32i
+// CHECK:   cir.set_bitfield align(4) (#bfi_d, [[TMP1]] : !cir.ptr<!u64i>, [[TMP3]] : !s32i)
+void binOp(S* s) {
+   s->d |= 42;
+}
+
+
+// CHECK: cir.func {{.*@load_non_bitfield}}
+// CHECK:   cir.get_member {{%.}}[2] {name = "f"} : !cir.ptr<!rec_S> -> !cir.ptr<!u32i>
+unsigned load_non_bitfield(S *s) {
+  return s->f;
+}
+
+// just create a usage of T type
+// CHECK: cir.func {{.*@load_one_bitfield}}
+int load_one_bitfield(T* t) {
+  return t->a;
+}
+
+// CHECK: cir.func {{.*@createU}}
+void createU() {
+  U u;
+}
+
+// for this struct type we create an anon structure with different storage types in initialization
+// CHECK: cir.func {{.*@createD}}
+// CHECK:   %0 = cir.alloca !rec_D, !cir.ptr<!rec_D>, ["d"] {alignment = 4 : i64}
+// CHECK:   %1 = cir.cast bitcast %0 : !cir.ptr<!rec_D> -> !cir.ptr<!rec_anon_struct>
+// CHECK:   %2 = cir.const #cir.const_record<{#cir.int<33> : !u8i, #cir.int<0> : !u8i, #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 2>, #cir.int<3> : !s32i}> : !rec_anon_struct
+// CHECK:   cir.store{{.*}} %2, %1 : !rec_anon_struct, !cir.ptr<!rec_anon_struct>
+void createD() {
+  D d = {1,2,3};
+}
+
+// check the -1 is stored to the ret value
+// LLVM: define dso_local i32 {{@.*get_a.*}}
+// LLVM:    %[[V1:.*]] = alloca i32
+// LLVM:    store i32 -1, ptr %[[V1]], align 4
+// LLVM:    %[[V2:.*]] = load i32, ptr %[[V1]], align 4
+// LLVM:    ret i32 %[[V2]]
+int get_a(T *t) {
+  return (t->a = 7);
+}
+
+typedef struct {
+  int x : 15;
+  int y ;
+} G;
+
+// CHECK: cir.global external @g = #cir.const_record<{#cir.int<133> : !u8i, #cir.int<127> : !u8i, #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 2>, #cir.int<254> : !s32i}> : !rec_anon_struct
+G g = { -123, 254UL};
+
+// CHECK: cir.func {{.*@get_y}}
+// CHECK:   %[[V1:.*]] = cir.get_global @g : !cir.ptr<!rec_anon_struct>
+// CHECK:   %[[V2:.*]] = cir.cast bitcast %[[V1]] : !cir.ptr<!rec_anon_struct> -> !cir.ptr<!rec_G>
+// CHECK:   %[[V3:.*]] = cir.get_member %[[V2]][1] {name = "y"} : !cir.ptr<!rec_G> -> !cir.ptr<!s32i>
+// CHECK:   cir.load{{.*}} %[[V3]] : !cir.ptr<!s32i>, !s32i
+int get_y() {
+  return g.y;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/bitfields.cpp b/clang/test/CIR/Incubator/CodeGen/bitfields.cpp
new file mode 100644
index 0000000000000..59ec477c331aa
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/bitfields.cpp
@@ -0,0 +1,65 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+struct __long {
+  struct __attribute__((__packed__)) {
+      unsigned __is_long_ : 1;
+      unsigned __cap_ : sizeof(unsigned) * 8 - 1;
+  };
+  unsigned __size_;
+  unsigned *__data_;
+};
+// CHECK-DAG: !rec___long = !cir.record<struct "__long" {!rec_anon2E0, !u32i, !cir.ptr<!u32i>}>
+
+void m() {
+  __long l;
+}
+
+typedef struct {
+  int a : 4;
+  int b : 27;
+  int c : 17;
+  int d : 2;
+  int e : 15;
+  unsigned f; // type other than int above, not a bitfield
+} S;
+// CHECK-DAG: !rec_S = !cir.record<struct "S" {!u64i, !u16i, !u32i}>
+// CHECK-DAG: #bfi_a = #cir.bitfield_info<name = "a", storage_type = !u64i, size = 4, offset = 0, is_signed = true>
+typedef struct {
+  int a : 3;  // one bitfield with size < 8
+  unsigned b;
+} T;
+// CHECK-DAG: !rec_T = !cir.record<struct "T" {!u8i, !u32i} #cir.record.decl.ast>
+// CHECK-DAG: #bfi_a1 = #cir.bitfield_info<name = "a", storage_type = !u8i, size = 3, offset = 0, is_signed = true>
+// CHECK-DAG: !rec_anon2E0 = !cir.record<struct "anon.0" {!u32i} #cir.record.decl.ast>
+
+// CHECK: cir.func {{.*}} @_Z11store_field
+// CHECK:   [[TMP0:%.*]] = cir.alloca !rec_S, !cir.ptr<!rec_S>
+// CHECK:   [[TMP1:%.*]] = cir.const #cir.int<3> : !s32i
+// CHECK:   [[TMP2:%.*]] = cir.get_member [[TMP0]][0] {name = "a"} : !cir.ptr<!rec_S> -> !cir.ptr<!u64i>
+// CHECK:   cir.set_bitfield align(4) (#bfi_a, [[TMP2]] : !cir.ptr<!u64i>, [[TMP1]] : !s32i)
+void store_field() {
+  S s;
+  s.a = 3;
+}
+
+// CHECK: cir.func {{.*}} @_Z10load_field
+// CHECK:   [[TMP0:%.*]] = cir.alloca !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>, ["s", init, const]
+// CHECK:   [[TMP1:%.*]] = cir.load [[TMP0]] : !cir.ptr<!cir.ptr<!rec_S>>, !cir.ptr<!rec_S>
+// CHECK:   [[TMP2:%.*]] = cir.get_member [[TMP1]][0] {name = "d"} : !cir.ptr<!rec_S> -> !cir.ptr<!u64i>
+// CHECK:   [[TMP3:%.*]] = cir.get_bitfield align(4) (#bfi_d, [[TMP2]] : !cir.ptr<!u64i>) -> !s32i
+int load_field(S& s) {
+  return s.d;
+}
+
+// CHECK: cir.func {{.*}} @_Z17load_non_bitfield
+// CHECK:   cir.get_member {{%.}}[2] {name = "f"} : !cir.ptr<!rec_S> -> !cir.ptr<!u32i>
+unsigned load_non_bitfield(S& s) {
+  return s.f;
+}
+
+// just create a usage of T type
+// CHECK: cir.func {{.*}} @_Z17load_one_bitfield
+int load_one_bitfield(T& t) {
+  return t.a;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/bitfields_be.c b/clang/test/CIR/Incubator/CodeGen/bitfields_be.c
new file mode 100644
index 0000000000000..5c3acb07a7d15
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/bitfields_be.c
@@ -0,0 +1,79 @@
+// RUN: %clang_cc1  -triple aarch64_be-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1  -triple aarch64_be-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1  -triple aarch64_be-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+typedef struct {
+    int a : 4;
+    int b : 11;
+    int c : 17;
+} S;
+
+void init(S* s) {
+    s->a = -4;
+    s->b = 42;
+    s->c = -12345;
+}
+
+// field 'a'
+// CIR:    %[[PTR0:.*]] = cir.alloca !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>, ["s", init] {alignment = 8 : i64}
+// CIR:    %[[CONST1:.*]] = cir.const #cir.int<4> : !s32i
+// CIR:    %[[MIN1:.*]] = cir.unary(minus, %[[CONST1]]) nsw : !s32i, !s32i
+// CIR:    %[[VAL0:.*]] = cir.load align(8) %[[PTR0]] : !cir.ptr<!cir.ptr<!rec_S>>, !cir.ptr<!rec_S>
+// CIR:    %[[GET0:.*]] = cir.get_member %[[VAL0]][0] {name = "a"} : !cir.ptr<!rec_S> -> !cir.ptr<!u32i>
+// CIR:    %[[SET0:.*]] = cir.set_bitfield align(4) (#bfi_a, %[[GET0]] : !cir.ptr<!u32i>, %[[MIN1]] : !s32i) -> !s32i
+
+// LLVM:   %[[PTR0:.*]] = load ptr
+// LLVM:   %[[GET0:.*]] = getelementptr %struct.S, ptr %[[PTR0]], i32 0, i32 0
+// LLVM:   %[[VAL0:.*]] = load i32, ptr %[[GET0]], align 4
+// LLVM:   %[[AND0:.*]] = and i32 %[[VAL0]], 268435455
+// LLVM:   %[[OR0:.*]] = or i32 %[[AND0]], -1073741824
+// LLVM:   store i32 %[[OR0]], ptr %[[GET0]], align 4
+
+// OGCG:   %[[PTR0:.*]] = load ptr
+// OGCG:   %[[VAL0:.*]] = load i32, ptr %[[PTR0]], align 4
+// OGCG:   %[[AND0:.*]] = and i32 %[[VAL0]], 268435455
+// OGCG:   %[[OR0:.*]] = or i32 %[[AND0]], -1073741824
+// OGCG:   store i32 %[[OR0]], ptr %[[PTR0]], align 4
+
+// field 'b'
+// CIR:    %[[CONST2:.*]] = cir.const #cir.int<42> : !s32i
+// CIR:    %[[VAL1:.*]] = cir.load align(8) %[[PTR0]] : !cir.ptr<!cir.ptr<!rec_S>>, !cir.ptr<!rec_S>
+// CIR:    %[[GET1:.*]] = cir.get_member %[[VAL1]][0] {name = "b"} : !cir.ptr<!rec_S> -> !cir.ptr<!u32i>
+// CIR:    %[[SET1:.*]] = cir.set_bitfield align(4) (#bfi_b, %[[GET1]] : !cir.ptr<!u32i>, %[[CONST2]] : !s32i) -> !s32i
+
+// LLVM:  %[[PTR1:.*]] = load ptr
+// LLVM:  %[[GET1:.*]] = getelementptr %struct.S, ptr %[[PTR1]], i32 0, i32 0
+// LLVM:  %[[VAL1:.*]] = load i32, ptr %[[GET1]], align 4
+// LLVM:  %[[AND1:.*]] = and i32 %[[VAL1]], -268304385
+// LLVM:  %[[OR1:.*]] = or i32 %[[AND1]], 5505024
+// LLVM:  store i32 %[[OR1]], ptr %[[GET1]], align 4
+
+// OGCG:   %[[PTR1:.*]] = load ptr
+// OGCG:   %[[VAL1:.*]] = load i32, ptr %[[PTR1]], align 4
+// OGCG:   %[[AND1:.*]] = and i32 %[[VAL1]], -268304385
+// OGCG:   %[[OR1:.*]] = or i32 %[[AND1]], 5505024
+// OGCG:   store i32 %[[OR1]], ptr %[[PTR1]], align 4
+
+// field 'c'
+// CIR:    %[[CONST3:.*]] = cir.const #cir.int<12345> : !s32i
+// CIR:    %[[MIN2:.*]] = cir.unary(minus, %[[CONST3]]) nsw : !s32i, !s32i
+// CIR:    %[[VAL2:.*]] = cir.load align(8) %[[PTR0]] : !cir.ptr<!cir.ptr<!rec_S>>, !cir.ptr<!rec_S>
+// CIR:    %[[GET2:.*]] = cir.get_member %[[VAL2]][0] {name = "c"} : !cir.ptr<!rec_S> -> !cir.ptr<!u32i>
+// CIR:    %[[SET2:.*]] = cir.set_bitfield align(4) (#bfi_c, %[[GET2]] : !cir.ptr<!u32i>, %[[MIN2]] : !s32i) -> !s32i
+
+// LLVM:  %[[PTR2:.*]] = load ptr
+// LLVM:  %[[GET2:.*]] = getelementptr %struct.S, ptr  %[[PTR2]], i32 0, i32 0
+// LLVM:  %[[VAL2:.*]] = load i32, ptr %[[GET2]], align 4
+// LLVM:  %[[AND2:.*]] = and i32 %[[VAL2]], -131072
+// LLVM:  %[[OR2:.*]] = or i32 %[[AND2]], 118727
+// LLVM:  store i32 %[[OR2]], ptr %[[GET2]], align 4
+
+// OGCG:   %[[PTR2:.*]] = load ptr
+// OGCG:   %[[VAL2:.*]] = load i32, ptr %[[PTR2]], align 4
+// OGCG:   %[[AND2:.*]] = and i32 %[[VAL2]], -131072
+// OGCG:   %[[OR2:.*]] = or i32 %[[AND2]], 118727
+// OGCG:   store i32 %[[OR2]], ptr %[[PTR2]], align 4
+
diff --git a/clang/test/CIR/Incubator/CodeGen/bitint.c b/clang/test/CIR/Incubator/CodeGen/bitint.c
new file mode 100644
index 0000000000000..2f5d36c03ea5f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/bitint.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+void VLATest(_BitInt(3) A, _BitInt(42) B, _BitInt(17) C) {
+  int AR1[A];
+  int AR2[B];
+  int AR3[C];
+}
+
+//      CHECK: cir.func {{.*}} @VLATest
+//      CHECK:   %[[#A:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.int<s, 3>>, !cir.int<s, 3>
+// CHECK-NEXT:   %[[#A_PROMOTED:]] = cir.cast integral %[[#A]] : !cir.int<s, 3> -> !u64i
+// CHECK-NEXT:   %[[#SP:]] = cir.stack_save : !cir.ptr<!u8i>
+// CHECK-NEXT:   cir.store{{.*}} %[[#SP]], %{{.+}} : !cir.ptr<!u8i>, !cir.ptr<!cir.ptr<!u8i>>
+// CHECK-NEXT:   %{{.+}} = cir.alloca !s32i, !cir.ptr<!s32i>, %[[#A_PROMOTED]] : !u64i
+// CHECK-NEXT:   %[[#B:]] = cir.load{{.*}} %1 : !cir.ptr<!cir.int<s, 42>>, !cir.int<s, 42>
+// CHECK-NEXT:   %[[#B_PROMOTED:]] = cir.cast integral %[[#B]] : !cir.int<s, 42> -> !u64i
+// CHECK-NEXT:   %{{.+}} = cir.alloca !s32i, !cir.ptr<!s32i>, %[[#B_PROMOTED]] : !u64i
+// CHECK-NEXT:   %[[#C:]] = cir.load{{.*}} %2 : !cir.ptr<!cir.int<s, 17>>, !cir.int<s, 17>
+// CHECK-NEXT:   %[[#C_PROMOTED:]] = cir.cast integral %[[#C]] : !cir.int<s, 17> -> !u64i
+// CHECK-NEXT:   %{{.+}} = cir.alloca !s32i, !cir.ptr<!s32i>, %[[#C_PROMOTED]] : !u64i
+//      CHECK: }
diff --git a/clang/test/CIR/Incubator/CodeGen/bitint.cpp b/clang/test/CIR/Incubator/CodeGen/bitint.cpp
new file mode 100644
index 0000000000000..1837333ec62d9
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/bitint.cpp
@@ -0,0 +1,85 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+using i10 = signed _BitInt(10);
+using u10 = unsigned _BitInt(10);
+
+unsigned _BitInt(1) GlobSize1 = 0;
+// CHECK: cir.global external @GlobSize1 = #cir.int<0> : !cir.int<u, 1>
+
+i10 test_signed(i10 arg) {
+  return arg;
+}
+
+// CHECK: cir.func {{.*}} @_Z11test_signedDB10_(%arg0: !cir.int<s, 10> loc({{.*}}) -> !cir.int<s, 10>
+// CHECK: }
+
+u10 test_unsigned(u10 arg) {
+  return arg;
+}
+
+// CHECK: cir.func {{.*}} @_Z13test_unsignedDU10_(%arg0: !cir.int<u, 10> loc({{.*}}) -> !cir.int<u, 10>
+// CHECK: }
+
+i10 test_init() {
+  return 42;
+}
+
+//      CHECK: cir.func {{.*}} @_Z9test_initv() -> !cir.int<s, 10>
+//      CHECK:   %[[#LITERAL:]] = cir.const #cir.int<42> : !s32i
+// CHECK-NEXT:   %{{.+}} = cir.cast integral %[[#LITERAL]] : !s32i -> !cir.int<s, 10>
+//      CHECK: }
+
+void test_init_for_mem() {
+  i10 x = 42;
+}
+
+//      CHECK: cir.func {{.*}} @_Z17test_init_for_memv()
+//      CHECK:   %[[#LITERAL:]] = cir.const #cir.int<42> : !s32i
+// CHECK-NEXT:   %[[#INIT:]] = cir.cast integral %[[#LITERAL]] : !s32i -> !cir.int<s, 10>
+// CHECK-NEXT:   cir.store{{.*}} %[[#INIT]], %{{.+}} : !cir.int<s, 10>, !cir.ptr<!cir.int<s, 10>>
+//      CHECK: }
+
+i10 test_arith(i10 lhs, i10 rhs) {
+  return lhs + rhs;
+}
+
+//      CHECK: cir.func {{.*}} @_Z10test_arithDB10_S_(%arg0: !cir.int<s, 10> loc({{.+}}), %arg1: !cir.int<s, 10> loc({{.+}})) -> !cir.int<s, 10>
+//      CHECK:   %[[#LHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.int<s, 10>>, !cir.int<s, 10>
+// CHECK-NEXT:   %[[#RHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.int<s, 10>>, !cir.int<s, 10>
+// CHECK-NEXT:   %{{.+}} = cir.binop(add, %[[#LHS]], %[[#RHS]]) nsw : !cir.int<s, 10>
+//      CHECK: }
+
+void Size1ExtIntParam(unsigned _BitInt(1) A) {
+  unsigned _BitInt(1) B[5];
+  B[2] = A;
+}
+
+//      CHECK: cir.func {{.*}} @_Z16Size1ExtIntParamDU1_
+//      CHECK:   %[[#A:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.int<u, 1>>, !cir.int<u, 1>
+// CHECK-NEXT:   %[[#IDX:]] = cir.const #cir.int<2> : !s32i
+// CHECK-NEXT:   %[[#ELEM:]] = cir.get_element %1[%[[#IDX]]] : (!cir.ptr<!cir.array<!cir.int<u, 1> x 5>>, !s32i) -> !cir.ptr<!cir.int<u, 1>>
+// CHECK-NEXT:   cir.store{{.*}} %[[#A]], %[[#ELEM]] : !cir.int<u, 1>, !cir.ptr<!cir.int<u, 1>>
+//      CHECK: }
+
+struct S {
+  _BitInt(17) A;
+  _BitInt(10) B;
+  _BitInt(17) C;
+};
+
+void OffsetOfTest(void) {
+  int A = __builtin_offsetof(struct S,A);
+  int B = __builtin_offsetof(struct S,B);
+  int C = __builtin_offsetof(struct S,C);
+}
+
+// CHECK: cir.func {{.*}} @_Z12OffsetOfTestv()
+// CHECK:   %{{.+}} = cir.const #cir.int<0> : !u64i
+// CHECK:   %{{.+}} = cir.const #cir.int<4> : !u64i
+// CHECK:   %{{.+}} = cir.const #cir.int<8> : !u64i
+// CHECK: }
+
+_BitInt(2) ParamPassing(_BitInt(15) a, _BitInt(31) b) {}
+
+// CHECK: cir.func {{.*}} @_Z12ParamPassingDB15_DB31_(%arg0: !cir.int<s, 15> loc({{.+}}), %arg1: !cir.int<s, 31> loc({{.+}})) -> !cir.int<s, 2>
diff --git a/clang/test/CIR/Incubator/CodeGen/bool.c b/clang/test/CIR/Incubator/CodeGen/bool.c
new file mode 100644
index 0000000000000..ce293801574d3
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/bool.c
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+#include <stdbool.h>
+
+typedef struct {
+  bool x;
+} S;
+
+// CHECK:  cir.func {{.*}} @init_bool
+// CHECK:    [[ALLOC:%.*]] = cir.alloca !rec_S, !cir.ptr<!rec_S>
+// CHECK:    [[ZERO:%.*]] = cir.const #cir.zero : !rec_S
+// CHECK:    cir.store{{.*}} [[ZERO]], [[ALLOC]] : !rec_S, !cir.ptr<!rec_S>
+void init_bool(void) {
+  S s = {0};
+}
+
+// CHECK:  cir.func {{.*}} @store_bool
+// CHECK:    [[TMP0:%.*]] = cir.alloca !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>
+// CHECK:    cir.store{{.*}} %arg0, [[TMP0]] : !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>
+// CHECK:    [[TMP1:%.*]] = cir.const #cir.int<0> : !s32i
+// CHECK:    [[TMP2:%.*]] = cir.cast int_to_bool [[TMP1]] : !s32i -> !cir.bool
+// CHECK:    [[TMP3:%.*]] = cir.load{{.*}} [[TMP0]] : !cir.ptr<!cir.ptr<!rec_S>>, !cir.ptr<!rec_S>
+// CHECK:    [[TMP4:%.*]] = cir.get_member [[TMP3]][0] {name = "x"} : !cir.ptr<!rec_S> -> !cir.ptr<!cir.bool>
+// CHECK:    cir.store{{.*}} [[TMP2]], [[TMP4]] : !cir.bool, !cir.ptr<!cir.bool>
+void store_bool(S *s) {
+  s->x = false;
+}
+
+// CHECK:  cir.func {{.*}} @load_bool
+// CHECK:    [[TMP0:%.*]] = cir.alloca !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>, ["s", init] {alignment = 8 : i64}
+// CHECK:    [[TMP1:%.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["x", init] {alignment = 1 : i64}
+// CHECK:    cir.store{{.*}} %arg0, [[TMP0]] : !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>
+// CHECK:    [[TMP2:%.*]] = cir.load{{.*}} [[TMP0]] : !cir.ptr<!cir.ptr<!rec_S>>, !cir.ptr<!rec_S>
+// CHECK:    [[TMP3:%.*]] = cir.get_member [[TMP2]][0] {name = "x"} : !cir.ptr<!rec_S> -> !cir.ptr<!cir.bool>
+// CHECK:    [[TMP4:%.*]] = cir.load{{.*}} [[TMP3]] : !cir.ptr<!cir.bool>, !cir.bool
+void load_bool(S *s) {
+  bool x = s->x;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/bswap.cpp b/clang/test/CIR/Incubator/CodeGen/bswap.cpp
new file mode 100644
index 0000000000000..1525ba4aa9eff
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/bswap.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+using u16 = unsigned short;
+using u32 = unsigned int;
+using u64 = unsigned long long;
+
+u16 bswap_u16(u16 x) {
+  return __builtin_bswap16(x);
+}
+
+// CHECK: cir.func {{.*}} @_Z9bswap_u16t
+// CHECK:   %{{.+}} = cir.byte_swap %{{.+}} : !u16i
+// CHECK: }
+
+u32 bswap_u32(u32 x) {
+  return __builtin_bswap32(x);
+}
+
+// CHECK: cir.func {{.*}} @_Z9bswap_u32j
+// CHECK:   %{{.+}} = cir.byte_swap %{{.+}} : !u32i
+// CHECK: }
+
+u64 bswap_u64(u64 x) {
+  return __builtin_bswap64(x);
+}
+
+// CHECK: cir.func {{.*}} @_Z9bswap_u64y
+// CHECK:   %{{.+}} = cir.byte_swap %{{.+}} : !u64i
+// CHECK: }
diff --git a/clang/test/CIR/Incubator/CodeGen/build-deferred.cpp b/clang/test/CIR/Incubator/CodeGen/build-deferred.cpp
new file mode 100644
index 0000000000000..cfd969cadc00c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/build-deferred.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -fclangir-build-deferred-threshold=0 %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+class String {
+  char *storage{nullptr};
+  long size;
+  long capacity;
+
+public:
+  String() : size{0} {}
+  String(int size) : size{size} {}
+  String(const char *s) {}
+};
+
+void test() {
+  String s1{};
+  String s2{1};
+  String s3{"abcdefghijklmnop"};
+}
+
+// CHECK-NOT: cir.func {{.*}} @_ZN6StringC2Ev
+// CHECK-NOT: cir.func {{.*}} @_ZN6StringC2Ei
+// CHECK-NOT: cir.func {{.*}} @_ZN6StringC2EPKc
+
+// CHECK: cir.func {{.*}} @_Z4testv()
+// CHECK:   cir.call @_ZN6StringC1Ev(%0) : (!cir.ptr<!rec_String>) -> ()
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-abort.c b/clang/test/CIR/Incubator/CodeGen/builtin-abort.c
new file mode 100644
index 0000000000000..d60d0efedd500
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-abort.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+void abort();
+void test() { abort(); }
+
+// TODO: Add test to test unreachable when CIR support for NORETURN is added.
+
+// CIR-LABEL: test
+// CIR:  cir.call @abort() : () -> ()
+
+// LLVM-LABEL: test
+// LLVM:  call void @abort()
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-addressof.cpp b/clang/test/CIR/Incubator/CodeGen/builtin-addressof.cpp
new file mode 100644
index 0000000000000..d9b978fe37a85
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-addressof.cpp
@@ -0,0 +1,99 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -emit-llvm %s -o %t.ogcg.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ogcg.ll %s
+
+// Test addressof builtins in emitPointerWithAlignment context
+// This tests the fix for crash at CIRGenExpr.cpp:240 (248 production crashes)
+
+struct S {
+  void operator&() = delete;  // Ensures addressof is needed
+};
+
+// Test 1: __builtin_addressof in delete expression (main crash scenario)
+// CIR-LABEL: @_Z{{.*}}test_delete_builtin
+// LLVM-LABEL: @_Z{{.*}}test_delete_builtin
+// OGCG-LABEL: @_Z{{.*}}test_delete_builtin
+void test_delete_builtin() {
+  S* s = new S();
+  delete __builtin_addressof(*s);
+  // CIR: cir.call @_ZdlPvm
+  // LLVM: call{{.*}} @_ZdlPv
+  // OGCG: call{{.*}} @_ZdlPv
+}
+
+// Test 2: Simple case - local variable
+// CIR-LABEL: @_Z{{.*}}test_simple_local
+// LLVM-LABEL: @_Z{{.*}}test_simple_local
+// OGCG-LABEL: @_Z{{.*}}test_simple_local
+int* test_simple_local() {
+  int x = 42;
+  return __builtin_addressof(x);
+  // CIR: cir.alloca
+  // CIR: cir.return
+  // LLVM: alloca
+  // LLVM: ret ptr
+  // OGCG: alloca
+  // OGCG: ret ptr
+}
+
+// Test 3: Global variable
+extern int global_var;
+// CIR-LABEL: @_Z{{.*}}test_global
+// LLVM-LABEL: @_Z{{.*}}test_global
+// OGCG-LABEL: @_Z{{.*}}test_global
+int* test_global() {
+  return __builtin_addressof(global_var);
+  // CIR: cir.get_global
+  // CIR: cir.return
+  // LLVM: @global_var
+  // OGCG: @global_var
+}
+
+// Test 4: Conditional operator with addressof
+// CIR-LABEL: @_Z{{.*}}test_conditional
+// LLVM-LABEL: @_Z{{.*}}test_conditional
+// OGCG-LABEL: @_Z{{.*}}test_conditional
+S *test_conditional(bool b, S &s, S &t) {
+  return __builtin_addressof(b ? s : t);
+  // CIR: cir.ternary
+  // LLVM: phi ptr
+  // OGCG: phi ptr
+}
+
+// Test 5: Member access
+struct Container {
+  int value;
+};
+
+// CIR-LABEL: @_Z{{.*}}test_member
+// LLVM-LABEL: @_Z{{.*}}test_member
+// OGCG-LABEL: @_Z{{.*}}test_member
+int* test_member(Container& c) {
+  return __builtin_addressof(c.value);
+  // CIR: cir.get_member
+  // CIR: cir.return
+  // LLVM: getelementptr
+  // LLVM: ret ptr
+  // OGCG: getelementptr
+  // OGCG: ret ptr
+}
+
+// Test 6: Array element
+// CIR-LABEL: @_Z{{.*}}test_array_elem
+// LLVM-LABEL: @_Z{{.*}}test_array_elem
+// OGCG-LABEL: @_Z{{.*}}test_array_elem
+int* test_array_elem(int i, int* arr) {
+  return __builtin_addressof(arr[i]);
+  // CIR: cir.ptr_stride
+  // CIR: cir.return
+  // LLVM: getelementptr
+  // LLVM: ret ptr
+  // OGCG: getelementptr
+  // OGCG: ret ptr
+}
+
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-alloca.c b/clang/test/CIR/Incubator/CodeGen/builtin-alloca.c
new file mode 100644
index 0000000000000..ab5ccbb3701bd
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-alloca.c
@@ -0,0 +1,62 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
+
+typedef __SIZE_TYPE__ size_t;
+void *alloca(size_t size);
+void *_alloca(size_t size);
+
+void my_alloca(size_t n)
+{
+  int *c1 = alloca(n);
+}
+// CIR:       cir.func {{.*}} @my_alloca([[ALLOCA_SIZE:%.*]]: !u64i
+// CIR:       cir.store [[ALLOCA_SIZE]], [[LOCAL_VAR_ALLOCA_SIZE:%.*]] : !u64i, !cir.ptr<!u64i>
+// CIR:       [[TMP_ALLOCA_SIZE:%.*]] = cir.load{{.*}} [[LOCAL_VAR_ALLOCA_SIZE]] : !cir.ptr<!u64i>, !u64i
+// CIR:       [[ALLOCA_RES:%.*]] = cir.alloca !u8i, !cir.ptr<!u8i>, [[TMP_ALLOCA_SIZE]] : !u64i, ["bi_alloca"] {alignment = 16 : i64}
+// CIR-NEXT:  cir.cast bitcast [[ALLOCA_RES]] : !cir.ptr<!u8i> -> !cir.ptr<!void>
+// CIR: }
+
+
+// LLVM:       define dso_local void @my_alloca(i64 [[ALLOCA_SIZE:%.*]])
+// LLVM:       store i64 [[ALLOCA_SIZE]], ptr [[LOCAL_VAR_ALLOCA_SIZE:%.*]],
+// LLVM:       [[TMP_ALLOCA_SIZE:%.*]] =  load i64, ptr [[LOCAL_VAR_ALLOCA_SIZE]],
+// LLVM:       [[ALLOCA_RES:%.*]] = alloca i8, i64 [[TMP_ALLOCA_SIZE]], align 16
+// LLVM: }
+
+void my___builtin_alloca(size_t n)
+{
+  int *c1 = (int *)__builtin_alloca(n);
+}
+
+// CIR:       cir.func {{.*}} @my___builtin_alloca([[ALLOCA_SIZE:%.*]]: !u64i
+// CIR:       cir.store [[ALLOCA_SIZE]], [[LOCAL_VAR_ALLOCA_SIZE:%.*]] : !u64i, !cir.ptr<!u64i>
+// CIR:       [[TMP_ALLOCA_SIZE:%.*]] = cir.load{{.*}} [[LOCAL_VAR_ALLOCA_SIZE]] : !cir.ptr<!u64i>, !u64i
+// CIR:       [[ALLOCA_RES:%.*]] = cir.alloca !u8i, !cir.ptr<!u8i>, [[TMP_ALLOCA_SIZE]] : !u64i, ["bi_alloca"] {alignment = 16 : i64}
+// CIR-NEXT:  cir.cast bitcast [[ALLOCA_RES]] : !cir.ptr<!u8i> -> !cir.ptr<!void>
+// CIR: }
+
+
+// LLVM:       define dso_local void @my___builtin_alloca(i64 [[ALLOCA_SIZE:%.*]])
+// LLVM:       store i64 [[ALLOCA_SIZE]], ptr [[LOCAL_VAR_ALLOCA_SIZE:%.*]],
+// LLVM:       [[TMP_ALLOCA_SIZE:%.*]] =  load i64, ptr [[LOCAL_VAR_ALLOCA_SIZE]],
+// LLVM:       [[ALLOCA_RES:%.*]] = alloca i8, i64 [[TMP_ALLOCA_SIZE]], align 16
+// LLVM: }
+
+void my__builtin_alloca_uninitialized(size_t n)
+{
+  int *c1 = (int *)__builtin_alloca_uninitialized(n);
+}
+
+// CIR:       cir.func {{.*}} @my__builtin_alloca_uninitialized([[ALLOCA_SIZE:%.*]]: !u64i
+// CIR:       cir.store [[ALLOCA_SIZE]], [[LOCAL_VAR_ALLOCA_SIZE:%.*]] : !u64i, !cir.ptr<!u64i>
+// CIR:       [[TMP_ALLOCA_SIZE:%.*]] = cir.load{{.*}} [[LOCAL_VAR_ALLOCA_SIZE]] : !cir.ptr<!u64i>, !u64i
+// CIR:       [[ALLOCA_RES:%.*]] = cir.alloca !u8i, !cir.ptr<!u8i>, [[TMP_ALLOCA_SIZE]] : !u64i, ["bi_alloca"] {alignment = 16 : i64}
+// CIR-NEXT:  cir.cast bitcast [[ALLOCA_RES]] : !cir.ptr<!u8i> -> !cir.ptr<!void>
+// CIR: }
+
+
+// LLVM:       define dso_local void @my__builtin_alloca_uninitialized(i64 [[ALLOCA_SIZE:%.*]])
+// LLVM:       store i64 [[ALLOCA_SIZE]], ptr [[LOCAL_VAR_ALLOCA_SIZE:%.*]],
+// LLVM:       [[TMP_ALLOCA_SIZE:%.*]] =  load i64, ptr [[LOCAL_VAR_ALLOCA_SIZE]],
+// LLVM:       [[ALLOCA_RES:%.*]] = alloca i8, i64 [[TMP_ALLOCA_SIZE]], align 16
+// LLVM: }
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-arm-exclusive.c b/clang/test/CIR/Incubator/CodeGen/builtin-arm-exclusive.c
new file mode 100644
index 0000000000000..0ccb16c2672b4
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-arm-exclusive.c
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24  -fclangir -emit-cir -target-feature +neon %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+struct twoFldT {
+  char a, b;
+};
+// CIR: !rec_twoFldT = !cir.record<struct "twoFldT" {!s8i, !s8i}
+
+int test_ldrex(char *addr, long long *addr64, float *addrfloat) {
+// CIR-LABEL: @test_ldrex
+  int sum = 0;
+  sum += __builtin_arm_ldrex(addr);
+// CIR: [[INTRES0:%.*]] = cir.llvm.intrinsic "aarch64.ldxr" {{%[0-9]+}} : (!cir.ptr<!s8i>) -> !s64i
+// CIR: [[CAST0:%.*]] = cir.cast integral [[INTRES0]] : !s64i -> !s8i
+// CIR: [[CAST_I32:%.*]] = cir.cast integral [[CAST0]] : !s8i -> !s32i
+
+  sum += __builtin_arm_ldrex((short *)addr);
+// CIR: [[INTRES1:%.*]] = cir.llvm.intrinsic "aarch64.ldxr" {{%[0-9]+}} : (!cir.ptr<!s16i>) -> !s64i
+// CIR: [[CAST1:%.*]] = cir.cast integral [[INTRES1]] : !s64i -> !s16i
+// CIR: [[CAST_I16:%.*]] = cir.cast integral [[CAST1]] : !s16i -> !s32i
+
+  sum += __builtin_arm_ldrex((int *)addr);
+// CIR: [[INTRES2:%.*]] = cir.llvm.intrinsic "aarch64.ldxr" {{%[0-9]+}} : (!cir.ptr<!s32i>) -> !s64i
+// CIR: [[CAST2:%.*]] = cir.cast integral [[INTRES2]] : !s64i -> !s32i
+
+  sum += __builtin_arm_ldrex((long long *)addr);
+// CIR: [[INTRES3:%.*]] = cir.llvm.intrinsic "aarch64.ldxr" {{%[0-9]+}} : (!cir.ptr<!s64i>) -> !s64i
+
+  sum += __builtin_arm_ldrex(addr64);
+// CIR: [[INTRES4:%.*]] = cir.llvm.intrinsic "aarch64.ldxr" {{%[0-9]+}} : (!cir.ptr<!s64i>) -> !s64i
+
+
+  sum += *__builtin_arm_ldrex((int **)addr);
+// CIR: [[INTRES5:%.*]] = cir.llvm.intrinsic "aarch64.ldxr"  {{%[0-9]+}} : (!cir.ptr<!cir.ptr<!s32i>>) -> !s64i
+
+  sum += __builtin_arm_ldrex((struct twoFldT **)addr)->a;
+// CIR: [[INTRES6:%.*]] = cir.llvm.intrinsic "aarch64.ldxr"  {{%[0-9]+}} : (!cir.ptr<!cir.ptr<!rec_twoFldT>>) -> !s64i
+// CIR: [[CAST3:%.*]] = cir.cast int_to_ptr [[INTRES6]] : !s64i -> !cir.ptr<!rec_twoFldT>
+// CIR: [[MEMBER_A:%.*]] = cir.get_member [[CAST3]][0] {name = "a"} : !cir.ptr<!rec_twoFldT> -> !cir.ptr<!s8i>
+
+
+ // TODO: Uncomment next 2 lines, add tests when floating result type supported
+ // sum += __builtin_arm_ldrex(addrfloat);
+
+ // sum += __builtin_arm_ldrex((double *)addr);
+
+
+  return sum;
+}
+
+int test_ldaex(char *addr, long long *addr64, float *addrfloat) {
+// CIR-LABEL: @test_ldaex
+  int sum = 0;
+  sum += __builtin_arm_ldaex(addr);
+// CIR: [[INTRES0:%.*]] = cir.llvm.intrinsic "aarch64.ldaxr" {{%[0-9]+}} : (!cir.ptr<!s8i>) -> !s64i
+// CIR: [[CAST0:%.*]] = cir.cast integral [[INTRES0]] : !s64i -> !s8i
+// CIR: [[CAST_I32:%.*]] = cir.cast integral [[CAST0]] : !s8i -> !s32i
+
+  sum += __builtin_arm_ldaex((short *)addr);
+// CIR: [[INTRES1:%.*]] = cir.llvm.intrinsic "aarch64.ldaxr" {{%[0-9]+}} : (!cir.ptr<!s16i>) -> !s64i
+// CIR: [[CAST1:%.*]] = cir.cast integral [[INTRES1]] : !s64i -> !s16i
+// CIR: [[CAST_I16:%.*]] = cir.cast integral [[CAST1]] : !s16i -> !s32i
+
+  sum += __builtin_arm_ldaex((int *)addr);
+// CIR: [[INTRES2:%.*]] = cir.llvm.intrinsic "aarch64.ldaxr" {{%[0-9]+}} : (!cir.ptr<!s32i>) -> !s64i
+// CIR: [[CAST2:%.*]] = cir.cast integral [[INTRES2]] : !s64i -> !s32i
+
+  sum += __builtin_arm_ldaex((long long *)addr);
+// CIR: [[INTRES3:%.*]] = cir.llvm.intrinsic "aarch64.ldaxr" {{%[0-9]+}} : (!cir.ptr<!s64i>) -> !s64i
+
+  sum += __builtin_arm_ldaex(addr64);
+// CIR: [[INTRES4:%.*]] = cir.llvm.intrinsic "aarch64.ldaxr" {{%[0-9]+}} : (!cir.ptr<!s64i>) -> !s64i
+
+
+  sum += *__builtin_arm_ldaex((int **)addr);
+// CIR: [[INTRES5:%.*]] = cir.llvm.intrinsic "aarch64.ldaxr"  {{%[0-9]+}} : (!cir.ptr<!cir.ptr<!s32i>>) -> !s64i
+
+  sum += __builtin_arm_ldaex((struct twoFldT **)addr)->a;
+// CIR: [[INTRES6:%.*]] = cir.llvm.intrinsic "aarch64.ldaxr"  {{%[0-9]+}} : (!cir.ptr<!cir.ptr<!rec_twoFldT>>) -> !s64i
+// CIR: [[CAST3:%.*]] = cir.cast int_to_ptr [[INTRES6]] : !s64i -> !cir.ptr<!rec_twoFldT>
+// CIR: [[MEMBER_A:%.*]] = cir.get_member [[CAST3]][0] {name = "a"} : !cir.ptr<!rec_twoFldT> -> !cir.ptr<!s8i>
+
+
+ // TODO: Uncomment next 2 lines, add tests when floating result type supported
+ // sum += __builtin_arm_ldaex(addrfloat);
+
+ // sum += __builtin_arm_ldaex((double *)addr);
+
+
+  return sum;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-assume.cpp b/clang/test/CIR/Incubator/CodeGen/builtin-assume.cpp
new file mode 100644
index 0000000000000..87a7ac07020f1
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-assume.cpp
@@ -0,0 +1,92 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++23 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck %s --check-prefix=CIR --input-file=%t.cir
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++23 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck %s --check-prefix=LLVM --input-file=%t.ll
+
+int test_assume(int x) {
+  __builtin_assume(x > 0);
+  return x;
+}
+
+//      CIR: cir.func {{.*}} @_Z11test_assumei
+//      CIR:   %[[#x:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:   %[[#zero:]] = cir.const #cir.int<0> : !s32i
+// CIR-NEXT:   %[[#cond:]] = cir.cmp(gt, %[[#x]], %[[#zero]]) : !s32i, !cir.bool
+// CIR-NEXT:   cir.assume %[[#cond]] : !cir.bool
+//      CIR: }
+
+//      LLVM: @_Z11test_assumei
+//      LLVM: %[[#cond:]] = icmp sgt i32 %{{.+}}, 0
+// LLVM-NEXT: call void @llvm.assume(i1 %[[#cond]])
+
+int test_assume_attr(int x) {
+  [[assume(x > 0)]];
+  return x;
+}
+
+//      CIR: cir.func {{.*}} @_Z16test_assume_attri
+//      CIR:   %[[#x:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:   %[[#zero:]] = cir.const #cir.int<0> : !s32i
+// CIR-NEXT:   %[[#cond:]] = cir.cmp(gt, %[[#x]], %[[#zero]]) : !s32i, !cir.bool
+// CIR-NEXT:   cir.assume %[[#cond]] : !cir.bool
+//      CIR: }
+
+//      LLVM: @_Z16test_assume_attri
+//      LLVM: %[[#cond:]] = icmp sgt i32 %{{.+}}, 0
+// LLVM-NEXT: call void @llvm.assume(i1 %[[#cond]])
+
+int test_assume_aligned(int *ptr) {
+  int *aligned = (int *)__builtin_assume_aligned(ptr, 8);
+  return *aligned;
+}
+
+//      CIR: cir.func {{.*}} @_Z19test_assume_alignedPi
+//      CIR:   %[[#ptr:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CIR-NEXT:   %[[#aligned:]] = cir.assume.aligned %[[#ptr]] : !cir.ptr<!s32i>[alignment 8]
+// CIR-NEXT:   cir.store{{.*}} %[[#aligned]], %[[#aligned_slot:]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CIR-NEXT:   %[[#aligned2:]] = cir.load deref{{.*}}  %[[#aligned_slot]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CIR-NEXT:   %{{.+}} = cir.load{{.*}} %[[#aligned2]] : !cir.ptr<!s32i>, !s32i
+//      CIR: }
+
+//      LLVM: @_Z19test_assume_alignedPi
+//      LLVM: %[[#ptr:]] = load ptr, ptr %{{.+}}, align 8
+// LLVM-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr %[[#ptr]], i64 8) ]
+// LLVM-NEXT: store ptr %[[#ptr]], ptr %{{.+}}, align 8
+
+int test_assume_aligned_offset(int *ptr) {
+  int *aligned = (int *)__builtin_assume_aligned(ptr, 8, 4);
+  return *aligned;
+}
+
+//      CIR: cir.func {{.*}} @_Z26test_assume_aligned_offsetPi
+//      CIR:   %[[#ptr:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CIR-NEXT:   %[[#offset:]] = cir.const #cir.int<4> : !s32i
+// CIR-NEXT:   %[[#offset2:]] = cir.cast integral %[[#offset]] : !s32i -> !u64i
+// CIR-NEXT:   %[[#aligned:]] = cir.assume.aligned %[[#ptr]] : !cir.ptr<!s32i>[alignment 8, offset %[[#offset2]] : !u64i]
+// CIR-NEXT:   cir.store{{.*}} %[[#aligned]], %[[#aligned_slot:]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CIR-NEXT:   %[[#aligned2:]] = cir.load deref{{.*}}  %[[#aligned_slot]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CIR-NEXT:   %{{.+}} = cir.load{{.*}} %[[#aligned2]] : !cir.ptr<!s32i>, !s32i
+//      CIR: }
+
+//      LLVM: @_Z26test_assume_aligned_offsetPi
+//      LLVM: %[[#ptr:]] = load ptr, ptr %{{.+}}, align 8
+// LLVM-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr %[[#ptr]], i64 8, i64 4) ]
+// LLVM-NEXT: store ptr %[[#ptr]], ptr %{{.+}}, align 8
+
+int test_separate_storage(int *p1, int *p2) {
+  __builtin_assume_separate_storage(p1, p2);
+  return *p1 + *p2;
+}
+
+//      CIR: cir.func {{.*}} @_Z21test_separate_storagePiS_
+//      CIR:   %[[#p1:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CIR-NEXT:   %[[#p1_voidptr:]] = cir.cast bitcast %[[#p1]] : !cir.ptr<!s32i> -> !cir.ptr<!void>
+// CIR-NEXT:   %[[#p2:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CIR-NEXT:   %[[#p2_voidptr:]] = cir.cast bitcast %[[#p2]] : !cir.ptr<!s32i> -> !cir.ptr<!void>
+// CIR-NEXT:   cir.assume.separate_storage %[[#p1_voidptr]], %[[#p2_voidptr]] : !cir.ptr<!void>
+//      CIR: }
+
+//      LLVM: @_Z21test_separate_storagePiS_
+//      LLVM: %[[#ptr1:]] = load ptr, ptr %{{.+}}, align 8
+// LLVM-NEXT: %[[#ptr2:]] = load ptr, ptr %{{.+}}, align 8
+// LLVM-NEXT: call void @llvm.assume(i1 true) [ "separate_storage"(ptr %[[#ptr1]], ptr %[[#ptr2]]) ]
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-bcopy.cpp b/clang/test/CIR/Incubator/CodeGen/builtin-bcopy.cpp
new file mode 100644
index 0000000000000..0ac8d626c870b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-bcopy.cpp
@@ -0,0 +1,77 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+void foo(void) {
+  // CIR-LABEL: cir.func {{.*}} @_Z3foov()
+  // CIR: %[[V0:.*]] = cir.alloca !cir.array<!cir.float x 4>, !cir.ptr<!cir.array<!cir.float x 4>>, ["f4"] {alignment = 16 : i64}
+  // CIR: %[[V1:.*]] = cir.alloca !cir.array<!cir.float x 8>, !cir.ptr<!cir.array<!cir.float x 8>>, ["f8"] {alignment = 16 : i64}
+  // CIR: %[[V2:.*]] = cir.cast array_to_ptrdecay %[[V0]] : !cir.ptr<!cir.array<!cir.float x 4>> -> !cir.ptr<!cir.float>
+  // CIR: %[[V3:.*]] = cir.cast bitcast %[[V2]] : !cir.ptr<!cir.float> -> !cir.ptr<!void>
+  // CIR: %[[V4:.*]] = cir.cast array_to_ptrdecay %[[V1]] : !cir.ptr<!cir.array<!cir.float x 8>> -> !cir.ptr<!cir.float>
+  // CIR: %[[V5:.*]] = cir.cast bitcast %[[V4]] : !cir.ptr<!cir.float> -> !cir.ptr<!void>
+  // CIR: %[[V6:.*]] = cir.const #cir.int<4> : !u64i
+  // CIR: %[[V7:.*]] = cir.const #cir.int<4> : !s32i
+  // CIR: %[[V8:.*]] = cir.cast integral %[[V7]] : !s32i -> !u64i
+  // CIR: %[[V9:.*]] = cir.binop(mul, %[[V6]], %[[V8]]) : !u64i
+  // CIR: cir.libc.memmove %[[V9]] bytes from %[[V3]] to %[[V5]] : !cir.ptr<!void>, !u64i
+  // CIR: cir.return
+
+  // LLVM-LABEL: define dso_local void @_Z3foov()
+  // LLVM: %[[V1:.*]] = alloca [4 x float], i64 1, align 16
+  // LLVM: %[[V2:.*]] = alloca [8 x float], i64 1, align 16
+  // LLVM: %[[V3:.*]] = getelementptr float, ptr %[[V1]], i32 0
+  // LLVM: %[[V4:.*]] = getelementptr float, ptr %[[V2]], i32 0
+  // LLVM: call void @llvm.memmove.p0.p0.i64(ptr %[[V4]], ptr %[[V3]], i64 16, i1 false)
+  // LLVM: ret void
+
+  float f4[4];
+  float f8[8];
+  __builtin_bcopy(f4, f8, sizeof(float) * 4);
+}
+
+void test_conditional_bcopy(void) {
+  // CIR-LABEL: cir.func {{.*}} @_Z22test_conditional_bcopyv()
+  // CIR: cir.libc.memmove {{.*}} bytes from {{.*}} to {{.*}} : !cir.ptr<!void>, !u64i
+  // CIR: cir.libc.memmove {{.*}} bytes from {{.*}} to {{.*}} : !cir.ptr<!void>, !u64i
+
+  // LLVM-LABEL: define{{.*}} void @_Z22test_conditional_bcopyv
+  // LLVM: call void @llvm.memmove
+  // LLVM: call void @llvm.memmove
+  // LLVM-NOT: phi
+
+  char dst[20];
+  char src[20];
+  int _sz = 20, len = 20;
+  return (_sz ? ((_sz >= len) ? __builtin_bcopy(src, dst, len) : foo())
+              : __builtin_bcopy(src, dst, len));
+}
+
+void another_conditional_bcopy(char *dst, char *src, int sz, int len) {
+  // CIR-LABEL: cir.func {{.*}} @_Z25another_conditional_bcopyPcS_ii
+  // CIR: cir.libc.memmove {{.*}} bytes from {{.*}} to {{.*}} : !cir.ptr<!void>, !u64i
+  // CIR: cir.libc.memmove {{.*}} bytes from {{.*}} to {{.*}} : !cir.ptr<!void>, !u64i
+
+  // LLVM-LABEL: define{{.*}} void @_Z25another_conditional_bcopyPcS_ii
+  // LLVM: call void @llvm.memmove
+  // LLVM: call void @llvm.memmove
+  // LLVM-NOT: phi
+
+  if (sz >= len)
+    __builtin_bcopy(src, dst, len);
+  else
+    __builtin_bcopy(src, dst, len * 2);
+}
+
+#define size_t __SIZE_TYPE__
+
+extern "C" void bcopy(const void *__src, void *__dest, size_t __n);
+
+// LLVM: @_Z9testbcopyPKvPvm(
+// LLVM:         call void @llvm.memmove.p0.p0.i64(ptr {{.*}}, ptr {{.*}}, i64 {{.*}}, i1 false)
+// LLVM:    ret void
+
+void testbcopy(const void *src, void *dest, size_t n) {
+  bcopy(src, dest, n);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-bit-cast.cpp b/clang/test/CIR/Incubator/CodeGen/builtin-bit-cast.cpp
new file mode 100644
index 0000000000000..a1a3f85ffadd3
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-bit-cast.cpp
@@ -0,0 +1,136 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir --check-prefix=CIR %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll --check-prefix=LLVM %s
+
+float test_scalar(int &oper) {
+  return __builtin_bit_cast(float, oper);
+}
+
+// CIR-LABEL: cir.func {{.*}} @_Z11test_scalarRi
+//       CIR:   %[[#SRC_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+//  CIR-NEXT:   %[[#DST_PTR:]] = cir.cast bitcast %[[#SRC_PTR]] : !cir.ptr<!s32i> -> !cir.ptr<!cir.float>
+//  CIR-NEXT:   %{{.+}} = cir.load{{.*}} %[[#DST_PTR]] : !cir.ptr<!cir.float>, !cir.float
+//       CIR: }
+
+// LLVM-LABEL: define dso_local float @_Z11test_scalarRi
+//       LLVM:   %[[#PTR:]] = load ptr, ptr %{{.+}}, align 8
+//  LLVM-NEXT:   %{{.+}} = load float, ptr %[[#PTR]], align 4
+//       LLVM: }
+
+struct two_ints {
+  int x;
+  int y;
+};
+
+unsigned long test_aggregate_to_scalar(two_ints &ti) {
+  return __builtin_bit_cast(unsigned long, ti);
+}
+
+// CIR-LABEL: cir.func {{.*}} @_Z24test_aggregate_to_scalarR8two_ints
+//       CIR:   %[[#SRC_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!rec_two_ints>>, !cir.ptr<!rec_two_ints>
+//  CIR-NEXT:   %[[#DST_PTR:]] = cir.cast bitcast %[[#SRC_PTR]] : !cir.ptr<!rec_two_ints> -> !cir.ptr<!u64i>
+//  CIR-NEXT:   %{{.+}} = cir.load{{.*}} %[[#DST_PTR]] : !cir.ptr<!u64i>, !u64i
+//       CIR: }
+
+// LLVM-LABEL: define dso_local i64 @_Z24test_aggregate_to_scalarR8two_ints
+//       LLVM:   %[[#PTR:]] = load ptr, ptr %{{.+}}, align 8
+//  LLVM-NEXT:   %{{.+}} = load i64, ptr %[[#PTR]], align 4
+//       LLVM: }
+
+struct two_floats {
+  float x;
+  float y;
+};
+
+two_floats test_aggregate_record(two_ints& ti) {
+   return __builtin_bit_cast(two_floats, ti);
+}
+
+// CIR-LABEL: cir.func {{.*}} @_Z21test_aggregate_recordR8two_ints
+//       CIR:   %[[#SRC_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!rec_two_ints>>, !cir.ptr<!rec_two_ints>
+//  CIR-NEXT:   %[[#SRC_VOID_PTR:]] = cir.cast bitcast %[[#SRC_PTR]] : !cir.ptr<!rec_two_ints> -> !cir.ptr<!void>
+//  CIR-NEXT:   %[[#DST_VOID_PTR:]] = cir.cast bitcast %{{.+}} : !cir.ptr<!rec_two_floats> -> !cir.ptr<!void>
+//  CIR-NEXT:   %[[#SIZE:]] = cir.const #cir.int<8> : !u64i
+//  CIR-NEXT:   cir.libc.memcpy %[[#SIZE]] bytes from %[[#SRC_VOID_PTR]] to %[[#DST_VOID_PTR]] : !u64i, !cir.ptr<!void> -> !cir.ptr<!void>
+//       CIR: }
+
+// LLVM-LABEL: define dso_local %struct.two_floats @_Z21test_aggregate_recordR8two_ints
+//       LLVM:   %[[#DST_SLOT:]] = alloca %struct.two_floats, i64 1, align 4
+//       LLVM:   %[[#SRC_PTR:]] = load ptr, ptr %{{.+}}, align 8
+//  LLVM-NEXT:   call void @llvm.memcpy.p0.p0.i64(ptr %[[#DST_SLOT]], ptr %[[#SRC_PTR]], i64 8, i1 false)
+//  LLVM-NEXT:   %{{.+}} = load %struct.two_floats, ptr %[[#DST_SLOT]], align 4
+//       LLVM: }
+
+two_floats test_aggregate_array(int (&ary)[2]) {
+  return __builtin_bit_cast(two_floats, ary);
+}
+
+// CIR-LABEL: cir.func {{.*}} @_Z20test_aggregate_arrayRA2_i
+//       CIR:   %[[#SRC_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!cir.array<!s32i x 2>>>, !cir.ptr<!cir.array<!s32i x 2>>
+//  CIR-NEXT:   %[[#SRC_VOID_PTR:]] = cir.cast bitcast %[[#SRC_PTR]] : !cir.ptr<!cir.array<!s32i x 2>> -> !cir.ptr<!void>
+//  CIR-NEXT:   %[[#DST_VOID_PTR:]] = cir.cast bitcast %{{.+}} : !cir.ptr<!rec_two_floats> -> !cir.ptr<!void>
+//  CIR-NEXT:   %[[#SIZE:]] = cir.const #cir.int<8> : !u64i
+//  CIR-NEXT:   cir.libc.memcpy %[[#SIZE]] bytes from %[[#SRC_VOID_PTR]] to %[[#DST_VOID_PTR]] : !u64i, !cir.ptr<!void> -> !cir.ptr<!void>
+//       CIR: }
+
+// LLVM-LABEL: define dso_local %struct.two_floats @_Z20test_aggregate_arrayRA2_i
+//       LLVM:   %[[#DST_SLOT:]] = alloca %struct.two_floats, i64 1, align 4
+//       LLVM:   %[[#SRC_PTR:]] = load ptr, ptr %{{.+}}, align 8
+//  LLVM-NEXT:   call void @llvm.memcpy.p0.p0.i64(ptr %[[#DST_SLOT]], ptr %[[#SRC_PTR]], i64 8, i1 false)
+//  LLVM-NEXT:   %{{.+}} = load %struct.two_floats, ptr %[[#DST_SLOT]], align 4
+//       LLVM: }
+
+two_ints test_scalar_to_aggregate(unsigned long ul) {
+  return __builtin_bit_cast(two_ints, ul);
+}
+
+// CIR-LABEL: cir.func {{.*}} @_Z24test_scalar_to_aggregatem
+//       CIR:   %[[#SRC_VOID_PTR:]] = cir.cast bitcast %{{.+}} : !cir.ptr<!u64i> -> !cir.ptr<!void>
+//  CIR-NEXT:   %[[#DST_VOID_PTR:]] = cir.cast bitcast %{{.+}} : !cir.ptr<!rec_two_ints> -> !cir.ptr<!void>
+//  CIR-NEXT:   %[[#SIZE:]] = cir.const #cir.int<8> : !u64i
+//  CIR-NEXT:   cir.libc.memcpy %[[#SIZE]] bytes from %[[#SRC_VOID_PTR]] to %[[#DST_VOID_PTR]] : !u64i, !cir.ptr<!void> -> !cir.ptr<!void>
+//       CIR: }
+
+// LLVM-LABEL: define dso_local %struct.two_ints @_Z24test_scalar_to_aggregatem
+//       LLVM:   %[[#DST_SLOT:]] = alloca %struct.two_ints, i64 1, align 4
+//       LLVM:   call void @llvm.memcpy.p0.p0.i64(ptr %[[#DST_SLOT]], ptr %{{.+}}, i64 8, i1 false)
+//  LLVM-NEXT:   %{{.+}} = load %struct.two_ints, ptr %[[#DST_SLOT]], align 4
+//       LLVM: }
+
+unsigned long test_array(int (&ary)[2]) {
+  return __builtin_bit_cast(unsigned long, ary);
+}
+
+// CIR-LABEL: cir.func {{.*}} @_Z10test_arrayRA2_i
+//      CIR:   %[[#SRC_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!cir.array<!s32i x 2>>>, !cir.ptr<!cir.array<!s32i x 2>>
+// CIR-NEXT:   %[[#DST_PTR:]] = cir.cast bitcast %[[#SRC_PTR]] : !cir.ptr<!cir.array<!s32i x 2>> -> !cir.ptr<!u64i>
+// CIR-NEXT:   %{{.+}} = cir.load{{.*}} %[[#DST_PTR]] : !cir.ptr<!u64i>, !u64i
+//      CIR: }
+
+// LLVM-LABEL: define dso_local i64 @_Z10test_arrayRA2_i
+//       LLVM:   %[[#SRC_PTR:]] = load ptr, ptr %{{.+}}, align 8
+//  LLVM-NEXT:   %{{.+}} = load i64, ptr %[[#SRC_PTR]], align 4
+//       LLVM: }
+
+two_ints test_rvalue_aggregate() {
+  return __builtin_bit_cast(two_ints, 42ul);
+}
+
+// CIR-LABEL: cir.func {{.*}} @_Z21test_rvalue_aggregatev()
+//       CIR:   cir.scope {
+//  CIR-NEXT:     %[[#TMP_SLOT:]] = cir.alloca !u64i, !cir.ptr<!u64i>
+//  CIR-NEXT:     %[[#A:]] = cir.const #cir.int<42> : !u64i
+//  CIR-NEXT:     cir.store{{.*}} %[[#A]], %[[#TMP_SLOT]] : !u64i, !cir.ptr<!u64i>
+//  CIR-NEXT:     %[[#SRC_VOID_PTR:]] = cir.cast bitcast %[[#TMP_SLOT]] : !cir.ptr<!u64i> -> !cir.ptr<!void>
+//  CIR-NEXT:     %[[#DST_VOID_PTR:]] = cir.cast bitcast %0 : !cir.ptr<!rec_two_ints> -> !cir.ptr<!void>
+//  CIR-NEXT:     %[[#SIZE:]] = cir.const #cir.int<8> : !u64i
+//  CIR-NEXT:     cir.libc.memcpy %[[#SIZE]] bytes from %[[#SRC_VOID_PTR]] to %[[#DST_VOID_PTR]] : !u64i, !cir.ptr<!void> -> !cir.ptr<!void>
+//  CIR-NEXT:   }
+//       CIR: }
+
+// LLVM-LABEL: define dso_local %struct.two_ints @_Z21test_rvalue_aggregatev
+//  LLVM:   %[[#SRC_SLOT:]] = alloca i64, i64 1, align 8
+//  LLVM:   store i64 42, ptr %[[#SRC_SLOT]], align 8
+//  LLVM-NEXT:   call void @llvm.memcpy.p0.p0.i64(ptr %{{.+}}, ptr %[[#SRC_SLOT]], i64 8, i1 false)
+//       LLVM: }
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-bitreverse.c b/clang/test/CIR/Incubator/CodeGen/builtin-bitreverse.c
new file mode 100644
index 0000000000000..9d569f8f64b71
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-bitreverse.c
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+unsigned char bitreverse8(unsigned char value) {
+  return __builtin_bitreverse8(value);
+}
+
+// CIR-LABEL: @bitreverse8
+// CIR: %{{.+}} = cir.bit_reverse %{{.+}} : !u8i
+
+// LLVM-LABEL: @bitreverse8
+// LLVM: %{{.+}} = call i8 @llvm.bitreverse.i8(i8 %{{.+}})
+
+unsigned short bitreverse16(unsigned short value) {
+  return __builtin_bitreverse16(value);
+}
+
+// CIR-LABEL: @bitreverse16
+// CIR: %{{.+}} = cir.bit_reverse %{{.+}} : !u16i
+
+// LLVM-LABEL: @bitreverse16
+// LLVM: %{{.+}} = call i16 @llvm.bitreverse.i16(i16 %{{.+}})
+
+unsigned bitreverse32(unsigned value) {
+  return __builtin_bitreverse32(value);
+}
+
+// CIR-LABEL: @bitreverse32
+// CIR: %{{.+}} = cir.bit_reverse %{{.+}} : !u32i
+
+// LLVM-LABEL: @bitreverse32
+// LLVM: %{{.+}} = call i32 @llvm.bitreverse.i32(i32 %{{.+}})
+
+unsigned long long bitreverse64(unsigned long long value) {
+  return __builtin_bitreverse64(value);
+}
+
+// CIR-LABEL: @bitreverse64
+// CIR: %{{.+}} = cir.bit_reverse %{{.+}} : !u64i
+
+// LLVM-LABEL: @bitreverse64
+// LLVM: %{{.+}} = call i64 @llvm.bitreverse.i64(i64 %{{.+}})
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-bits.cpp b/clang/test/CIR/Incubator/CodeGen/builtin-bits.cpp
new file mode 100644
index 0000000000000..7baa94e3edd1e
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-bits.cpp
@@ -0,0 +1,184 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck %s --check-prefix=CIR --input-file=%t.cir
+
+int test_builtin_clrsb(int x) {
+  return __builtin_clrsb(x);
+}
+
+// CIR-LABEL: _Z18test_builtin_clrsbi
+// CIR: [[TMP:%.+]] = cir.clrsb %{{.+}} : !s32i
+
+int test_builtin_clrsbl(long x) {
+  return __builtin_clrsbl(x);
+}
+
+// CIR-LABEL: _Z19test_builtin_clrsbll
+// CIR: [[TMP:%.+]] = cir.clrsb %{{.+}} : !s64i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !s64i -> !s32i
+
+int test_builtin_clrsbll(long long x) {
+  return __builtin_clrsbll(x);
+}
+
+// CIR-LABEL: _Z20test_builtin_clrsbllx
+// CIR: [[TMP:%.+]] = cir.clrsb %{{.+}} : !s64i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !s64i -> !s32i
+
+int test_builtin_ctzs(unsigned short x) {
+  return __builtin_ctzs(x);
+}
+
+// CIR-LABEL: _Z17test_builtin_ctzst
+// CIR: [[TMP:%.+]] = cir.ctz %{{.+}} zero_poison : !u16i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !u16i -> !s32i
+
+int test_builtin_ctz(unsigned x) {
+  return __builtin_ctz(x);
+}
+
+// CIR-LABEL: _Z16test_builtin_ctzj
+// CIR: [[TMP:%.+]] = cir.ctz %{{.+}} zero_poison : !u32i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !u32i -> !s32i
+
+int test_builtin_ctzl(unsigned long x) {
+  return __builtin_ctzl(x);
+}
+
+// CIR-LABEL: _Z17test_builtin_ctzlm
+// CIR: [[TMP:%.+]] = cir.ctz %{{.+}} zero_poison : !u64i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !u64i -> !s32i
+
+int test_builtin_ctzll(unsigned long long x) {
+  return __builtin_ctzll(x);
+}
+
+// CIR-LABEL: _Z18test_builtin_ctzlly
+// CIR: [[TMP:%.+]] = cir.ctz %{{.+}} zero_poison : !u64i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !u64i -> !s32i
+
+int test_builtin_ctzg(unsigned x) {
+  return __builtin_ctzg(x);
+}
+
+// CIR-LABEL: _Z17test_builtin_ctzgj
+// CIR: [[TMP:%.+]] = cir.ctz %{{.+}} zero_poison : !u32i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !u32i -> !s32i
+
+int test_builtin_clzs(unsigned short x) {
+  return __builtin_clzs(x);
+}
+
+// CIR-LABEL: _Z17test_builtin_clzst
+// CIR: [[TMP:%.+]] = cir.clz %{{.+}} zero_poison : !u16i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !u16i -> !s32i
+
+int test_builtin_clz(unsigned x) {
+  return __builtin_clz(x);
+}
+
+// CIR-LABEL: _Z16test_builtin_clzj
+// CIR: [[TMP:%.+]] = cir.clz %{{.+}} zero_poison : !u32i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !u32i -> !s32i
+
+int test_builtin_clzl(unsigned long x) {
+  return __builtin_clzl(x);
+}
+
+// CIR-LABEL: _Z17test_builtin_clzlm
+// CIR: [[TMP:%.+]] = cir.clz %{{.+}} zero_poison : !u64i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !u64i -> !s32i
+
+int test_builtin_clzll(unsigned long long x) {
+  return __builtin_clzll(x);
+}
+
+// CIR-LABEL: _Z18test_builtin_clzlly
+// CIR: [[TMP:%.+]] = cir.clz %{{.+}} zero_poison : !u64i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !u64i -> !s32i
+
+int test_builtin_clzg(unsigned x) {
+  return __builtin_clzg(x);
+}
+
+// CIR-LABEL: _Z17test_builtin_clzgj
+// CIR: [[TMP:%.+]] = cir.clz %{{.+}} zero_poison : !u32i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !u32i -> !s32i
+
+int test_builtin_ffs(int x) {
+  return __builtin_ffs(x);
+}
+
+// CIR-LABEL: _Z16test_builtin_ffsi
+// CIR: [[TMP:%.+]] = cir.ffs %{{.+}} : !s32i
+
+int test_builtin_ffsl(long x) {
+  return __builtin_ffsl(x);
+}
+
+// CIR-LABEL: _Z17test_builtin_ffsll
+// CIR: [[TMP:%.+]] = cir.ffs %{{.+}} : !s64i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !s64i -> !s32i
+
+int test_builtin_ffsll(long long x) {
+  return __builtin_ffsll(x);
+}
+
+// CIR-LABEL: _Z18test_builtin_ffsllx
+// CIR: [[TMP:%.+]] = cir.ffs %{{.+}} : !s64i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !s64i -> !s32i
+
+int test_builtin_parity(unsigned x) {
+  return __builtin_parity(x);
+}
+
+// CIR-LABEL: _Z19test_builtin_parityj
+// CIR: [[TMP:%.+]] = cir.parity %{{.+}} : !u32i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !u32i -> !s32i
+
+int test_builtin_parityl(unsigned long x) {
+  return __builtin_parityl(x);
+}
+
+// CIR-LABEL: _Z20test_builtin_paritylm
+// CIR: [[TMP:%.+]] = cir.parity %{{.+}} : !u64i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !u64i -> !s32i
+
+int test_builtin_parityll(unsigned long long x) {
+  return __builtin_parityll(x);
+}
+
+// CIR-LABEL: _Z21test_builtin_paritylly
+// CIR: [[TMP:%.+]] = cir.parity %{{.+}} : !u64i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !u64i -> !s32i
+
+int test_builtin_popcount(unsigned x) {
+  return __builtin_popcount(x);
+}
+
+// CIR-LABEL: _Z21test_builtin_popcountj
+// CIR: [[TMP:%.+]] = cir.popcount %{{.+}} : !u32i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !u32i -> !s32i
+
+int test_builtin_popcountl(unsigned long x) {
+  return __builtin_popcountl(x);
+}
+
+// CIR-LABEL: _Z22test_builtin_popcountlm
+// CIR: [[TMP:%.+]] = cir.popcount %{{.+}} : !u64i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !u64i -> !s32i
+
+int test_builtin_popcountll(unsigned long long x) {
+  return __builtin_popcountll(x);
+}
+
+// CIR-LABEL: _Z23test_builtin_popcountlly
+// CIR: [[TMP:%.+]] = cir.popcount %{{.+}} : !u64i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !u64i -> !s32i
+
+int test_builtin_popcountg(unsigned x) {
+  return __builtin_popcountg(x);
+}
+
+// CIR-LABEL: _Z22test_builtin_popcountgj
+// CIR: [[TMP:%.+]] = cir.popcount %{{.+}} : !u32i
+// CIR: {{%.+}} = cir.cast integral [[TMP]] : !u32i -> !s32i
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-constant-evaluated.cpp b/clang/test/CIR/Incubator/CodeGen/builtin-constant-evaluated.cpp
new file mode 100644
index 0000000000000..c86ad878282cc
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-constant-evaluated.cpp
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck %s --check-prefix=CIR --input-file=%t.cir
+
+auto func() -> int {
+  return __builtin_strcmp("", "");
+  // CIR:      cir.func {{.*}} @_Z4funcv()
+  // CIR-NEXT: %[[RET_VAL:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+  // CIR-NEXT: %[[VAL:.*]] = cir.const #cir.int<0> : !s32i
+  // CIR-NEXT: cir.store{{.*}} %[[VAL]], %[[RET_VAL]] : !s32i, !cir.ptr<!s32i>
+  // CIR-NEXT: %[[TMP:.*]] = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+  // CIR-NEXT: cir.return %[[TMP]] : !s32i
+}
+
+auto func2() -> int {
+  return __builtin_choose_expr(true, 1, 2);
+
+  // CIR:      cir.func {{.*}} @_Z5func2v()
+  // CIR-NEXT:   %[[RET_VAL:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+  // CIR-NEXT:   %[[VAL:.*]] = cir.const #cir.int<1> : !s32i
+  // CIR-NEXT:   cir.store{{.*}} %[[VAL]], %[[RET_VAL]] : !s32i, !cir.ptr<!s32i>
+  // CIR-NEXT:   %[[TMP:.*]] = cir.load{{.*}} %[[RET_VAL]] : !cir.ptr<!s32i>, !s32i
+  // CIR-NEXT:   cir.return %[[TMP]] : !s32i
+}
+
+auto func3() -> int {
+  return __builtin_choose_expr(false, 1, 2);
+
+  // CIR:      cir.func {{.*}} @_Z5func3v()
+  // CIR-NEXT:   %[[RET_VAL:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+  // CIR-NEXT:   %[[VAL:.*]] = cir.const #cir.int<2> : !s32i
+  // CIR-NEXT:   cir.store{{.*}} %[[VAL]], %[[RET_VAL]] : !s32i, !cir.ptr<!s32i>
+  // CIR-NEXT:   %[[TMP:.*]] = cir.load{{.*}} %[[RET_VAL]] : !cir.ptr<!s32i>, !s32i
+  // CIR-NEXT:   cir.return %[[TMP]] : !s32i
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-constant-fold.c b/clang/test/CIR/Incubator/CodeGen/builtin-constant-fold.c
new file mode 100644
index 0000000000000..878a406c9cd9d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-constant-fold.c
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck -check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+
+void test1() {
+  float f;
+  double d;
+  f = __builtin_huge_valf();
+  d = __builtin_huge_val();
+}
+
+// CIR-LABEL: test1
+// CIR: [[F:%.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["f"] {alignment = 4 : i64}
+// CIR: [[D:%.*]] = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["d"] {alignment = 8 : i64}
+// CIR: [[F_VAL:%.*]] = cir.const #cir.fp<0x7F800000> : !cir.float
+// CIR: cir.store{{.*}} [[F_VAL]], [[F]] : !cir.float, !cir.ptr<!cir.float>
+// CIR: [[D_VAL:%.*]] = cir.const #cir.fp<0x7FF0000000000000> : !cir.double
+// CIR: cir.store{{.*}} [[D_VAL]], [[D]] : !cir.double, !cir.ptr<!cir.double>
+// CIR: cir.return
+
+// LLVM-LABEL: test1
+// LLVM: [[F:%.*]] = alloca float, align 4
+// LLVM: [[D:%.*]] = alloca double, align 8
+// LLVM: store float 0x7FF0000000000000, ptr [[F]], align 4
+// LLVM: store double 0x7FF0000000000000, ptr [[D]], align 8
+// LLVM: ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-constant-p.c b/clang/test/CIR/Incubator/CodeGen/builtin-constant-p.c
new file mode 100644
index 0000000000000..07a534ea12b00
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-constant-p.c
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
+
+int a = 0;
+int foo() {
+  return __builtin_constant_p(a);
+}
+
+// CIR:  cir.func {{.*}} @foo() -> !s32i extra(#fn_attr)
+// CIR:    [[TMP0:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CIR:    [[TMP1:%.*]] = cir.get_global @a : !cir.ptr<!s32i>
+// CIR:    [[TMP2:%.*]] = cir.load{{.*}} [[TMP1]] : !cir.ptr<!s32i>, !s32i
+// CIR:    [[TMP3:%.*]] = cir.is_constant([[TMP2]] : !s32i) : !cir.bool
+// CIR:    [[TMP4:%.*]] = cir.cast bool_to_int [[TMP3]] : !cir.bool -> !s32i
+// CIR:    cir.store{{.*}} [[TMP4]], [[TMP0]] : !s32i, !cir.ptr<!s32i>
+// CIR:    [[TMP5:%.*]] = cir.load{{.*}} [[TMP0]] : !cir.ptr<!s32i>, !s32i
+// CIR:    cir.return [[TMP5]] : !s32i
+
+// LLVM:define dso_local i32 @foo()
+// LLVM:  [[TMP1:%.*]] = alloca i32, i64 1
+// LLVM:  [[TMP2:%.*]] = load i32, ptr @a
+// LLVM:  [[TMP3:%.*]] = call i1 @llvm.is.constant.i32(i32 [[TMP2]])
+// LLVM:  [[TMP5:%.*]] = zext i1 [[TMP3]] to i32
+// LLVM:  store i32 [[TMP5]], ptr [[TMP1]]
+// LLVM:  [[TMP6:%.*]] = load i32, ptr [[TMP1]]
+// LLVM:  ret i32 [[TMP6]]
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-fcmp-sse.c b/clang/test/CIR/Incubator/CodeGen/builtin-fcmp-sse.c
new file mode 100644
index 0000000000000..903cf2492f321
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-fcmp-sse.c
@@ -0,0 +1,102 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-cir %s -o -  | FileCheck %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -emit-llvm %s -o - | FileCheck %s -check-prefix=OGCG
+
+typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
+typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
+
+__m128 test_cmpnleps(__m128 A, __m128 B) {
+  // CIR-LABEL: @test_cmpnleps
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(le, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
+  // CIR: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] : !cir.vector<!s32i x 4> -> !cir.vector<!cir.float x 4>
+  // CIR-NEXT: cir.store{{.*}} [[CAST]], [[ALLOCA:%.*]] :  !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load{{.*}} [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: test_cmpnleps
+  // LLVM: [[CMP:%.*]] = fcmp ugt <4 x float> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // LLVM-NEXT: ret <4 x float> [[CAST]]
+
+  // OGCG-LABEL: test_cmpnleps
+  // OGCG: [[CMP:%.*]] = fcmp ugt <4 x float> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // OGCG-NEXT: ret <4 x float> [[CAST]]
+  return __builtin_ia32_cmpnleps(A, B);
+}
+
+
+__m128d test_cmpnlepd(__m128d A, __m128d B) {
+  // CIR-LABEL: @test_cmpnlepd
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(le, [[A:%.*]], [[B:%.*]]) :  !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  // CIR-NEXT: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] :  !cir.vector<!s64i x 2> -> !cir.vector<!cir.double x 2>
+  // CIR-NEXT: cir.store{{.*}} [[CAST]], [[ALLOCA:%.*]] : !cir.vector<!cir.double x 2>, !cir.ptr<!cir.vector<!cir.double x 2>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load{{.*}} [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: test_cmpnlepd
+  // LLVM: [[CMP:%.*]] = fcmp ugt <2 x double> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // LLVM-NEXT: ret <2 x double> [[CAST]]
+
+  // OGCG-LABEL: test_cmpnlepd
+  // OGCG: [[CMP:%.*]] = fcmp ugt <2 x double> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // OGCG-NEXT: ret <2 x double> [[CAST]]
+  return __builtin_ia32_cmpnlepd(A, B);
+}
+
+
+__m128 test_cmpnltps(__m128 A, __m128 B) {
+  // CIR-LABEL: @test_cmpnltps
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
+  // CIR: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] : !cir.vector<!s32i x 4> -> !cir.vector<!cir.float x 4>
+  // CIR-NEXT: cir.store{{.*}} [[CAST]], [[ALLOCA:%.*]] :  !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load{{.*}} [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: test_cmpnltps
+  // LLVM: [[CMP:%.*]] = fcmp uge <4 x float> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // LLVM-NEXT: ret <4 x float> [[CAST]]
+
+  // OGCG-LABEL: test_cmpnltps
+  // OGCG: [[CMP:%.*]] = fcmp uge <4 x float> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // OGCG-NEXT: ret <4 x float> [[CAST]]
+  return __builtin_ia32_cmpnltps(A, B);
+}
+
+
+__m128d test_cmpnltpd(__m128d A, __m128d B) {
+  // CIR-LABEL: @test_cmpnltpd
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, [[A:%.*]], [[B:%.*]]) :  !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  // CIR-NEXT: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] :  !cir.vector<!s64i x 2> -> !cir.vector<!cir.double x 2>
+  // CIR-NEXT: cir.store{{.*}} [[CAST]], [[ALLOCA:%.*]] : !cir.vector<!cir.double x 2>, !cir.ptr<!cir.vector<!cir.double x 2>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load{{.*}} [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: test_cmpnltpd
+  // LLVM: [[CMP:%.*]] = fcmp uge <2 x double> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // LLVM-NEXT: ret <2 x double> [[CAST]]
+
+  // OGCG-LABEL: test_cmpnltpd
+  // OGCG: [[CMP:%.*]] = fcmp uge <2 x double> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // OGCG-NEXT: ret <2 x double> [[CAST]]
+  return __builtin_ia32_cmpnltpd(A, B);
+}
+
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-floating-point.c b/clang/test/CIR/Incubator/CodeGen/builtin-floating-point.c
new file mode 100644
index 0000000000000..f49f2c6ac2635
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-floating-point.c
@@ -0,0 +1,1916 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t1.cir 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-apple-darwin-macho -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t1.cir 2>&1 | FileCheck %s --check-prefix=AARCH64
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+// lround
+
+long my_lroundf(float f) {
+  return __builtin_lroundf(f);
+  // CHECK: cir.func {{.*}} @my_lroundf
+  // CHECK: %{{.+}} = cir.lround %{{.+}} : !cir.float -> !s64i
+
+  // LLVM: define dso_local i64 @my_lroundf
+  // LLVM:   %{{.+}} = call i64 @llvm.lround.i64.f32(float %{{.+}})
+  // LLVM: }
+}
+
+long my_lround(double f) {
+  return __builtin_lround(f);
+  // CHECK: cir.func {{.*}} @my_lround
+  // CHECK: %{{.+}} = cir.lround %{{.+}} : !cir.double -> !s64i
+
+  // LLVM: define dso_local i64 @my_lround
+  // LLVM:   %{{.+}} = call i64 @llvm.lround.i64.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long my_lroundl(long double f) {
+  return __builtin_lroundl(f);
+  // CHECK: cir.func {{.*}} @my_lroundl
+  // CHECK: %{{.+}} = cir.lround %{{.+}} : !cir.long_double<!cir.f80> -> !s64i
+  // AARCH64: %{{.+}} = cir.lround %{{.+}} : !cir.long_double<!cir.double> -> !s64i
+
+  // LLVM: define dso_local i64 @my_lroundl
+  // LLVM:   %{{.+}} = call i64 @llvm.lround.i64.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+long lroundf(float);
+long lround(double);
+long lroundl(long double);
+
+long call_lroundf(float f) {
+  return lroundf(f);
+  // CHECK: cir.func {{.*}} @call_lroundf
+  // CHECK: %{{.+}} = cir.lround %{{.+}} : !cir.float -> !s64i
+
+  // LLVM: define dso_local i64 @call_lroundf
+  // LLVM:   %{{.+}} = call i64 @llvm.lround.i64.f32(float %{{.+}})
+  // LLVM: }
+}
+
+long call_lround(double f) {
+  return lround(f);
+  // CHECK: cir.func {{.*}} @call_lround
+  // CHECK: %{{.+}} = cir.lround %{{.+}} : !cir.double -> !s64i
+
+  // LLVM: define dso_local i64 @call_lround
+  // LLVM:   %{{.+}} = call i64 @llvm.lround.i64.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long call_lroundl(long double f) {
+  return lroundl(f);
+  // CHECK: cir.func {{.*}} @call_lroundl
+  // CHECK: %{{.+}} = cir.lround %{{.+}} : !cir.long_double<!cir.f80> -> !s64i
+  // AARCH64: %{{.+}} = cir.lround %{{.+}} : !cir.long_double<!cir.double> -> !s64i
+
+  // LLVM: define dso_local i64 @call_lroundl
+  // LLVM:   %{{.+}} = call i64 @llvm.lround.i64.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// llround
+
+long long my_llroundf(float f) {
+  return __builtin_llroundf(f);
+  // CHECK: cir.func {{.*}} @my_llroundf
+  // CHECK: %{{.+}} = cir.llround %{{.+}} : !cir.float -> !s64i
+
+  // LLVM: define dso_local i64 @my_llroundf
+  // LLVM:   %{{.+}} = call i64 @llvm.llround.i64.f32(float %{{.+}})
+  // LLVM: }
+}
+
+long long my_llround(double f) {
+  return __builtin_llround(f);
+  // CHECK: cir.func {{.*}} @my_llround
+  // CHECK: %{{.+}} = cir.llround %{{.+}} : !cir.double -> !s64i
+
+  // LLVM: define dso_local i64 @my_llround
+  // LLVM:   %{{.+}} = call i64 @llvm.llround.i64.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long long my_llroundl(long double f) {
+  return __builtin_llroundl(f);
+  // CHECK: cir.func {{.*}} @my_llroundl
+  // CHECK: %{{.+}} = cir.llround %{{.+}} : !cir.long_double<!cir.f80> -> !s64i
+  // AARCH64: %{{.+}} = cir.llround %{{.+}} : !cir.long_double<!cir.double> -> !s64i
+
+  // LLVM: define dso_local i64 @my_llroundl
+  // LLVM:   %{{.+}} = call i64 @llvm.llround.i64.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+long long llroundf(float);
+long long llround(double);
+long long llroundl(long double);
+
+long long call_llroundf(float f) {
+  return llroundf(f);
+  // CHECK: cir.func {{.*}} @call_llroundf
+  // CHECK: %{{.+}} = cir.llround %{{.+}} : !cir.float -> !s64i
+
+  // LLVM: define dso_local i64 @call_llroundf
+  // LLVM:   %{{.+}} = call i64 @llvm.llround.i64.f32(float %{{.+}})
+  // LLVM: }
+}
+
+long long call_llround(double f) {
+  return llround(f);
+  // CHECK: cir.func {{.*}} @call_llround
+  // CHECK: %{{.+}} = cir.llround %{{.+}} : !cir.double -> !s64i
+
+  // LLVM: define dso_local i64 @call_llround
+  // LLVM:   %{{.+}} = call i64 @llvm.llround.i64.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long long call_llroundl(long double f) {
+  return llroundl(f);
+  // CHECK: cir.func {{.*}} @call_llroundl
+  // CHECK: %{{.+}} = cir.llround %{{.+}} : !cir.long_double<!cir.f80> -> !s64i
+  // AARCH64: %{{.+}} = cir.llround %{{.+}} : !cir.long_double<!cir.double> -> !s64i
+
+  // LLVM: define dso_local i64 @call_llroundl
+  // LLVM:   %{{.+}} = call i64 @llvm.llround.i64.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// lrint
+
+long my_lrintf(float f) {
+  return __builtin_lrintf(f);
+  // CHECK: cir.func {{.*}} @my_lrintf
+  // CHECK: %{{.+}} = cir.lrint %{{.+}} : !cir.float -> !s64i
+
+  // LLVM: define dso_local i64 @my_lrintf
+  // LLVM:   %{{.+}} = call i64 @llvm.lrint.i64.f32(float %{{.+}})
+  // LLVM: }
+}
+
+long my_lrint(double f) {
+  return __builtin_lrint(f);
+  // CHECK: cir.func {{.*}} @my_lrint
+  // CHECK: %{{.+}} = cir.lrint %{{.+}} : !cir.double -> !s64i
+
+  // LLVM: define dso_local i64 @my_lrint
+  // LLVM:   %{{.+}} = call i64 @llvm.lrint.i64.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long my_lrintl(long double f) {
+  return __builtin_lrintl(f);
+  // CHECK: cir.func {{.*}} @my_lrintl
+  // CHECK: %{{.+}} = cir.lrint %{{.+}} : !cir.long_double<!cir.f80> -> !s64i
+  // AARCH64: %{{.+}} = cir.lrint %{{.+}} : !cir.long_double<!cir.double> -> !s64i
+
+  // LLVM: define dso_local i64 @my_lrintl
+  // LLVM:   %{{.+}} = call i64 @llvm.lrint.i64.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+long lrintf(float);
+long lrint(double);
+long lrintl(long double);
+
+long call_lrintf(float f) {
+  return lrintf(f);
+  // CHECK: cir.func {{.*}} @call_lrintf
+  // CHECK: %{{.+}} = cir.lrint %{{.+}} : !cir.float -> !s64i
+
+  // LLVM: define dso_local i64 @call_lrintf
+  // LLVM:   %{{.+}} = call i64 @llvm.lrint.i64.f32(float %{{.+}})
+  // LLVM: }
+}
+
+long call_lrint(double f) {
+  return lrint(f);
+  // CHECK: cir.func {{.*}} @call_lrint
+  // CHECK: %{{.+}} = cir.lrint %{{.+}} : !cir.double -> !s64i
+
+  // LLVM: define dso_local i64 @call_lrint
+  // LLVM:   %{{.+}} = call i64 @llvm.lrint.i64.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long call_lrintl(long double f) {
+  return lrintl(f);
+  // CHECK: cir.func {{.*}} @call_lrintl
+  // CHECK: %{{.+}} = cir.lrint %{{.+}} : !cir.long_double<!cir.f80> -> !s64i
+  // AARCH64: %{{.+}} = cir.lrint %{{.+}} : !cir.long_double<!cir.double> -> !s64i
+
+  // LLVM: define dso_local i64 @call_lrintl
+  // LLVM:   %{{.+}} = call i64 @llvm.lrint.i64.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// llrint
+
+long long my_llrintf(float f) {
+  return __builtin_llrintf(f);
+  // CHECK: cir.func {{.*}} @my_llrintf
+  // CHECK: %{{.+}} = cir.llrint %{{.+}} : !cir.float -> !s64i
+
+  // LLVM: define dso_local i64 @my_llrintf
+  // LLVM:   %{{.+}} = call i64 @llvm.llrint.i64.f32(float %{{.+}})
+  // LLVM: }
+}
+
+long long my_llrint(double f) {
+  return __builtin_llrint(f);
+  // CHECK: cir.func {{.*}} @my_llrint
+  // CHECK: %{{.+}} = cir.llrint %{{.+}} : !cir.double -> !s64i
+
+  // LLVM: define dso_local i64 @my_llrint
+  // LLVM:   %{{.+}} = call i64 @llvm.llrint.i64.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long long my_llrintl(long double f) {
+  return __builtin_llrintl(f);
+  // CHECK: cir.func {{.*}} @my_llrintl
+  // CHECK: %{{.+}} = cir.llrint %{{.+}} : !cir.long_double<!cir.f80> -> !s64i
+  // AARCH64: %{{.+}} = cir.llrint %{{.+}} : !cir.long_double<!cir.double> -> !s64i
+
+  // LLVM: define dso_local i64 @my_llrintl
+  // LLVM:   %{{.+}} = call i64 @llvm.llrint.i64.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+long long llrintf(float);
+long long llrint(double);
+long long llrintl(long double);
+
+long long call_llrintf(float f) {
+  return llrintf(f);
+  // CHECK: cir.func {{.*}} @call_llrintf
+  // CHECK: %{{.+}} = cir.llrint %{{.+}} : !cir.float -> !s64i
+
+  // LLVM: define dso_local i64 @call_llrintf
+  // LLVM:   %{{.+}} = call i64 @llvm.llrint.i64.f32(float %{{.+}})
+  // LLVM: }
+}
+
+long long call_llrint(double f) {
+  return llrint(f);
+  // CHECK: cir.func {{.*}} @call_llrint
+  // CHECK: %{{.+}} = cir.llrint %{{.+}} : !cir.double -> !s64i
+
+  // LLVM: define dso_local i64 @call_llrint
+  // LLVM:   %{{.+}} = call i64 @llvm.llrint.i64.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long long call_llrintl(long double f) {
+  return llrintl(f);
+  // CHECK: cir.func {{.*}} @call_llrintl
+  // CHECK: %{{.+}} = cir.llrint %{{.+}} : !cir.long_double<!cir.f80> -> !s64i
+  // AARCH64: %{{.+}} = cir.llrint %{{.+}} : !cir.long_double<!cir.double> -> !s64i
+
+  // LLVM: define dso_local i64 @call_llrintl
+  // LLVM:   %{{.+}} = call i64 @llvm.llrint.i64.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// ceil
+
+float my_ceilf(float f) {
+  return __builtin_ceilf(f);
+  // CHECK: cir.func {{.*}} @my_ceilf
+  // CHECK: {{.+}} = cir.ceil {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_ceilf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.ceil.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_ceil(double f) {
+  return __builtin_ceil(f);
+  // CHECK: cir.func {{.*}} @my_ceil
+  // CHECK: {{.+}} = cir.ceil {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_ceil(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.ceil.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_ceill(long double f) {
+  return __builtin_ceill(f);
+  // CHECK: cir.func {{.*}} @my_ceill
+  // CHECK: {{.+}} = cir.ceil {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.ceil {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_ceill(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.ceil.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float ceilf(float);
+double ceil(double);
+long double ceill(long double);
+
+float call_ceilf(float f) {
+  return ceilf(f);
+  // CHECK: cir.func {{.*}} @call_ceilf
+  // CHECK: {{.+}} = cir.ceil {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_ceilf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.ceil.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_ceil(double f) {
+  return ceil(f);
+  // CHECK: cir.func {{.*}} @call_ceil
+  // CHECK: {{.+}} = cir.ceil {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_ceil(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.ceil.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_ceill(long double f) {
+  return ceill(f);
+  // CHECK: cir.func {{.*}} @call_ceill
+  // CHECK: {{.+}} = cir.ceil {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.ceil {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_ceill(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.ceil.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// cos
+
+float my_cosf(float f) {
+  return __builtin_cosf(f);
+  // CHECK: cir.func {{.*}} @my_cosf
+  // CHECK: {{.+}} = cir.cos {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_cosf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.cos.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_cos(double f) {
+  return __builtin_cos(f);
+  // CHECK: cir.func {{.*}} @my_cos
+  // CHECK: {{.+}} = cir.cos {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_cos(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.cos.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_cosl(long double f) {
+  return __builtin_cosl(f);
+  // CHECK: cir.func {{.*}} @my_cosl
+  // CHECK: {{.+}} = cir.cos {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.cos {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_cosl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.cos.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float cosf(float);
+double cos(double);
+long double cosl(long double);
+
+float call_cosf(float f) {
+  return cosf(f);
+  // CHECK: cir.func {{.*}} @call_cosf
+  // CHECK: {{.+}} = cir.cos {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_cosf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.cos.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_cos(double f) {
+  return cos(f);
+  // CHECK: cir.func {{.*}} @call_cos
+  // CHECK: {{.+}} = cir.cos {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_cos(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.cos.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_cosl(long double f) {
+  return cosl(f);
+  // CHECK: cir.func {{.*}} @call_cosl
+  // CHECK: {{.+}} = cir.cos {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.cos {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_cosl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.cos.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// exp
+
+float my_expf(float f) {
+  return __builtin_expf(f);
+  // CHECK: cir.func {{.*}} @my_expf
+  // CHECK: {{.+}} = cir.exp {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_expf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.exp.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_exp(double f) {
+  return __builtin_exp(f);
+  // CHECK: cir.func {{.*}} @my_exp
+  // CHECK: {{.+}} = cir.exp {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_exp(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.exp.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_expl(long double f) {
+  return __builtin_expl(f);
+  // CHECK: cir.func {{.*}} @my_expl
+  // CHECK: {{.+}} = cir.exp {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.exp {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_expl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.exp.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float expf(float);
+double exp(double);
+long double expl(long double);
+
+float call_expf(float f) {
+  return expf(f);
+  // CHECK: cir.func {{.*}} @call_expf
+  // CHECK: {{.+}} = cir.exp {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_expf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.exp.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_exp(double f) {
+  return exp(f);
+  // CHECK: cir.func {{.*}} @call_exp
+  // CHECK: {{.+}} = cir.exp {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_exp(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.exp.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_expl(long double f) {
+  return expl(f);
+  // CHECK: cir.func {{.*}} @call_expl
+  // CHECK: {{.+}} = cir.exp {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.exp {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_expl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.exp.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// exp2
+
+float my_exp2f(float f) {
+  return __builtin_exp2f(f);
+  // CHECK: cir.func {{.*}} @my_exp2f
+  // CHECK: {{.+}} = cir.exp2 {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_exp2f(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.exp2.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_exp2(double f) {
+  return __builtin_exp2(f);
+  // CHECK: cir.func {{.*}} @my_exp2
+  // CHECK: {{.+}} = cir.exp2 {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_exp2(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.exp2.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_exp2l(long double f) {
+  return __builtin_exp2l(f);
+  // CHECK: cir.func {{.*}} @my_exp2l
+  // CHECK: {{.+}} = cir.exp2 {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.exp2 {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_exp2l(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.exp2.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float exp2f(float);
+double exp2(double);
+long double exp2l(long double);
+
+float call_exp2f(float f) {
+  return exp2f(f);
+  // CHECK: cir.func {{.*}} @call_exp2f
+  // CHECK: {{.+}} = cir.exp2 {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_exp2f(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.exp2.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_exp2(double f) {
+  return exp2(f);
+  // CHECK: cir.func {{.*}} @call_exp2
+  // CHECK: {{.+}} = cir.exp2 {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_exp2(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.exp2.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_exp2l(long double f) {
+  return exp2l(f);
+  // CHECK: cir.func {{.*}} @call_exp2l
+  // CHECK: {{.+}} = cir.exp2 {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.exp2 {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_exp2l(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.exp2.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// floor
+
+float my_floorf(float f) {
+  return __builtin_floorf(f);
+  // CHECK: cir.func {{.*}} @my_floorf
+  // CHECK: {{.+}} = cir.floor {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_floorf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.floor.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_floor(double f) {
+  return __builtin_floor(f);
+  // CHECK: cir.func {{.*}} @my_floor
+  // CHECK: {{.+}} = cir.floor {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_floor(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.floor.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_floorl(long double f) {
+  return __builtin_floorl(f);
+  // CHECK: cir.func {{.*}} @my_floorl
+  // CHECK: {{.+}} = cir.floor {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.floor {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_floorl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.floor.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float floorf(float);
+double floor(double);
+long double floorl(long double);
+
+float call_floorf(float f) {
+  return floorf(f);
+  // CHECK: cir.func {{.*}} @call_floorf
+  // CHECK: {{.+}} = cir.floor {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_floorf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.floor.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_floor(double f) {
+  return floor(f);
+  // CHECK: cir.func {{.*}} @call_floor
+  // CHECK: {{.+}} = cir.floor {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_floor(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.floor.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_floorl(long double f) {
+  return floorl(f);
+  // CHECK: cir.func {{.*}} @call_floorl
+  // CHECK: {{.+}} = cir.floor {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.floor {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_floorl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.floor.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// log
+
+float my_logf(float f) {
+  return __builtin_logf(f);
+  // CHECK: cir.func {{.*}} @my_logf
+  // CHECK: {{.+}} = cir.log {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_logf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.log.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_log(double f) {
+  return __builtin_log(f);
+  // CHECK: cir.func {{.*}} @my_log
+  // CHECK: {{.+}} = cir.log {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_log(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.log.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_logl(long double f) {
+  return __builtin_logl(f);
+  // CHECK: cir.func {{.*}} @my_logl
+  // CHECK: {{.+}} = cir.log {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.log {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_logl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.log.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float logf(float);
+double log(double);
+long double logl(long double);
+
+float call_logf(float f) {
+  return logf(f);
+  // CHECK: cir.func {{.*}} @call_logf
+  // CHECK: {{.+}} = cir.log {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_logf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.log.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_log(double f) {
+  return log(f);
+  // CHECK: cir.func {{.*}} @call_log
+  // CHECK: {{.+}} = cir.log {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_log(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.log.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_logl(long double f) {
+  return logl(f);
+  // CHECK: cir.func {{.*}} @call_logl
+  // CHECK: {{.+}} = cir.log {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.log {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_logl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.log.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// log10
+
+float my_log10f(float f) {
+  return __builtin_log10f(f);
+  // CHECK: cir.func {{.*}} @my_log10f
+  // CHECK: {{.+}} = cir.log10 {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_log10f(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.log10.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_log10(double f) {
+  return __builtin_log10(f);
+  // CHECK: cir.func {{.*}} @my_log10
+  // CHECK: {{.+}} = cir.log10 {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_log10(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.log10.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_log10l(long double f) {
+  return __builtin_log10l(f);
+  // CHECK: cir.func {{.*}} @my_log10l
+  // CHECK: {{.+}} = cir.log10 {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.log10 {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_log10l(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.log10.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float log10f(float);
+double log10(double);
+long double log10l(long double);
+
+float call_log10f(float f) {
+  return log10f(f);
+  // CHECK: cir.func {{.*}} @call_log10f
+  // CHECK: {{.+}} = cir.log10 {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_log10f(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.log10.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_log10(double f) {
+  return log10(f);
+  // CHECK: cir.func {{.*}} @call_log10
+  // CHECK: {{.+}} = cir.log10 {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_log10(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.log10.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_log10l(long double f) {
+  return log10l(f);
+  // CHECK: cir.func {{.*}} @call_log10l
+  // CHECK: {{.+}} = cir.log10 {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.log10 {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_log10l(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.log10.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// log2
+
+float my_log2f(float f) {
+  return __builtin_log2f(f);
+  // CHECK: cir.func {{.*}} @my_log2f
+  // CHECK: {{.+}} = cir.log2 {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_log2f(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.log2.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_log2(double f) {
+  return __builtin_log2(f);
+  // CHECK: cir.func {{.*}} @my_log2
+  // CHECK: {{.+}} = cir.log2 {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_log2(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.log2.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_log2l(long double f) {
+  return __builtin_log2l(f);
+  // CHECK: cir.func {{.*}} @my_log2l
+  // CHECK: {{.+}} = cir.log2 {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.log2 {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_log2l(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.log2.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float log2f(float);
+double log2(double);
+long double log2l(long double);
+
+float call_log2f(float f) {
+  return log2f(f);
+  // CHECK: cir.func {{.*}} @call_log2f
+  // CHECK: {{.+}} = cir.log2 {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_log2f(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.log2.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_log2(double f) {
+  return log2(f);
+  // CHECK: cir.func {{.*}} @call_log2
+  // CHECK: {{.+}} = cir.log2 {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_log2(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.log2.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_log2l(long double f) {
+  return log2l(f);
+  // CHECK: cir.func {{.*}} @call_log2l
+  // CHECK: {{.+}} = cir.log2 {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.log2 {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_log2l(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.log2.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// nearbyint
+
+float my_nearbyintf(float f) {
+  return __builtin_nearbyintf(f);
+  // CHECK: cir.func {{.*}} @my_nearbyintf
+  // CHECK: {{.+}} = cir.nearbyint {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_nearbyintf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.nearbyint.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_nearbyint(double f) {
+  return __builtin_nearbyint(f);
+  // CHECK: cir.func {{.*}} @my_nearbyint
+  // CHECK: {{.+}} = cir.nearbyint {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_nearbyint(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.nearbyint.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_nearbyintl(long double f) {
+  return __builtin_nearbyintl(f);
+  // CHECK: cir.func {{.*}} @my_nearbyintl
+  // CHECK: {{.+}} = cir.nearbyint {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.nearbyint {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_nearbyintl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.nearbyint.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float nearbyintf(float);
+double nearbyint(double);
+long double nearbyintl(long double);
+
+float call_nearbyintf(float f) {
+  return nearbyintf(f);
+  // CHECK: cir.func {{.*}} @call_nearbyintf
+  // CHECK: {{.+}} = cir.nearbyint {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_nearbyintf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.nearbyint.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_nearbyint(double f) {
+  return nearbyint(f);
+  // CHECK: cir.func {{.*}} @call_nearbyint
+  // CHECK: {{.+}} = cir.nearbyint {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_nearbyint(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.nearbyint.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_nearbyintl(long double f) {
+  return nearbyintl(f);
+  // CHECK: cir.func {{.*}} @call_nearbyintl
+  // CHECK: {{.+}} = cir.nearbyint {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.nearbyint {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_nearbyintl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.nearbyint.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// rint
+
+float my_rintf(float f) {
+  return __builtin_rintf(f);
+  // CHECK: cir.func {{.*}} @my_rintf
+  // CHECK: {{.+}} = cir.rint {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_rintf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.rint.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_rint(double f) {
+  return __builtin_rint(f);
+  // CHECK: cir.func {{.*}} @my_rint
+  // CHECK: {{.+}} = cir.rint {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_rint(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.rint.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_rintl(long double f) {
+  return __builtin_rintl(f);
+  // CHECK: cir.func {{.*}} @my_rintl
+  // CHECK: {{.+}} = cir.rint {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.rint {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_rintl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.rint.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float rintf(float);
+double rint(double);
+long double rintl(long double);
+
+float call_rintf(float f) {
+  return rintf(f);
+  // CHECK: cir.func {{.*}} @call_rintf
+  // CHECK: {{.+}} = cir.rint {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_rintf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.rint.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_rint(double f) {
+  return rint(f);
+  // CHECK: cir.func {{.*}} @call_rint
+  // CHECK: {{.+}} = cir.rint {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_rint(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.rint.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_rintl(long double f) {
+  return rintl(f);
+  // CHECK: cir.func {{.*}} @call_rintl
+  // CHECK: {{.+}} = cir.rint {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.rint {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_rintl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.rint.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// round
+
+float my_roundf(float f) {
+  return __builtin_roundf(f);
+  // CHECK: cir.func {{.*}} @my_roundf
+  // CHECK: {{.+}} = cir.round {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_roundf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.round.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_round(double f) {
+  return __builtin_round(f);
+  // CHECK: cir.func {{.*}} @my_round
+  // CHECK: {{.+}} = cir.round {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_round(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.round.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_roundl(long double f) {
+  return __builtin_roundl(f);
+  // CHECK: cir.func {{.*}} @my_roundl
+  // CHECK: {{.+}} = cir.round {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.round {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_roundl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.round.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float roundf(float);
+double round(double);
+long double roundl(long double);
+
+float call_roundf(float f) {
+  return roundf(f);
+  // CHECK: cir.func {{.*}} @call_roundf
+  // CHECK: {{.+}} = cir.round {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_roundf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.round.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_round(double f) {
+  return round(f);
+  // CHECK: cir.func {{.*}} @call_round
+  // CHECK: {{.+}} = cir.round {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_round(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.round.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_roundl(long double f) {
+  return roundl(f);
+  // CHECK: cir.func {{.*}} @call_roundl
+  // CHECK: {{.+}} = cir.round {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.round {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_roundl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.round.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// sin
+
+float my_sinf(float f) {
+  return __builtin_sinf(f);
+  // CHECK: cir.func {{.*}} @my_sinf
+  // CHECK: {{.+}} = cir.sin {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_sinf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.sin.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_sin(double f) {
+  return __builtin_sin(f);
+  // CHECK: cir.func {{.*}} @my_sin
+  // CHECK: {{.+}} = cir.sin {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_sin(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.sin.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_sinl(long double f) {
+  return __builtin_sinl(f);
+  // CHECK: cir.func {{.*}} @my_sinl
+  // CHECK: {{.+}} = cir.sin {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.sin {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_sinl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.sin.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float sinf(float);
+double sin(double);
+long double sinl(long double);
+
+float call_sinf(float f) {
+  return sinf(f);
+  // CHECK: cir.func {{.*}} @call_sinf
+  // CHECK: {{.+}} = cir.sin {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_sinf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.sin.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_sin(double f) {
+  return sin(f);
+  // CHECK: cir.func {{.*}} @call_sin
+  // CHECK: {{.+}} = cir.sin {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_sin(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.sin.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_sinl(long double f) {
+  return sinl(f);
+  // CHECK: cir.func {{.*}} @call_sinl
+  // CHECK: {{.+}} = cir.sin {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.sin {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_sinl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.sin.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// sqrt
+
+float my_sqrtf(float f) {
+  return __builtin_sqrtf(f);
+  // CHECK: cir.func {{.*}} @my_sqrtf
+  // CHECK: {{.+}} = cir.sqrt {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_sqrtf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.sqrt.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_sqrt(double f) {
+  return __builtin_sqrt(f);
+  // CHECK: cir.func {{.*}} @my_sqrt
+  // CHECK: {{.+}} = cir.sqrt {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_sqrt(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.sqrt.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_sqrtl(long double f) {
+  return __builtin_sqrtl(f);
+  // CHECK: cir.func {{.*}} @my_sqrtl
+  // CHECK: {{.+}} = cir.sqrt {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.sqrt {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_sqrtl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.sqrt.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float sqrtf(float);
+double sqrt(double);
+long double sqrtl(long double);
+
+float call_sqrtf(float f) {
+  return sqrtf(f);
+  // CHECK: cir.func {{.*}} @call_sqrtf
+  // CHECK: {{.+}} = cir.sqrt {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_sqrtf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.sqrt.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_sqrt(double f) {
+  return sqrt(f);
+  // CHECK: cir.func {{.*}} @call_sqrt
+  // CHECK: {{.+}} = cir.sqrt {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_sqrt(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.sqrt.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_sqrtl(long double f) {
+  return sqrtl(f);
+  // CHECK: cir.func {{.*}} @call_sqrtl
+  // CHECK: {{.+}} = cir.sqrt {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.sqrt {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_sqrtl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.sqrt.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// tan
+
+float my_tanf(float f) {
+  return __builtin_tanf(f);
+  // CHECK: cir.func {{.*}} @my_tanf
+  // CHECK: {{.+}} = cir.tan {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_tanf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.tan.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_tan(double f) {
+  return __builtin_tan(f);
+  // CHECK: cir.func {{.*}} @my_tan
+  // CHECK: {{.+}} = cir.tan {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_tan(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.tan.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_tanl(long double f) {
+  return __builtin_tanl(f);
+  // CHECK: cir.func {{.*}} @my_tanl
+  // CHECK: {{.+}} = cir.tan {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.tan {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_tanl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.tan.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float tanf(float);
+double tan(double);
+long double tanl(long double);
+
+float call_tanf(float f) {
+  return tanf(f);
+  // CHECK: cir.func {{.*}} @call_tanf
+  // CHECK: {{.+}} = cir.tan {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_tanf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.tan.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_tan(double f) {
+  return tan(f);
+  // CHECK: cir.func {{.*}} @call_tan
+  // CHECK: {{.+}} = cir.tan {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_tan(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.tan.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_tanl(long double f) {
+  return tanl(f);
+  // CHECK: cir.func {{.*}} @call_tanl
+  // CHECK: {{.+}} = cir.tan {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.tan {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_tanl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.tan.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// trunc
+
+float my_truncf(float f) {
+  return __builtin_truncf(f);
+  // CHECK: cir.func {{.*}} @my_truncf
+  // CHECK: {{.+}} = cir.trunc {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_truncf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.trunc.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double my_trunc(double f) {
+  return __builtin_trunc(f);
+  // CHECK: cir.func {{.*}} @my_trunc
+  // CHECK: {{.+}} = cir.trunc {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_trunc(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.trunc.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double my_truncl(long double f) {
+  return __builtin_truncl(f);
+  // CHECK: cir.func {{.*}} @my_truncl
+  // CHECK: {{.+}} = cir.trunc {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.trunc {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_truncl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.trunc.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float truncf(float);
+double trunc(double);
+long double truncl(long double);
+
+float call_truncf(float f) {
+  return truncf(f);
+  // CHECK: cir.func {{.*}} @call_truncf
+  // CHECK: {{.+}} = cir.trunc {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_truncf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.trunc.f32(float %{{.+}})
+  // LLVM: }
+}
+
+double call_trunc(double f) {
+  return trunc(f);
+  // CHECK: cir.func {{.*}} @call_trunc
+  // CHECK: {{.+}} = cir.trunc {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_trunc(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.trunc.f64(double %{{.+}})
+  // LLVM: }
+}
+
+long double call_truncl(long double f) {
+  return truncl(f);
+  // CHECK: cir.func {{.*}} @call_truncl
+  // CHECK: {{.+}} = cir.trunc {{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: {{.+}} = cir.trunc {{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_truncl(x86_fp80 %0)
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.trunc.f80(x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// copysign
+
+float my_copysignf(float x, float y) {
+  return __builtin_copysignf(x, y);
+  // CHECK: cir.func {{.*}} @my_copysignf
+  // CHECK:   %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_copysignf
+  // LLVM:   %{{.+}} = call float @llvm.copysign.f32(float %{{.+}}, float %{{.+}})
+  // LLVM: }
+}
+
+double my_copysign(double x, double y) {
+  return __builtin_copysign(x, y);
+  // CHECK: cir.func {{.*}} @my_copysign
+  // CHECK:   %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_copysign
+  // LLVM:   %{{.+}} = call double @llvm.copysign.f64(double %{{.+}}, double %{{.+}})
+  // LLVM: }
+}
+
+long double my_copysignl(long double x, long double y) {
+  return __builtin_copysignl(x, y);
+  // CHECK: cir.func {{.*}} @my_copysignl
+  // CHECK:   %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_copysignl
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.copysign.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float copysignf(float, float);
+double copysign(double, double);
+long double copysignl(long double, long double);
+
+float call_copysignf(float x, float y) {
+  return copysignf(x, y);
+  // CHECK: cir.func {{.*}} @call_copysignf
+  // CHECK:   %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_copysignf
+  // LLVM:   %{{.+}} = call float @llvm.copysign.f32(float %{{.+}}, float %{{.+}})
+  // LLVM: }
+}
+
+double call_copysign(double x, double y) {
+  return copysign(x, y);
+  // CHECK: cir.func {{.*}} @call_copysign
+  // CHECK:   %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_copysign
+  // LLVM:   %{{.+}} = call double @llvm.copysign.f64(double %{{.+}}, double %{{.+}})
+  // LLVM: }
+}
+
+long double call_copysignl(long double x, long double y) {
+  return copysignl(x, y);
+  // CHECK: cir.func {{.*}} @call_copysignl
+  // CHECK:   %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.copysign %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_copysignl
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.copysign.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// fmax
+
+float my_fmaxf(float x, float y) {
+  return __builtin_fmaxf(x, y);
+  // CHECK: cir.func {{.*}} @my_fmaxf
+  // CHECK:   %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_fmaxf
+  // LLVM:   %{{.+}} = call float @llvm.maxnum.f32(float %{{.+}}, float %{{.+}})
+  // LLVM: }
+}
+
+double my_fmax(double x, double y) {
+  return __builtin_fmax(x, y);
+  // CHECK: cir.func {{.*}} @my_fmax
+  // CHECK:   %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_fmax
+  // LLVM:   %{{.+}} = call double @llvm.maxnum.f64(double %{{.+}}, double %{{.+}})
+  // LLVM: }
+}
+
+long double my_fmaxl(long double x, long double y) {
+  return __builtin_fmaxl(x, y);
+  // CHECK: cir.func {{.*}} @my_fmaxl
+  // CHECK:   %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_fmaxl
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float fmaxf(float, float);
+double fmax(double, double);
+long double fmaxl(long double, long double);
+
+float call_fmaxf(float x, float y) {
+  return fmaxf(x, y);
+  // CHECK: cir.func {{.*}} @call_fmaxf
+  // CHECK:   %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_fmaxf
+  // LLVM:   %{{.+}} = call float @llvm.maxnum.f32(float %{{.+}}, float %{{.+}})
+  // LLVM: }
+}
+
+double call_fmax(double x, double y) {
+  return fmax(x, y);
+  // CHECK: cir.func {{.*}} @call_fmax
+  // CHECK:   %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_fmax
+  // LLVM:   %{{.+}} = call double @llvm.maxnum.f64(double %{{.+}}, double %{{.+}})
+  // LLVM: }
+}
+
+long double call_fmaxl(long double x, long double y) {
+  return fmaxl(x, y);
+  // CHECK: cir.func {{.*}} @call_fmaxl
+  // CHECK:   %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.fmaxnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_fmaxl
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// fmin
+
+float my_fminf(float x, float y) {
+  return __builtin_fminf(x, y);
+  // CHECK: cir.func {{.*}} @my_fminf
+  // CHECK:   %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_fminf
+  // LLVM:   %{{.+}} = call float @llvm.minnum.f32(float %{{.+}}, float %{{.+}})
+  // LLVM: }
+}
+
+double my_fmin(double x, double y) {
+  return __builtin_fmin(x, y);
+  // CHECK: cir.func {{.*}} @my_fmin
+  // CHECK:   %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_fmin
+  // LLVM:   %{{.+}} = call double @llvm.minnum.f64(double %{{.+}}, double %{{.+}})
+  // LLVM: }
+}
+
+long double my_fminl(long double x, long double y) {
+  return __builtin_fminl(x, y);
+  // CHECK: cir.func {{.*}} @my_fminl
+  // CHECK:   %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_fminl
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.minnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float fminf(float, float);
+double fmin(double, double);
+long double fminl(long double, long double);
+
+float call_fminf(float x, float y) {
+  return fminf(x, y);
+  // CHECK: cir.func {{.*}} @call_fminf
+  // CHECK:   %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_fminf
+  // LLVM:   %{{.+}} = call float @llvm.minnum.f32(float %{{.+}}, float %{{.+}})
+  // LLVM: }
+}
+
+double call_fmin(double x, double y) {
+  return fmin(x, y);
+  // CHECK: cir.func {{.*}} @call_fmin
+  // CHECK:   %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_fmin
+  // LLVM:   %{{.+}} = call double @llvm.minnum.f64(double %{{.+}}, double %{{.+}})
+  // LLVM: }
+}
+
+long double call_fminl(long double x, long double y) {
+  return fminl(x, y);
+  // CHECK: cir.func {{.*}} @call_fminl
+  // CHECK:   %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.fminnum %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_fminl
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.minnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// fmod
+
+float my_fmodf(float x, float y) {
+  return __builtin_fmodf(x, y);
+  // CHECK: cir.func {{.*}} @my_fmodf
+  // CHECK:   %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_fmodf
+  // LLVM:   %{{.+}} = frem float %{{.+}}, %{{.+}}
+  // LLVM: }
+}
+
+double my_fmod(double x, double y) {
+  return __builtin_fmod(x, y);
+  // CHECK: cir.func {{.*}} @my_fmod
+  // CHECK:   %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_fmod
+  // LLVM:   %{{.+}} = frem double %{{.+}}, %{{.+}}
+  // LLVM: }
+}
+
+long double my_fmodl(long double x, long double y) {
+  return __builtin_fmodl(x, y);
+  // CHECK: cir.func {{.*}} @my_fmodl
+  // CHECK:   %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_fmodl
+  // LLVM:   %{{.+}} = frem x86_fp80 %{{.+}}, %{{.+}}
+  // LLVM: }
+}
+
+float fmodf(float, float);
+double fmod(double, double);
+long double fmodl(long double, long double);
+
+float call_fmodf(float x, float y) {
+  return fmodf(x, y);
+  // CHECK: cir.func {{.*}} @call_fmodf
+  // CHECK:   %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_fmodf
+  // LLVM:   %{{.+}} = frem float %{{.+}}, %{{.+}}
+  // LLVM: }
+}
+
+double call_fmod(double x, double y) {
+  return fmod(x, y);
+  // CHECK: cir.func {{.*}} @call_fmod
+  // CHECK:   %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_fmod
+  // LLVM:   %{{.+}} = frem double %{{.+}}, %{{.+}}
+  // LLVM: }
+}
+
+long double call_fmodl(long double x, long double y) {
+  return fmodl(x, y);
+  // CHECK: cir.func {{.*}} @call_fmodl
+  // CHECK:   %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.fmod %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_fmodl
+  // LLVM:   %{{.+}} = frem x86_fp80 %{{.+}}, %{{.+}}
+  // LLVM: }
+}
+
+// pow
+
+float my_powf(float x, float y) {
+  return __builtin_powf(x, y);
+  // CHECK: cir.func {{.*}} @my_powf
+  // CHECK:   %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_powf
+  // LLVM:   %{{.+}} = call float @llvm.pow.f32(float %{{.+}}, float %{{.+}})
+  // LLVM: }
+}
+
+double my_pow(double x, double y) {
+  return __builtin_pow(x, y);
+  // CHECK: cir.func {{.*}} @my_pow
+  // CHECK:   %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_pow
+  // LLVM:   %{{.+}} = call double @llvm.pow.f64(double %{{.+}}, double %{{.+}})
+  // LLVM: }
+}
+
+long double my_powl(long double x, long double y) {
+  return __builtin_powl(x, y);
+  // CHECK: cir.func {{.*}} @my_powl
+  // CHECK:   %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @my_powl
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.pow.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+float powf(float, float);
+double pow(double, double);
+long double powl(long double, long double);
+
+float call_powf(float x, float y) {
+  return powf(x, y);
+  // CHECK: cir.func {{.*}} @call_powf
+  // CHECK:   %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.float
+
+  // LLVM: define dso_local float @call_powf
+  // LLVM:   %{{.+}} = call float @llvm.pow.f32(float %{{.+}}, float %{{.+}})
+  // LLVM: }
+}
+
+double call_pow(double x, double y) {
+  return pow(x, y);
+  // CHECK: cir.func {{.*}} @call_pow
+  // CHECK:   %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.double
+
+  // LLVM: define dso_local double @call_pow
+  // LLVM:   %{{.+}} = call double @llvm.pow.f64(double %{{.+}}, double %{{.+}})
+  // LLVM: }
+}
+
+long double call_powl(long double x, long double y) {
+  return powl(x, y);
+  // CHECK: cir.func {{.*}} @call_powl
+  // CHECK:   %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.long_double<!cir.f80>
+  // AARCH64: %{{.+}} = cir.pow %{{.+}}, %{{.+}} : !cir.long_double<!cir.double>
+
+  // LLVM: define dso_local x86_fp80 @call_powl
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.pow.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+  // LLVM: }
+}
+
+// acos
+
+float my_acosf(float f) {
+  return __builtin_acosf(f);
+  // CHECK: cir.func {{.*}} @my_acosf
+  // CHECK: {{.+}} = cir.acos {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_acosf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.acos.f32(float %{{.+}})
+  // LLVM: }
+
+  // OGCG: define dso_local float @my_acosf(float {{.+}})
+  // OGCG:   %{{.+}} = call float @llvm.acos.f32(float %{{.+}})
+  // OGCG: }
+}
+
+double my_acos(double f) {
+  return __builtin_acos(f);
+  // CHECK: cir.func {{.*}} @my_acos
+  // CHECK: {{.+}} = cir.acos {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_acos(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.acos.f64(double %{{.+}})
+  // LLVM: }
+
+  // OGCG: define dso_local double @my_acos(double {{.+}})
+  // OGCG:   %{{.+}} = call double @llvm.acos.f64(double %{{.+}})
+  // OGCG: }
+}
+
+// asin
+
+float my_asinf(float f) {
+  return __builtin_asinf(f);
+  // CHECK: cir.func {{.*}} @my_asinf
+  // CHECK: {{.+}} = cir.asin {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_asinf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.asin.f32(float %{{.+}})
+  // LLVM: }
+
+  // OGCG: define dso_local float @my_asinf(float {{.+}})
+  // OGCG:   %{{.+}} = call float @llvm.asin.f32(float %{{.+}})
+  // OGCG: }
+}
+
+double my_asin(double f) {
+  return __builtin_asin(f);
+  // CHECK: cir.func {{.*}} @my_asin
+  // CHECK: {{.+}} = cir.asin {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_asin(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.asin.f64(double %{{.+}})
+  // LLVM: }
+
+  // OGCG: define dso_local double @my_asin(double {{.+}})
+  // OGCG:   %{{.+}} = call double @llvm.asin.f64(double %{{.+}})
+  // OGCG: }
+}
+
+// atan
+
+float my_atanf(float f) {
+  return __builtin_atanf(f);
+  // CHECK: cir.func {{.*}} @my_atanf
+  // CHECK: {{.+}} = cir.atan {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_atanf(float %0)
+  // LLVM:   %{{.+}} = call float @llvm.atan.f32(float %{{.+}})
+  // LLVM: }
+
+  // OGCG: define dso_local float @my_atanf(float {{.+}})
+  // OGCG:   %{{.+}} = call float @llvm.atan.f32(float %{{.+}})
+  // OGCG: }
+}
+
+double my_atan(double f) {
+  return __builtin_atan(f);
+  // CHECK: cir.func {{.*}} @my_atan
+  // CHECK: {{.+}} = cir.atan {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_atan(double %0)
+  // LLVM:   %{{.+}} = call double @llvm.atan.f64(double %{{.+}})
+  // LLVM: }
+
+  // OGCG: define dso_local double @my_atan(double {{.+}})
+  // OGCG:   %{{.+}} = call double @llvm.atan.f64(double %{{.+}})
+  // OGCG: }
+}
+
+// atan2
+
+float my_atan2f(float y, float x) {
+  return __builtin_atan2f(y, x);
+  // CHECK: cir.func {{.*}} @my_atan2f
+  // CHECK: {{.+}} = cir.atan2 {{.+}}, {{.+}} : !cir.float
+
+  // LLVM: define dso_local float @my_atan2f
+  // LLVM:   %{{.+}} = call float @llvm.atan2.f32(float %{{.+}}, float %{{.+}})
+  // LLVM: }
+
+  // OGCG: define dso_local float @my_atan2f
+  // OGCG:   %{{.+}} = call float @llvm.atan2.f32(float %{{.+}}, float %{{.+}})
+  // OGCG: }
+}
+
+double my_atan2(double y, double x) {
+  return __builtin_atan2(y, x);
+  // CHECK: cir.func {{.*}} @my_atan2
+  // CHECK: {{.+}} = cir.atan2 {{.+}}, {{.+}} : !cir.double
+
+  // LLVM: define dso_local double @my_atan2
+  // LLVM:   %{{.+}} = call double @llvm.atan2.f64(double %{{.+}}, double %{{.+}})
+  // LLVM: }
+
+  // OGCG: define dso_local double @my_atan2
+  // OGCG:   %{{.+}} = call double @llvm.atan2.f64(double %{{.+}}, double %{{.+}})
+  // OGCG: }
+}
+
+// isgreater
+
+int my_isgreater(double a, double b) {
+  return __builtin_isgreater(a, b);
+  // CHECK: cir.func {{.*}} @my_isgreater
+  // CHECK: %[[CMP:.+]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.double, !cir.bool
+  // CHECK: {{.+}} = cir.cast bool_to_int %[[CMP]] : !cir.bool -> !s32i
+
+  // LLVM: define dso_local i32 @my_isgreater
+  // LLVM:   %{{.+}} = fcmp ogt double %{{.+}}, %{{.+}}
+  // LLVM:   %{{.+}} = zext i1 %{{.+}} to i32
+  // LLVM: }
+
+  // OGCG: define dso_local i32 @my_isgreater
+  // OGCG:   %{{.+}} = fcmp ogt double %{{.+}}, %{{.+}}
+  // OGCG:   %{{.+}} = zext i1 %{{.+}} to i32
+  // OGCG: }
+}
+
+// isgreaterequal
+
+int my_isgreaterequal(double a, double b) {
+  return __builtin_isgreaterequal(a, b);
+  // CHECK: cir.func {{.*}} @my_isgreaterequal
+  // CHECK: %[[CMP:.+]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.double, !cir.bool
+  // CHECK: {{.+}} = cir.cast bool_to_int %[[CMP]] : !cir.bool -> !s32i
+
+  // LLVM: define dso_local i32 @my_isgreaterequal
+  // LLVM:   %{{.+}} = fcmp oge double %{{.+}}, %{{.+}}
+  // LLVM:   %{{.+}} = zext i1 %{{.+}} to i32
+  // LLVM: }
+
+  // OGCG: define dso_local i32 @my_isgreaterequal
+  // OGCG:   %{{.+}} = fcmp oge double %{{.+}}, %{{.+}}
+  // OGCG:   %{{.+}} = zext i1 %{{.+}} to i32
+  // OGCG: }
+}
+
+// isless
+
+int my_isless(double a, double b) {
+  return __builtin_isless(a, b);
+  // CHECK: cir.func {{.*}} @my_isless
+  // CHECK: %[[CMP:.+]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.double, !cir.bool
+  // CHECK: {{.+}} = cir.cast bool_to_int %[[CMP]] : !cir.bool -> !s32i
+
+  // LLVM: define dso_local i32 @my_isless
+  // LLVM:   %{{.+}} = fcmp olt double %{{.+}}, %{{.+}}
+  // LLVM:   %{{.+}} = zext i1 %{{.+}} to i32
+  // LLVM: }
+
+  // OGCG: define dso_local i32 @my_isless
+  // OGCG:   %{{.+}} = fcmp olt double %{{.+}}, %{{.+}}
+  // OGCG:   %{{.+}} = zext i1 %{{.+}} to i32
+  // OGCG: }
+}
+
+// islessequal
+
+int my_islessequal(double a, double b) {
+  return __builtin_islessequal(a, b);
+  // CHECK: cir.func {{.*}} @my_islessequal
+  // CHECK: %[[CMP:.+]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.double, !cir.bool
+  // CHECK: {{.+}} = cir.cast bool_to_int %[[CMP]] : !cir.bool -> !s32i
+
+  // LLVM: define dso_local i32 @my_islessequal
+  // LLVM:   %{{.+}} = fcmp ole double %{{.+}}, %{{.+}}
+  // LLVM:   %{{.+}} = zext i1 %{{.+}} to i32
+  // LLVM: }
+
+  // OGCG: define dso_local i32 @my_islessequal
+  // OGCG:   %{{.+}} = fcmp ole double %{{.+}}, %{{.+}}
+  // OGCG:   %{{.+}} = zext i1 %{{.+}} to i32
+  // OGCG: }
+}
+
+// islessgreater
+
+int my_islessgreater(double a, double b) {
+  return __builtin_islessgreater(a, b);
+  // CHECK: cir.func {{.*}} @my_islessgreater
+  // CHECK: %[[CMP:.+]] = cir.cmp(fone, %{{.+}}, %{{.+}}) : !cir.double, !cir.bool
+  // CHECK: {{.+}} = cir.cast bool_to_int %[[CMP]] : !cir.bool -> !s32i
+
+  // LLVM: define dso_local i32 @my_islessgreater
+  // LLVM:   %{{.+}} = fcmp one double %{{.+}}, %{{.+}}
+  // LLVM:   %{{.+}} = zext i1 %{{.+}} to i32
+  // LLVM: }
+
+  // OGCG: define dso_local i32 @my_islessgreater
+  // OGCG:   %{{.+}} = fcmp one double %{{.+}}, %{{.+}}
+  // OGCG:   %{{.+}} = zext i1 %{{.+}} to i32
+  // OGCG: }
+}
+
+// isunordered
+
+int my_isunordered(double a, double b) {
+  return __builtin_isunordered(a, b);
+  // CHECK: cir.func {{.*}} @my_isunordered
+  // CHECK: %[[CMP:.+]] = cir.cmp(funo, %{{.+}}, %{{.+}}) : !cir.double, !cir.bool
+  // CHECK: {{.+}} = cir.cast bool_to_int %[[CMP]] : !cir.bool -> !s32i
+
+  // LLVM: define dso_local i32 @my_isunordered
+  // LLVM:   %{{.+}} = fcmp uno double %{{.+}}, %{{.+}}
+  // LLVM:   %{{.+}} = zext i1 %{{.+}} to i32
+  // LLVM: }
+
+  // OGCG: define dso_local i32 @my_isunordered
+  // OGCG:   %{{.+}} = fcmp uno double %{{.+}}, %{{.+}}
+  // OGCG:   %{{.+}} = zext i1 %{{.+}} to i32
+  // OGCG: }
+}
+
+// ldexp
+
+double my_ldexp(double x, int exp) {
+  return __builtin_ldexp(x, exp);
+  // CHECK: cir.func {{.*}} @my_ldexp
+  // CHECK: %{{.+}} = cir.llvm.intrinsic "ldexp" %{{.+}}, %{{.+}} : (!cir.double, !s32i) -> !cir.double
+
+  // LLVM: define dso_local double @my_ldexp
+  // LLVM:   %{{.+}} = call double @llvm.ldexp.f64.i32(double %{{.+}}, i32 %{{.+}})
+  // LLVM: }
+
+  // OGCG: define dso_local double @my_ldexp
+  // OGCG:   %{{.+}} = call double @llvm.ldexp.f64.i32(double %{{.+}}, i32 %{{.+}})
+  // OGCG: }
+}
+
+float my_ldexpf(float x, int exp) {
+  return __builtin_ldexpf(x, exp);
+  // CHECK: cir.func {{.*}} @my_ldexpf
+  // CHECK: %{{.+}} = cir.llvm.intrinsic "ldexp" %{{.+}}, %{{.+}} : (!cir.float, !s32i) -> !cir.float
+
+  // LLVM: define dso_local float @my_ldexpf
+  // LLVM:   %{{.+}} = call float @llvm.ldexp.f32.i32(float %{{.+}}, i32 %{{.+}})
+  // LLVM: }
+
+  // OGCG: define dso_local float @my_ldexpf
+  // OGCG:   %{{.+}} = call float @llvm.ldexp.f32.i32(float %{{.+}}, i32 %{{.+}})
+  // OGCG: }
+}
+
+long double my_ldexpl(long double x, int exp) {
+  return __builtin_ldexpl(x, exp);
+  // CHECK: cir.func {{.*}} @my_ldexpl
+  // CHECK: %{{.+}} = cir.llvm.intrinsic "ldexp" %{{.+}}, %{{.+}} : (!cir.long_double<!cir.f80>, !s32i) -> !cir.long_double<!cir.f80>
+
+  // LLVM: define dso_local x86_fp80 @my_ldexpl
+  // LLVM:   %{{.+}} = call x86_fp80 @llvm.ldexp.f80.i32(x86_fp80 %{{.+}}, i32 %{{.+}})
+  // LLVM: }
+
+  // OGCG: define dso_local x86_fp80 @my_ldexpl
+  // OGCG:   %{{.+}} = call x86_fp80 @llvm.ldexp.f80.i32(x86_fp80 %{{.+}}, i32 %{{.+}})
+  // OGCG: }
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-isfpclass.c b/clang/test/CIR/Incubator/CodeGen/builtin-isfpclass.c
new file mode 100644
index 0000000000000..f5486bac5445f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-isfpclass.c
@@ -0,0 +1,129 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+int finite(double);
+
+// CHECK: cir.func{{.*}} @test_is_finite
+void test_is_finite(__fp16 *H, float F, double D, long double LD) {
+    volatile int res;
+    res = __builtin_isinf(*H);
+    // CHECK: cir.is_fp_class %{{.*}}, 516 : (!cir.f16) -> !cir.bool
+
+    res = __builtin_isinf(F);
+    // CHECK: cir.is_fp_class %{{.*}}, 516 : (!cir.float) -> !cir.bool
+
+    res = __builtin_isinf(D);
+    // CHECK: cir.is_fp_class %{{.*}}, 516 : (!cir.double) -> !cir.bool
+
+    res = __builtin_isinf(LD);
+    // CHECK: cir.is_fp_class %{{.*}}, 516 : (!cir.long_double<!cir.f80>) -> !cir.bool
+
+    res = __builtin_isfinite(*H);
+    // CHECK: cir.is_fp_class %{{.*}}, 504 : (!cir.f16) -> !cir.bool
+    res = __builtin_isfinite(F);
+    // CHECK: cir.is_fp_class %{{.*}}, 504 : (!cir.float) -> !cir.bool
+    res = finite(D);
+    // CHECK: cir.is_fp_class %{{.*}}, 504 : (!cir.double) -> !cir.bool
+
+    res = __builtin_isnormal(*H);
+    // CHECK: cir.is_fp_class %{{.*}}, 264 : (!cir.f16) -> !cir.bool
+    res = __builtin_isnormal(F);
+    // CHECK: cir.is_fp_class %{{.*}}, 264 : (!cir.float) -> !cir.bool
+
+    res = __builtin_issubnormal(F);
+    // CHECK: cir.is_fp_class %{{.*}}, 144 : (!cir.float) -> !cir.bool
+    res = __builtin_iszero(F);
+    // CHECK: cir.is_fp_class %{{.*}}, 96 : (!cir.float) -> !cir.bool
+    res = __builtin_issignaling(F);
+    // CHECK: cir.is_fp_class %{{.*}}, 1 : (!cir.float) -> !cir.bool
+}
+
+_Bool check_isfpclass_finite(float x) {
+  return __builtin_isfpclass(x, 504 /*Finite*/);
+}
+
+// CHECK: cir.func{{.*}} @check_isfpclass_finite
+// CHECK: cir.is_fp_class %{{.*}}, 504 : (!cir.float)
+
+_Bool check_isfpclass_nan_f32(float x) {
+  return __builtin_isfpclass(x, 3 /*NaN*/);
+}
+
+// CHECK: cir.func{{.*}} @check_isfpclass_nan_f32
+// CHECK: cir.is_fp_class %{{.*}}, 3 : (!cir.float)
+
+
+_Bool check_isfpclass_snan_f64(double x) {
+  return __builtin_isfpclass(x, 1 /*SNaN*/);
+}
+
+// CHECK: cir.func{{.*}} @check_isfpclass_snan_f64
+// CHECK: cir.is_fp_class %{{.*}}, 1 : (!cir.double)
+
+
+_Bool check_isfpclass_zero_f16(_Float16 x) {
+  return __builtin_isfpclass(x, 96 /*Zero*/);
+}
+
+// CHECK: cir.func{{.*}} @check_isfpclass_zero_f16
+// CHECK: cir.is_fp_class %{{.*}}, 96 : (!cir.f16)
+
+// Update when we support FP pragma in functions and can convert BoolType in prvalue to i1.
+
+// _Bool check_isfpclass_finite_strict(float x) {
+// #pragma STDC FENV_ACCESS ON
+//   return __builtin_isfpclass(x, 504 /*Finite*/);
+// }
+// 
+// _Bool check_isfpclass_nan_f32_strict(float x) {
+// #pragma STDC FENV_ACCESS ON
+//   return __builtin_isfpclass(x, 3 /*NaN*/);
+// }
+// 
+// _Bool check_isfpclass_snan_f64_strict(double x) {
+// #pragma STDC FENV_ACCESS ON
+//   return __builtin_isfpclass(x, 1 /*NaN*/);
+// }
+// 
+// _Bool check_isfpclass_zero_f16_strict(_Float16 x) {
+// #pragma STDC FENV_ACCESS ON
+//   return __builtin_isfpclass(x, 96 /*Zero*/);
+// }
+// 
+// _Bool check_isnan(float x) {
+// #pragma STDC FENV_ACCESS ON
+//   return __builtin_isnan(x);
+// }
+// 
+// _Bool check_isinf(float x) {
+// #pragma STDC FENV_ACCESS ON
+//   return __builtin_isinf(x);
+// }
+// 
+// _Bool check_isfinite(float x) {
+// #pragma STDC FENV_ACCESS ON
+//   return __builtin_isfinite(x);
+// }
+// 
+// _Bool check_isnormal(float x) {
+// #pragma STDC FENV_ACCESS ON
+//   return __builtin_isnormal(x);
+// }
+// 
+// typedef float __attribute__((ext_vector_type(4))) float4;
+// typedef double __attribute__((ext_vector_type(4))) double4;
+// typedef int __attribute__((ext_vector_type(4))) int4;
+// typedef long __attribute__((ext_vector_type(4))) long4;
+// 
+// int4 check_isfpclass_nan_v4f32(float4 x) {
+//   return __builtin_isfpclass(x, 3 /*NaN*/);
+// }
+// 
+// int4 check_isfpclass_nan_strict_v4f32(float4 x) {
+// #pragma STDC FENV_ACCESS ON
+//   return __builtin_isfpclass(x, 3 /*NaN*/);
+// }
+// 
+// long4 check_isfpclass_nan_v4f64(double4 x) {
+//   return __builtin_isfpclass(x, 3 /*NaN*/);
+// }
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-isinf-sign.c b/clang/test/CIR/Incubator/CodeGen/builtin-isinf-sign.c
new file mode 100644
index 0000000000000..7996b796eb639
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-isinf-sign.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+int test_float_isinf_sign(float x) {
+    // CIR-LABEL: test_float_isinf_sign
+    // CIR: %[[TMP0:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.float>, !cir.float
+    // CIR: %[[TMP1:.*]] = cir.fabs %[[TMP0]] : !cir.float
+    // CIR: %[[IS_INF:.*]] = cir.is_fp_class %[[TMP1]], 516 : (!cir.float) -> !cir.bool
+    // CIR: %[[IS_NEG:.*]] = cir.signbit %[[TMP0]] : !cir.float -> !cir.bool
+    // CIR: %[[C_0:.*]] = cir.const #cir.int<0> : !s32i
+    // CIR: %[[C_1:.*]] = cir.const #cir.int<1> : !s32i
+    // CIR: %[[C_m1:.*]] = cir.const #cir.int<-1> : !s32i
+    // CIR: %[[TMP4:.*]] = cir.select if %[[IS_NEG]] then %[[C_m1]] else %[[C_1]] : (!cir.bool, !s32i, !s32i) -> !s32i
+    // CIR: %[[RET:.*]] = cir.select if %[[IS_INF]] then %[[TMP4]] else %[[C_0]] : (!cir.bool, !s32i, !s32i) -> !s32i
+    // CIR: cir.store{{.*}} %[[RET]], %{{.*}} : !s32i, !cir.ptr<!s32i>
+
+    // LLVM-LABEL: test_float_isinf_sign
+    // LLVM: %[[TMP0:.*]] = load float, ptr %{{.*}}
+    // LLVM: %[[TMP1:.*]] = call float @llvm.fabs.f32(float %[[TMP0]])
+    // LLVM: %[[IS_INF:.*]] = call i1 @llvm.is.fpclass.f32(float %[[TMP1]], i32 516)
+    // LLVM: %[[TMP1:.*]] = bitcast float %[[TMP0]] to i32
+    // LLVM: %[[IS_NEG:.*]] = icmp slt i32 %[[TMP1]], 0
+    // LLVM: %[[TMP2:.*]] = select i1 %[[IS_NEG]], i32 -1, i32 1
+    // LLVM: %[[RET:.*]] = select i1 %[[IS_INF]], i32 %[[TMP2]], i32 0
+    // LLVM: store i32 %[[RET]], ptr %{{.*}}, align 4
+    return __builtin_isinf_sign(x);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-ms-alloca.c b/clang/test/CIR/Incubator/CodeGen/builtin-ms-alloca.c
new file mode 100644
index 0000000000000..000c1de1c5d96
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-ms-alloca.c
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fms-extensions -emit-cir %s -o - | FileCheck %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fms-extensions -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
+
+typedef __SIZE_TYPE__ size_t;
+
+void my_win_alloca(size_t n)
+{
+  int *c1 = (int *)_alloca(n);
+}
+
+// CIR:       cir.func {{.*}} @my_win_alloca([[ALLOCA_SIZE:%.*]]: !u64i
+// CIR:       cir.store [[ALLOCA_SIZE]], [[LOCAL_VAR_ALLOCA_SIZE:%.*]] : !u64i, !cir.ptr<!u64i>
+// CIR:       [[TMP_ALLOCA_SIZE:%.*]] = cir.load{{.*}} [[LOCAL_VAR_ALLOCA_SIZE]] : !cir.ptr<!u64i>, !u64i
+// CIR:       [[ALLOCA_RES:%.*]] = cir.alloca !u8i, !cir.ptr<!u8i>, [[TMP_ALLOCA_SIZE]] : !u64i, ["bi_alloca"] {alignment = 16 : i64}
+// CIR-NEXT:  cir.cast bitcast [[ALLOCA_RES]] : !cir.ptr<!u8i> -> !cir.ptr<!void>
+// CIR: }
+
+
+// LLVM:       define dso_local void @my_win_alloca(i64 [[ALLOCA_SIZE:%.*]])
+// LLVM:       store i64 [[ALLOCA_SIZE]], ptr [[LOCAL_VAR_ALLOCA_SIZE:%.*]],
+// LLVM:       [[TMP_ALLOCA_SIZE:%.*]] =  load i64, ptr [[LOCAL_VAR_ALLOCA_SIZE]],
+// LLVM:       [[ALLOCA_RES:%.*]] = alloca i8, i64 [[TMP_ALLOCA_SIZE]], align 16
+// LLVM: }
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-nontemporal.cpp b/clang/test/CIR/Incubator/CodeGen/builtin-nontemporal.cpp
new file mode 100644
index 0000000000000..8348d6ee6228f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-nontemporal.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+int nontemporal_load(const int *ptr) {
+  return __builtin_nontemporal_load(ptr);
+}
+
+// CIR-LABEL: @_Z16nontemporal_loadPKi
+// CIR: %{{.+}} = cir.load nontemporal{{.*}}  %{{.+}} : !cir.ptr<!s32i>, !s32i
+
+// LLVM-LABEL: @_Z16nontemporal_loadPKi
+// LLVM: %{{.+}} = load i32, ptr %{{.+}}, align 4, !nontemporal !1
+
+void nontemporal_store(int *ptr, int value) {
+  __builtin_nontemporal_store(value, ptr);
+}
+
+// CIR-LABEL: @_Z17nontemporal_storePii
+// CIR: cir.store nontemporal{{.*}} %{{.+}}, %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+// LLVM-LABEL: @_Z17nontemporal_storePii
+// LLVM: store i32 %{{.+}}, ptr %{{.+}}, align 4, !nontemporal !1
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-prefetch.c b/clang/test/CIR/Incubator/CodeGen/builtin-prefetch.c
new file mode 100644
index 0000000000000..952dbe9887319
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-prefetch.c
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
+
+void foo(void *a) {
+  __builtin_prefetch(a, 1, 1);
+}
+
+// CIR:  cir.func {{.*}} @foo(%arg0: !cir.ptr<!void> loc({{.*}}))
+// CIR:    [[PTR_ALLOC:%.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["a", init] {alignment = 8 : i64}
+// CIR:    cir.store %arg0, [[PTR_ALLOC]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// CIR:    [[PTR:%.*]] = cir.load{{.*}} [[PTR_ALLOC]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR:    cir.prefetch([[PTR]] : !cir.ptr<!void>) locality(1) write
+// CIR:    cir.return
+
+// LLVM:  define dso_local void @foo(ptr [[ARG0:%.*]])
+// LLVM:    [[PTR_ALLOC:%.*]] = alloca ptr, i64 1
+// LLVM:    store ptr [[ARG0]], ptr [[PTR_ALLOC]]
+// LLVM:    [[PTR:%.*]] = load ptr, ptr [[PTR_ALLOC]]
+// LLVM:    call void @llvm.prefetch.p0(ptr [[PTR]], i32 1, i32 1, i32 1)
+// LLVM:    ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-rotate.c b/clang/test/CIR/Incubator/CodeGen/builtin-rotate.c
new file mode 100644
index 0000000000000..85aa6be13ebfb
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-rotate.c
@@ -0,0 +1,89 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+void f() {
+// CIR-LABEL: @f
+// LLVM-LABEL: @f
+  unsigned int v[4];
+  unsigned int h = __builtin_rotateleft32(v[0], 1);
+// CIR: %[[CONST:.*]] = cir.const #cir.int<1> : !s32i
+// CIR: %[[CAST:.*]] = cir.cast integral %[[CONST]] : !s32i -> !u32i
+// CIR: cir.rotate left {{.*}}, %[[CAST]] -> !u32i
+
+// LLVM: %[[SRC:.*]] = load i32, ptr
+// LLVM: call i32 @llvm.fshl.i32(i32 %[[SRC]], i32 %[[SRC]], i32 1)
+}
+
+unsigned char rotl8(unsigned char x, unsigned char y) {
+// CIR-LABEL: rotl8
+// CIR: cir.rotate left {{.*}}, {{.*}} -> !u8i
+
+// LLVM-LABEL: rotl8
+// LLVM: [[F:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[X]], i8 [[Y:%.*]])
+  return __builtin_rotateleft8(x, y);
+}
+
+short rotl16(short x, short y) {
+// CIR-LABEL: rotl16
+// CIR: cir.rotate left {{.*}}, {{.*}} -> !u16i
+
+// LLVM-LABEL: rotl16
+// LLVM: [[F:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y:%.*]])
+  return __builtin_rotateleft16(x, y);
+}
+
+int rotl32(int x, unsigned int y) {
+// CIR-LABEL: rotl32
+// CIR: cir.rotate left {{.*}}, {{.*}} -> !u32i
+
+// LLVM-LABEL: rotl32
+// LLVM: [[F:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
+  return __builtin_rotateleft32(x, y);
+}
+
+unsigned long long rotl64(unsigned long long x, long long y) {
+// CIR-LABEL: rotl64
+// CIR: cir.rotate left {{.*}}, {{.*}} -> !u64i
+
+// LLVM-LABEL: rotl64
+// LLVM: [[F:%.*]] = call i64 @llvm.fshl.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
+  return __builtin_rotateleft64(x, y);
+}
+
+char rotr8(char x, char y) {
+// CIR-LABEL: rotr8
+// CIR: cir.rotate right {{.*}}, {{.*}} -> !u8i
+
+// LLVM-LABEL: rotr8
+// LLVM: [[F:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[Y:%.*]])
+  return __builtin_rotateright8(x, y);
+}
+
+unsigned short rotr16(unsigned short x, unsigned short y) {
+// CIR-LABEL: rotr16
+// CIR: cir.rotate right {{.*}}, {{.*}} -> !u16i
+
+// LLVM-LABEL: rotr16
+// LLVM: [[F:%.*]] = call i16 @llvm.fshr.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y:%.*]])
+  return __builtin_rotateright16(x, y);
+}
+
+unsigned int rotr32(unsigned int x, int y) {
+// CIR-LABEL: rotr32
+// CIR: cir.rotate right {{.*}}, {{.*}} -> !u32i
+
+// LLVM-LABEL: rotr32
+// LLVM: [[F:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
+  return __builtin_rotateright32(x, y);
+}
+
+long long rotr64(long long x, unsigned long long y) {
+// CIR-LABEL: rotr64
+// CIR: cir.rotate right {{.*}}, {{.*}} -> !u64i
+
+// LLVM-LABEL: rotr64
+// LLVM: [[F:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
+  return __builtin_rotateright64(x, y);
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-setjmp-longjmp.c b/clang/test/CIR/Incubator/CodeGen/builtin-setjmp-longjmp.c
new file mode 100644
index 0000000000000..2d96139c347f9
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-setjmp-longjmp.c
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -O2 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -O2 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux -O2 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+void test_setjmp(void *env) {
+  // CIR-LABEL: test_setjmp
+  // CIR-SAME: [[ENV:%.*]]: 
+  // CIR-NEXT: [[ENV_ALLOCA:%[0-9]+]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>,
+  // CIR-NEXT: cir.store [[ENV]], [[ENV_ALLOCA]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+  // CIR-NEXT: [[ENV_LOAD:%[0-9]+]] = cir.load align(8) [[ENV_ALLOCA]]
+  // CIR-NEXT: [[CAST:%[0-9]+]] = cir.cast bitcast [[ENV_LOAD]] : !cir.ptr<!void> -> !cir.ptr<!cir.ptr<!void>>
+  // CIR-NEXT: [[ZERO:%[0-9]+]] = cir.const #cir.int<0>
+  // CIR-NEXT: [[FA:%[0-9]+]] = cir.frame_address([[ZERO]])
+  // CIR-NEXT: cir.store [[FA]], [[CAST]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+  // CIR-NEXT: [[SS:%[0-9]+]] = cir.stack_save
+  // CIR-NEXT: [[TWO:%[0-9]+]] = cir.const #cir.int<2>
+  // CIR-NEXT: [[GEP:%[0-9]+]] = cir.ptr_stride [[CAST]], [[TWO]] : (!cir.ptr<!cir.ptr<!void>>, !s32i) -> !cir.ptr<!cir.ptr<!void>>
+  // CIR-NEXT: cir.store [[SS]], [[GEP]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+  // CIR-NEXT: [[SJ:%[0-9]+]] = cir.eh.setjmp builtin [[CAST]] : (!cir.ptr<!cir.ptr<!void>>) -> !s32i
+
+
+  // LLVM-LABEL: test_setjmp
+  // LLVM-SAME: (ptr{{.*}}[[ENV:%.*]])
+  // LLVM-NEXT: [[FA:%[0-9]+]] = {{.*}}@llvm.frameaddress.p0(i32 0) 
+  // LLVM-NEXT: store ptr [[FA]], ptr [[ENV]]
+  // LLVM-NEXT: [[SS:%[0-9]+]] = {{.*}}@llvm.stacksave.p0() 
+  // LLVM-NEXT: [[GEP:%[0-9]+]] = getelementptr i8, ptr [[ENV]], i64 16
+  // LLVM-NEXT: store ptr [[SS]], ptr [[GEP]]
+  // LLVM-NEXT: @llvm.eh.sjlj.setjmp(ptr{{.*}}[[ENV]])
+  
+  // OGCG-LABEL: test_setjmp
+  // OGCG-SAME: (ptr{{.*}}[[ENV:%.*]])
+  // OGCG: [[FA:%.*]] = {{.*}}@llvm.frameaddress.p0(i32 0) 
+  // OGCG-NEXT: store ptr [[FA]], ptr [[ENV]]
+  // OGCG-NEXT: [[SS:%.*]] = {{.*}}@llvm.stacksave.p0() 
+  // OGCG-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[ENV]], i64 16
+  // OGCG-NEXT: store ptr [[SS]], ptr [[GEP]]
+  // OGCG-NEXT: @llvm.eh.sjlj.setjmp(ptr{{.*}}[[ENV]])
+  __builtin_setjmp(env);
+}
+
+extern int _setjmp(void *env);
+void test_setjmp2(void *env) {
+  // CIR-LABEL: test_setjmp2
+  // CIR-SAME: [[ENV:%.*]]:
+  // CIR-NEXT: [[ENV_ALLOCA:%.*]] = cir.alloca
+  // CIR-NEXT: cir.store [[ENV]], [[ENV_ALLOCA]]
+  // CIR-NEXT: [[ENV_LOAD:%.*]] = cir.load align(8) [[ENV_ALLOCA]]
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[ENV_LOAD]]
+  // CIR-NEXT: cir.eh.setjmp [[CAST]] : (!cir.ptr<!cir.ptr<!void>>) -> !s32i
+
+
+  // LLVM-LABEL: test_setjmp2
+  // LLVM-SAME: (ptr{{.*}}[[ENV:%.*]])
+  // LLVM-NEXT: call i32 @_setjmp(ptr [[ENV]])
+  //
+  // OGCG-LABEL: test_setjmp2
+  // OGCG-SAME: (ptr{{.*}}[[ENV:%.*]])
+  // OGCG: call i32 @_setjmp(ptr noundef [[ENV]])
+  _setjmp (env);
+}
+
+void test_longjmp(void *env) {
+  // CIR-LABEL: test_longjmp
+  // CIR-SAME: [[ENV:%.*]]: 
+  // CIR-NEXT: [[ENV_ALLOCA:%[0-9]+]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>,
+  // CIR-NEXT: cir.store [[ENV]], [[ENV_ALLOCA]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+  // CIR-NEXT: [[ENV_LOAD:%[0-9]+]] = cir.load align(8) [[ENV_ALLOCA]]
+  // CIR-NEXT: [[CAST:%[0-9]+]] = cir.cast bitcast [[ENV_LOAD]] : !cir.ptr<!void> -> !cir.ptr<!cir.ptr<!void>>
+  // CIR-NEXT: cir.eh.longjmp [[CAST]] : !cir.ptr<!cir.ptr<!void>>
+  // CIR-NEXT: cir.unreachable
+
+
+  // LLVM-LABEL: test_longjmp
+  // LLVM: @llvm.eh.sjlj.longjmp
+  // LLVM-NEXT: unreachable
+  
+  // OGCG-LABEL: test_longjmp
+  // OGCG: @llvm.eh.sjlj.longjmp
+  // OGCG-NEXT: unreachable
+  __builtin_longjmp(env, 1);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-signbit.c b/clang/test/CIR/Incubator/CodeGen/builtin-signbit.c
new file mode 100644
index 0000000000000..99c2b09d26e54
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-signbit.c
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+void test_signbit_float(float val) {
+    // CIR-LABEL: test_signbit_float
+    // CIR: %{{.+}} = cir.signbit %{{.+}} : !cir.float -> !cir.bool
+    // LLVM-LABEL: test_signbit_float
+    // LLVM: [[TMP1:%.*]] = bitcast float %{{.+}} to i32
+    // LLVM: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
+    if (__builtin_signbit(val)) {};
+}
+
+void test_signbit_double(double val) {
+    // CIR-LABEL: test_signbit_double
+    // CIR: %{{.+}} = cir.signbit %{{.+}} : !cir.float -> !cir.bool
+    // LLVM-LABEL: test_signbit_double
+    // LLVM: [[CONV:%.*]] = fptrunc double %{{.+}} to float
+    // LLVM: [[TMP1:%.*]] = bitcast float [[CONV]] to i32
+    // LLVM: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
+    if (__builtin_signbitf(val)) {}
+}
+
+void test_signbit_long_double(long double val) {
+    // CIR: test_signbit_long_double
+    // LLVM: test_signbit_long_double
+    if (__builtin_signbitl(val)) {}
+    // CIR: %{{.+}} = cir.signbit %{{.+}} : !cir.long_double<!cir.f80> -> !cir.bool
+    // LLVM: [[TMP1:%.*]] = bitcast x86_fp80 %{{.+}} to i80
+    // LLVM: [[TMP2:%.*]] = icmp slt i80 [[TMP1]], 0
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-types.c b/clang/test/CIR/Incubator/CodeGen/builtin-types.c
new file mode 100644
index 0000000000000..d6229cb3a900c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-types.c
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.2-256 -fclangir -emit-cir -o %t.cir
+// RUN: FileCheck --check-prefix=CIR-CHECK --input-file=%t.cir %s
+
+// CIR-CHECK: !cir.vector<!s16i x 8>
+#include <emmintrin.h>
+int A() { __m128i h = _mm_srli_epi16(h, 0); }
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-x86-pshufd.cpp b/clang/test/CIR/Incubator/CodeGen/builtin-x86-pshufd.cpp
new file mode 100644
index 0000000000000..e956e71a9378a
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-x86-pshufd.cpp
@@ -0,0 +1,113 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Test that __builtin_ia32_pshufd and __builtin_ia32_vpermilp generates correct CIR vec.shuffle operations
+// This verifies the fix for SIMD intrinsic support that was previously NYI
+
+typedef int __v4si __attribute__((__vector_size__(16)));
+typedef float __v4sf __attribute__((__vector_size__(16)));
+typedef double __v2df __attribute__((__vector_size__(16)));
+typedef float __v8sf __attribute__((__vector_size__(32)));
+typedef double __v4df __attribute__((__vector_size__(32)));
+typedef float __v16sf __attribute__((__vector_size__(64)));
+typedef double __v8df __attribute__((__vector_size__(64)));
+
+typedef __v4si __m128i;
+typedef __v4sf __m128;
+typedef __v2df __m128d;
+typedef __v8sf __m256;
+typedef __v4df __m256d;
+typedef __v16sf __m512;
+typedef __v8df __m512d;
+
+// CHECK-LABEL: @_Z11test_pshufdv
+void test_pshufd() {
+    __m128i vec = {1, 2, 3, 4};
+    // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 4>) [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!s32i x 4>
+    __m128i result = __builtin_ia32_pshufd(vec, 0x4E);
+}
+
+// CHECK-LABEL: @_Z19test_different_maskv  
+void test_different_mask() {
+    __m128i vec = {10, 20, 30, 40};
+    // Test different immediate value: 0x1B = 00011011 = [3,2,1,0] reversed
+    // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 4>) [#cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<1> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!s32i x 4>
+    __m128i result = __builtin_ia32_pshufd(vec, 0x1B);
+}
+
+// CHECK-LABEL: @_Z9test_casev
+void test_case() {
+    __m128i p0 = {1, 2, 3, 4};
+    
+    // This reproduces the exact pattern from stb_image.h:2685 that was failing:
+    // _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e));
+    // Which expands to: __builtin_ia32_pshufd(p0, 0x4e)
+    // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 4>) [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!s32i x 4>
+    __m128i out_vec = __builtin_ia32_pshufd(p0, 0x4e);
+}
+
+// CHECK-LABEL: @_Z15test_vpermilps4v
+void test_vpermilps4() {
+    __m128 vec = {1.0f, 2.0f, 3.0f, 4.0f};
+    // vpermilps with immediate 0x4E = 01001110 = [2,3,0,1] for 4 elements
+    // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} :  !cir.vector<!cir.float x 4>) [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!cir.float x 4>  
+    __m128 result = __builtin_ia32_vpermilps(vec, 0x4E);
+}
+
+// CHECK-LABEL: @_Z15test_vpermilpd2v
+void test_vpermilpd2() {
+    __m128d vec = {1.0, 2.0};
+    // vpermilpd with immediate 0x1 = 01 = [1,0] for 2 elements
+    // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 2>) [#cir.int<1> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!cir.double x 2>
+    __m128d result = __builtin_ia32_vpermilpd(vec, 0x1);
+}
+
+// CHECK-LABEL: @_Z17test_vpermilps256v
+void test_vpermilps256() {
+    __m256 vec = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f};
+    // vpermilps256 with immediate 0x1B = 00011011 = [3,2,1,0] for each 128-bit lane
+    // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 8>) [#cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<7> : !s32i, #cir.int<6> : !s32i, #cir.int<5> : !s32i, #cir.int<4> : !s32i] : !cir.vector<!cir.float x 8>
+    __m256 result = __builtin_ia32_vpermilps256(vec, 0x1B);
+}
+
+// CHECK-LABEL: @_Z17test_vpermilpd256v
+void test_vpermilpd256() {
+    __m256d vec = {1.0, 2.0, 3.0, 4.0};
+    // vpermilpd256 with immediate 0x5 = 0101 = [1,0,1,0] for 4 elements
+    // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 4>) [#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!cir.double x 4> 
+    __m256d result = __builtin_ia32_vpermilpd256(vec, 0x5);
+}
+
+// CHECK-LABEL: @_Z17test_vpermilps512v
+void test_vpermilps512() {
+    __m512 vec = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 
+                  9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f};
+    // vpermilps512 with immediate 0x4E = 01001110 = [2,3,0,1] for each 128-bit lane
+    // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 16>) [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i] : !cir.vector<!cir.float x 16>
+    __m512 result = __builtin_ia32_vpermilps512(vec, 0x4E);
+}
+
+// CHECK-LABEL: @_Z17test_vpermilpd512v
+void test_vpermilpd512() {
+    __m512d vec = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0};
+    // vpermilpd512 with immediate 0x55 = 01010101 = [1,0,1,0,1,0,1,0] for 8 elements
+    // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 8>) [#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<5> : !s32i, #cir.int<4> : !s32i, #cir.int<7> : !s32i, #cir.int<6> : !s32i] : !cir.vector<!cir.double x 8> 
+    __m512d result = __builtin_ia32_vpermilpd512(vec, 0x55);
+}
+
+// Test different immediate values
+// CHECK-LABEL: @_Z24test_vpermilps_differentv
+void test_vpermilps_different() {
+    __m128 vec = {10.0f, 20.0f, 30.0f, 40.0f};
+    // Test different immediate value: 0x1B = 00011011 = [3,2,1,0] reversed
+    // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 4>) [#cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<1> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!cir.float x 4> 
+    __m128 result = __builtin_ia32_vpermilps(vec, 0x1B);
+}
+
+// CHECK-LABEL: @_Z24test_vpermilpd_differentv
+void test_vpermilpd_different() {
+    __m128d vec = {100.0, 200.0};
+    // Test immediate 0x0 = 00 = [0,0] - duplicate first element
+    // CHECK: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 2>) [#cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!cir.double x 2>
+    __m128d result = __builtin_ia32_vpermilpd(vec, 0x0);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-x86-pslldqi.cpp b/clang/test/CIR/Incubator/CodeGen/builtin-x86-pslldqi.cpp
new file mode 100644
index 0000000000000..9cc009c5db795
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-x86-pslldqi.cpp
@@ -0,0 +1,176 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -target-feature +avx512f -target-feature +avx512bw
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -target-feature +avx512f -target-feature +avx512bw
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.og.ll -target-feature +avx512f -target-feature +avx512bw
+// RUN: FileCheck --input-file=%t.og.ll %s -check-prefix=OGCG
+
+// Tests PSLLDQI byte shift intrinsics implementation in ClangIR
+// Compares CIR emission, LLVM lowering, and original CodeGen output
+
+typedef long long __m128i __attribute__((__vector_size__(16)));
+typedef long long __m256i __attribute__((__vector_size__(32)));
+typedef long long __m512i __attribute__((__vector_size__(64)));
+
+// ============================================================================
+// Core Functionality Tests
+// ============================================================================
+
+// CIR-LABEL: @_Z22test_pslldqi128_shift4Dv2_x
+// LLVM-LABEL: @_Z22test_pslldqi128_shift4Dv2_x
+// OGCG-LABEL: @_Z22test_pslldqi128_shift4Dv2_x
+__m128i test_pslldqi128_shift4(__m128i a) {
+    // Should shift left by 4 bytes, filling with zeros
+    // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s8i x 16>) [#cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<20> : !s32i, #cir.int<21> : !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i, #cir.int<24> : !s32i, #cir.int<25> : !s32i, #cir.int<26> : !s32i, #cir.int<27> : !s32i] : !cir.vector<!s8i x 16>
+    // LLVM: %{{.*}} = shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+    // OGCG: %{{.*}} = shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+    return __builtin_ia32_pslldqi128_byteshift(a, 4);
+}
+
+// CIR-LABEL: @_Z22test_pslldqi128_shift0Dv2_x
+// LLVM-LABEL: @_Z22test_pslldqi128_shift0Dv2_x
+// OGCG-LABEL: @_Z22test_pslldqi128_shift0Dv2_x
+__m128i test_pslldqi128_shift0(__m128i a) {
+    // Should return input unchanged (shift by 0)
+    // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s8i x 16>) [#cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<20> : !s32i, #cir.int<21> : !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i, #cir.int<24> : !s32i, #cir.int<25> : !s32i, #cir.int<26> : !s32i, #cir.int<27> : !s32i, #cir.int<28> : !s32i, #cir.int<29> : !s32i, #cir.int<30> : !s32i, #cir.int<31> : !s32i] : !cir.vector<!s8i x 16>
+    // LLVM: %{{.*}} = shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+    // OGCG: %{{.*}} = shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+    return __builtin_ia32_pslldqi128_byteshift(a, 0);
+}
+
+// CIR-LABEL: @_Z23test_pslldqi128_shift16Dv2_x
+// LLVM-LABEL: @_Z23test_pslldqi128_shift16Dv2_x
+// OGCG-LABEL: @_Z23test_pslldqi128_shift16Dv2_x
+__m128i test_pslldqi128_shift16(__m128i a) {
+    // Entire vector shifted out, should return zero
+    // CIR: %{{.*}} = cir.const #cir.zero : !cir.vector<!s8i x 16>
+    // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s64i x 2>
+    // LLVM: store <2 x i64> zeroinitializer, ptr %{{.*}}, align 16
+    // OGCG: ret <2 x i64> zeroinitializer
+    return __builtin_ia32_pslldqi128_byteshift(a, 16);
+}
+
+// ============================================================================
+// 256-bit Tests (Two Independent 128-bit Lanes)
+// ============================================================================
+
+// CIR-LABEL: @_Z22test_pslldqi256_shift4Dv4_x
+// LLVM-LABEL: @_Z22test_pslldqi256_shift4Dv4_x
+// OGCG-LABEL: @_Z22test_pslldqi256_shift4Dv4_x
+__m256i test_pslldqi256_shift4(__m256i a) {
+    // Each 128-bit lane shifts independently by 4 bytes
+    // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s8i x 32>) [#cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<32> : !s32i, #cir.int<33> : !s32i, #cir.int<34> : !s32i, #cir.int<35> : !s32i, #cir.int<36> : !s32i, #cir.int<37> : !s32i, #cir.int<38> : !s32i, #cir.int<39> : !s32i, #cir.int<40> : !s32i, #cir.int<41> : !s32i, #cir.int<42> : !s32i, #cir.int<43> : !s32i, #cir.int<28> : !s32i, #cir.int<29> : !s32i, #cir.int<30> : !s32i, #cir.int<31> : !s32i, #cir.int<48> : !s32i, #cir.int<49> : !s32i, #cir.int<50> : !s32i, #cir.int<51> : !s32i, #cir.int<52> : !s32i, #cir.int<53> : !s32i, #cir.int<54> : !s32i, #cir.int<55> : !s32i, #cir.int<56> : !s32i, #cir.int<57> : !s32i, #cir.int<58> : !s32i, #cir.int<59> : !s32i] : !cir.vector<!s8i x 32>
+    // LLVM: %{{.*}} = shufflevector <32 x i8> zeroinitializer, <32 x i8> %{{.*}}, <32 x i32> <i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59>
+    // OGCG: %{{.*}} = shufflevector <32 x i8> zeroinitializer, <32 x i8> %{{.*}}, <32 x i32> <i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59>
+    return __builtin_ia32_pslldqi256_byteshift(a, 4);
+}
+
+// CIR-LABEL: @_Z23test_pslldqi256_shift16Dv4_x
+// LLVM-LABEL: @_Z23test_pslldqi256_shift16Dv4_x
+// OGCG-LABEL: @_Z23test_pslldqi256_shift16Dv4_x
+__m256i test_pslldqi256_shift16(__m256i a) {
+    // Both lanes completely shifted out, returns zero
+    // CIR: %{{.*}} = cir.const #cir.zero : !cir.vector<!s8i x 32>
+    // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<!s8i x 32> -> !cir.vector<!s64i x 4>
+    // LLVM: store <4 x i64> zeroinitializer, ptr %{{.*}}, align 32
+    // OGCG: ret <4 x i64> zeroinitializer
+    return __builtin_ia32_pslldqi256_byteshift(a, 16);
+}
+
+// ============================================================================
+// 512-bit Tests (Four Independent 128-bit Lanes)
+// ============================================================================
+
+// CIR-LABEL: @_Z22test_pslldqi512_shift4Dv8_x
+// LLVM-LABEL: @_Z22test_pslldqi512_shift4Dv8_x
+// OGCG-LABEL: @_Z22test_pslldqi512_shift4Dv8_x
+__m512i test_pslldqi512_shift4(__m512i a) {
+    // All 4 lanes shift independently by 4 bytes
+    // CIR: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s8i x 64>)
+    // LLVM: shufflevector <64 x i8> zeroinitializer, <64 x i8> %{{.*}}, <64 x i32> <i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123>
+    // OGCG: shufflevector <64 x i8> zeroinitializer, <64 x i8> %{{.*}}, <64 x i32> <i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123>
+    return __builtin_ia32_pslldqi512_byteshift(a, 4);
+}
+
+// CIR-LABEL: @_Z23test_pslldqi512_shift16Dv8_x
+// LLVM-LABEL: @_Z23test_pslldqi512_shift16Dv8_x
+// OGCG-LABEL: @_Z23test_pslldqi512_shift16Dv8_x
+__m512i test_pslldqi512_shift16(__m512i a) {
+    // All 4 lanes completely cleared
+    // CIR: %{{.*}} = cir.const #cir.zero : !cir.vector<!s8i x 64>
+    // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<!s8i x 64> -> !cir.vector<!s64i x 8>
+    // LLVM: store <8 x i64> zeroinitializer, ptr %{{.*}}, align 64
+    // OGCG: ret <8 x i64> zeroinitializer
+    return __builtin_ia32_pslldqi512_byteshift(a, 16);
+}
+
+// ============================================================================
+// Input-Output Verification Tests
+// ============================================================================
+
+// Test with specific input values to verify correct data transformation
+// CIR-LABEL: @_Z26test_input_output_shift4_1Dv2_x
+// LLVM-LABEL: @_Z26test_input_output_shift4_1Dv2_x
+// OGCG-LABEL: @_Z26test_input_output_shift4_1Dv2_x
+__m128i test_input_output_shift4_1(__m128i a) {
+    // Input:  [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] (bytes)
+    // Shift left by 4 bytes (insert 4 zeros at start)
+    // Output: [0, 0, 0, 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4] (bytes)
+    // CIR: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s8i x 16>) [#cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<20> : !s32i, #cir.int<21> : !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i, #cir.int<24> : !s32i, #cir.int<25> : !s32i, #cir.int<26> : !s32i, #cir.int<27> : !s32i] : !cir.vector<!s8i x 16>
+    // LLVM: shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+    // OGCG: shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+    return __builtin_ia32_pslldqi128_byteshift(a, 4);
+}
+
+// Test 256-bit lane independence with specific input pattern
+// CIR-LABEL: @_Z34test_input_output_256_independenceDv4_x
+// LLVM-LABEL: @_Z34test_input_output_256_independenceDv4_x
+// OGCG-LABEL: @_Z34test_input_output_256_independenceDv4_x
+__m256i test_input_output_256_independence(__m256i a) {
+    // Input: Two 128-bit lanes, each with pattern [15,14,13,...,2,1,0]
+    // Lane 0: [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    // Lane 1: [31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16]
+    // After shift by 4 bytes:
+    // Lane 0: [0, 0, 0, 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4]
+    // Lane 1: [0, 0, 0, 0, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20]
+    // CIR: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s8i x 32>)
+    // LLVM: shufflevector <32 x i8> zeroinitializer, <32 x i8> %{{.*}}, <32 x i32> <i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59>
+    // OGCG: shufflevector <32 x i8> zeroinitializer, <32 x i8> %{{.*}}, <32 x i32> <i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59>
+    return __builtin_ia32_pslldqi256_byteshift(a, 4);
+}
+
+// ============================================================================
+// Edge Cases
+// ============================================================================
+
+// Test with concrete constant values to verify exact transformation
+// CIR-LABEL: @_Z28test_concrete_input_constantv
+// LLVM-LABEL: @_Z28test_concrete_input_constantv
+// OGCG-LABEL: @_Z28test_concrete_input_constantv
+__m128i test_concrete_input_constant() {
+    // Create a known input pattern: 0x0F0E0D0C0B0A09080706050403020100
+    // This represents bytes [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+    __m128i input = (__m128i){0x0706050403020100LL, 0x0F0E0D0C0B0A0908LL};
+
+    // Shift left by 4 bytes - should produce: 0x0B0A0908070605040302010000000000
+    // This represents bytes [11,10,9,8,7,6,5,4,3,2,1,0,0,0,0,0]
+    __m128i result = __builtin_ia32_pslldqi128_byteshift(input, 4);
+
+    // CIR: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s8i x 16>) [#cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<20> : !s32i, #cir.int<21> : !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i, #cir.int<24> : !s32i, #cir.int<25> : !s32i, #cir.int<26> : !s32i, #cir.int<27> : !s32i] : !cir.vector<!s8i x 16>
+    // LLVM: shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+    // OGCG: shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+
+    return result;
+}
+
+// CIR-LABEL: @_Z22test_large_shift_valueDv2_x
+// LLVM-LABEL: @_Z22test_large_shift_valueDv2_x
+// OGCG-LABEL: @_Z22test_large_shift_valueDv2_x
+__m128i test_large_shift_value(__m128i a) {
+    // 240 & 0xFF = 240, so this should return zero (240 > 16)
+    // CIR: %{{.*}} = cir.const #cir.zero : !cir.vector<!s8i x 16>
+    // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s64i x 2>
+    // LLVM: store <2 x i64> zeroinitializer, ptr %{{.*}}, align 16
+    // OGCG: ret <2 x i64> zeroinitializer
+    return __builtin_ia32_pslldqi128_byteshift(a, 240);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/builtin-x86-psrldqi.cpp b/clang/test/CIR/Incubator/CodeGen/builtin-x86-psrldqi.cpp
new file mode 100644
index 0000000000000..ab30773714ac3
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtin-x86-psrldqi.cpp
@@ -0,0 +1,177 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -target-feature +avx512f -target-feature +avx512bw
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -target-feature +avx512f -target-feature +avx512bw
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.og.ll -target-feature +avx512f -target-feature +avx512bw
+// RUN: FileCheck --input-file=%t.og.ll %s -check-prefix=OGCG
+
+// Tests PSRLDQI byte shift intrinsics implementation in ClangIR
+// Compares CIR emission, LLVM lowering, and original CodeGen output
+
+typedef long long __m128i __attribute__((__vector_size__(16)));
+typedef long long __m256i __attribute__((__vector_size__(32)));
+typedef long long __m512i __attribute__((__vector_size__(64)));
+
+// ============================================================================
+// Core Functionality Tests
+// ============================================================================
+
+// CIR-LABEL: @_Z22test_psrldqi128_shift4Dv2_x
+// LLVM-LABEL: @_Z22test_psrldqi128_shift4Dv2_x
+// OGCG-LABEL: @_Z22test_psrldqi128_shift4Dv2_x
+__m128i test_psrldqi128_shift4(__m128i a) {
+    // Should shift right by 4 bytes, filling with zeros from the left
+    // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s8i x 16>) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i] : !cir.vector<!s8i x 16>
+    // LLVM: %{{.*}} = shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+    // OGCG: %{{.*}} = shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+    return __builtin_ia32_psrldqi128_byteshift(a, 4);
+}
+
+// CIR-LABEL: @_Z22test_psrldqi128_shift0Dv2_x
+// LLVM-LABEL: @_Z22test_psrldqi128_shift0Dv2_x
+// OGCG-LABEL: @_Z22test_psrldqi128_shift0Dv2_x
+__m128i test_psrldqi128_shift0(__m128i a) {
+    // Should return input unchanged (shift by 0)
+    // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s8i x 16>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i] : !cir.vector<!s8i x 16>
+    // LLVM: %{{.*}} = shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+    // OGCG: %{{.*}} = shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+    return __builtin_ia32_psrldqi128_byteshift(a, 0);
+}
+
+// CIR-LABEL: @_Z23test_psrldqi128_shift16Dv2_x
+// LLVM-LABEL: @_Z23test_psrldqi128_shift16Dv2_x
+// OGCG-LABEL: @_Z23test_psrldqi128_shift16Dv2_x
+__m128i test_psrldqi128_shift16(__m128i a) {
+    // Entire vector shifted out, should return zero
+    // CIR: %{{.*}} = cir.const #cir.zero : !cir.vector<!s8i x 16>
+    // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s64i x 2>
+    // LLVM: store <2 x i64> zeroinitializer, ptr %{{.*}}, align 16
+    // OGCG: ret <2 x i64> zeroinitializer
+    return __builtin_ia32_psrldqi128_byteshift(a, 16);
+}
+
+// ============================================================================
+// 256-bit Tests (Two Independent 128-bit Lanes)
+// ============================================================================
+
+// CIR-LABEL: @_Z22test_psrldqi256_shift8Dv4_x
+// LLVM-LABEL: @_Z22test_psrldqi256_shift8Dv4_x
+// OGCG-LABEL: @_Z22test_psrldqi256_shift8Dv4_x
+__m256i test_psrldqi256_shift8(__m256i a) {
+    // Each 128-bit lane shifts independently by 8 bytes
+    // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s8i x 32>) [#cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<32> : !s32i, #cir.int<33> : !s32i, #cir.int<34> : !s32i, #cir.int<35> : !s32i, #cir.int<36> : !s32i, #cir.int<37> : !s32i, #cir.int<38> : !s32i, #cir.int<39> : !s32i, #cir.int<24> : !s32i, #cir.int<25> : !s32i, #cir.int<26> : !s32i, #cir.int<27> : !s32i, #cir.int<28> : !s32i, #cir.int<29> : !s32i, #cir.int<30> : !s32i, #cir.int<31> : !s32i, #cir.int<48> : !s32i, #cir.int<49> : !s32i, #cir.int<50> : !s32i, #cir.int<51> : !s32i, #cir.int<52> : !s32i, #cir.int<53> : !s32i, #cir.int<54> : !s32i, #cir.int<55> : !s32i] : !cir.vector<!s8i x 32>
+    // LLVM: %{{.*}} = shufflevector <32 x i8> %{{.*}}, <32 x i8> zeroinitializer, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55>
+    // OGCG: %{{.*}} = shufflevector <32 x i8> %{{.*}}, <32 x i8> zeroinitializer, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55>
+    return __builtin_ia32_psrldqi256_byteshift(a, 8);
+}
+
+// CIR-LABEL: @_Z23test_psrldqi256_shift16Dv4_x
+// LLVM-LABEL: @_Z23test_psrldqi256_shift16Dv4_x
+// OGCG-LABEL: @_Z23test_psrldqi256_shift16Dv4_x
+__m256i test_psrldqi256_shift16(__m256i a) {
+    // Both lanes completely shifted out, returns zero
+    // CIR: %{{.*}} = cir.const #cir.zero : !cir.vector<!s8i x 32>
+    // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<!s8i x 32> -> !cir.vector<!s64i x 4>
+    // LLVM: store <4 x i64> zeroinitializer, ptr %{{.*}}, align 32
+    // OGCG: ret <4 x i64> zeroinitializer
+    return __builtin_ia32_psrldqi256_byteshift(a, 16);
+}
+
+// ============================================================================
+// 512-bit Tests (Four Independent 128-bit Lanes)
+// ============================================================================
+
+// CIR-LABEL: @_Z22test_psrldqi512_shift4Dv8_x
+// LLVM-LABEL: @_Z22test_psrldqi512_shift4Dv8_x
+// OGCG-LABEL: @_Z22test_psrldqi512_shift4Dv8_x
+__m512i test_psrldqi512_shift4(__m512i a) {
+    // All 4 lanes shift independently by 4 bytes
+    // CIR: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s8i x 64>)
+    // LLVM: shufflevector <64 x i8> %{{.*}}, <64 x i8> zeroinitializer, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 66, i32 67, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 82, i32 83, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 98, i32 99, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113, i32 114, i32 115>
+    // OGCG: shufflevector <64 x i8> %{{.*}}, <64 x i8> zeroinitializer, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 66, i32 67, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 82, i32 83, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 98, i32 99, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113, i32 114, i32 115>
+    return __builtin_ia32_psrldqi512_byteshift(a, 4);
+}
+
+// CIR-LABEL: @_Z23test_psrldqi512_shift16Dv8_x
+// LLVM-LABEL: @_Z23test_psrldqi512_shift16Dv8_x
+// OGCG-LABEL: @_Z23test_psrldqi512_shift16Dv8_x
+__m512i test_psrldqi512_shift16(__m512i a) {
+    // All 4 lanes completely cleared
+    // CIR: %{{.*}} = cir.const #cir.zero : !cir.vector<!s8i x 64>
+    // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<!s8i x 64> -> !cir.vector<!s64i x 8>
+    // LLVM: store <8 x i64> zeroinitializer, ptr %{{.*}}, align 64
+    // OGCG: ret <8 x i64> zeroinitializer
+    return __builtin_ia32_psrldqi512_byteshift(a, 16);
+}
+
+// ============================================================================
+// Input-Output Verification Tests
+// ============================================================================
+
+// Test with specific input values to verify correct data transformation
+// CIR-LABEL: @_Z26test_input_output_shift4_1Dv2_x
+// LLVM-LABEL: @_Z26test_input_output_shift4_1Dv2_x
+// OGCG-LABEL: @_Z26test_input_output_shift4_1Dv2_x
+__m128i test_input_output_shift4_1(__m128i a) {
+    // Input:  [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] (bytes)
+    // Shift right by 4 bytes (insert 4 zeros at end)
+    // Output: [11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0] (bytes)
+    // CIR: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s8i x 16>) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i] : !cir.vector<!s8i x 16>
+    // LLVM: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+    // OGCG: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+    return __builtin_ia32_psrldqi128_byteshift(a, 4);
+}
+
+// Test 256-bit lane independence with specific input pattern
+// CIR-LABEL: @_Z34test_input_output_256_independenceDv4_x
+// LLVM-LABEL: @_Z34test_input_output_256_independenceDv4_x
+// OGCG-LABEL: @_Z34test_input_output_256_independenceDv4_x
+__m256i test_input_output_256_independence(__m256i a) {
+    // Input: Two 128-bit lanes, each with pattern [15,14,13,...,2,1,0]
+    // Lane 0: [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+    // Lane 1: [31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16]
+    // After shift by 8 bytes:
+    // Lane 0: [7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+    // Lane 1: [23, 22, 21, 20, 19, 18, 17, 16, 0, 0, 0, 0, 0, 0, 0, 0]
+    // CIR: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s8i x 32>)
+    // LLVM: shufflevector <32 x i8> %{{.*}}, <32 x i8> zeroinitializer, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55>
+    // OGCG: shufflevector <32 x i8> %{{.*}}, <32 x i8> zeroinitializer, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55>
+    return __builtin_ia32_psrldqi256_byteshift(a, 8);
+}
+
+// ============================================================================
+// Edge Cases
+// ============================================================================
+
+// Test with concrete constant values to verify exact transformation
+// CIR-LABEL: @_Z28test_concrete_input_constantv
+// LLVM-LABEL: @_Z28test_concrete_input_constantv
+// OGCG-LABEL: @_Z28test_concrete_input_constantv
+__m128i test_concrete_input_constant() {
+    // Create a known input pattern: 0x0F0E0D0C0B0A09080706050403020100
+    // This represents bytes [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+    __m128i input = (__m128i){0x0706050403020100LL, 0x0F0E0D0C0B0A0908LL};
+
+    // Shift right by 4 bytes - should produce: 0x000000000F0E0D0C0B0A090807060504
+    // This represents bytes [0,0,0,0,15,14,13,12,11,10,9,8,7,6,5,4]
+    __m128i result = __builtin_ia32_psrldqi128_byteshift(input, 4);
+
+    // CIR: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s8i x 16>) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<16> : !s32i, #cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i] : !cir.vector<!s8i x 16>
+    // LLVM: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+    // OGCG: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
+
+    return result;
+}
+
+// CIR-LABEL: @_Z22test_large_shift_valueDv2_x
+// LLVM-LABEL: @_Z22test_large_shift_valueDv2_x
+// OGCG-LABEL: @_Z22test_large_shift_valueDv2_x
+__m128i test_large_shift_value(__m128i a) {
+    // 240 & 0xFF = 240, so this should return zero (240 > 16)
+    // CIR: %{{.*}} = cir.const #cir.zero : !cir.vector<!s8i x 16>
+    // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<!s8i x 16> -> !cir.vector<!s64i x 2>
+    // LLVM: store <2 x i64> zeroinitializer, ptr %{{.*}}, align 16
+    // OGCG: ret <2 x i64> zeroinitializer
+    return __builtin_ia32_psrldqi128_byteshift(a, 240);
+}
+
diff --git a/clang/test/CIR/Incubator/CodeGen/builtins-elementwise.c b/clang/test/CIR/Incubator/CodeGen/builtins-elementwise.c
new file mode 100644
index 0000000000000..39699b6cec83c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtins-elementwise.c
@@ -0,0 +1,416 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24  -fclangir \
+// RUN:  -emit-llvm  %s -o %t.ll
+// RUN: FileCheck  --check-prefix=LLVM --input-file=%t.ll %s
+
+typedef int vint4 __attribute__((ext_vector_type(4)));
+typedef float vfloat4 __attribute__((ext_vector_type(4)));
+typedef double vdouble4 __attribute__((ext_vector_type(4)));
+
+void test_builtin_elementwise_abs(vint4 vi4, int i, float f, double d, 
+                                  vfloat4 vf4, vdouble4  vd4) {
+    // CIR-LABEL: test_builtin_elementwise_abs
+    // LLVM-LABEL: test_builtin_elementwise_abs
+    // CIR: {{%.*}} = cir.fabs {{%.*}} : !cir.float
+    // LLVM: {{%.*}} = call float @llvm.fabs.f32(float {{%.*}})
+    f = __builtin_elementwise_abs(f);
+
+    // CIR: {{%.*}} = cir.fabs {{%.*}} : !cir.double
+    // LLVM: {{%.*}} = call double @llvm.fabs.f64(double {{%.*}})
+    d = __builtin_elementwise_abs(d);
+
+    // CIR: {{%.*}} = cir.abs {{%.*}} : !cir.vector<!s32i x 4>
+    // LLVM: {{%.*}} = call <4 x i32> @llvm.abs.v4i32(<4 x i32> {{%.*}}, i1 false)
+    vi4 = __builtin_elementwise_abs(vi4);
+
+    // CIR: {{%.*}} = cir.abs {{%.*}} : !s32i
+    // LLVM: {{%.*}} = call i32 @llvm.abs.i32(i32 {{%.*}}, i1 false)
+    i = __builtin_elementwise_abs(i);
+
+    // CIR: {{%.*}} = cir.fabs {{%.*}} : !cir.vector<!cir.float x 4>
+    // LLVM: {{%.*}} = call <4 x float> @llvm.fabs.v4f32(<4 x float> {{%.*}})
+    vf4 = __builtin_elementwise_abs(vf4);
+
+    // CIR: {{%.*}} = cir.fabs {{%.*}} : !cir.vector<!cir.double x 4>
+    // LLVM: {{%.*}} = call <4 x double> @llvm.fabs.v4f64(<4 x double> {{%.*}})
+    vd4 = __builtin_elementwise_abs(vd4);
+}
+
+void test_builtin_elementwise_acos(float f, double d, vfloat4 vf4,
+                                   vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_acos
+  // LLVM-LABEL: test_builtin_elementwise_acos
+  // CIR: {{%.*}} = cir.acos {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.acos.f32(float {{%.*}})
+  f = __builtin_elementwise_acos(f);
+
+  // CIR: {{%.*}} = cir.acos {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.acos.f64(double {{%.*}})
+  d = __builtin_elementwise_acos(d);
+
+  // CIR: {{%.*}} = cir.acos {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.acos.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_acos(vf4);
+
+  // CIR: {{%.*}} = cir.acos {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.acos.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_acos(vd4);
+}
+
+void test_builtin_elementwise_asin(float f, double d, vfloat4 vf4,
+  vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_asin
+  // LLVM-LABEL: test_builtin_elementwise_asin
+  // CIR: {{%.*}} = cir.asin {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.asin.f32(float {{%.*}})
+  f = __builtin_elementwise_asin(f);
+
+  // CIR: {{%.*}} = cir.asin {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.asin.f64(double {{%.*}})
+  d = __builtin_elementwise_asin(d);
+
+  // CIR: {{%.*}} = cir.asin {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.asin.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_asin(vf4);
+
+  // CIR: {{%.*}} = cir.asin {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.asin.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_asin(vd4);
+}
+
+void test_builtin_elementwise_atan(float f, double d, vfloat4 vf4,
+  vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_atan
+  // LLVM-LABEL: test_builtin_elementwise_atan
+  // CIR: {{%.*}} = cir.atan {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.atan.f32(float {{%.*}})
+  f = __builtin_elementwise_atan(f);
+
+  // CIR: {{%.*}} = cir.atan {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.atan.f64(double {{%.*}})
+  d = __builtin_elementwise_atan(d);
+
+  // CIR: {{%.*}} = cir.atan {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.atan.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_atan(vf4);
+
+  // CIR: {{%.*}} = cir.atan {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.atan.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_atan(vd4);
+}
+
+void test_builtin_elementwise_atan2(float f, double d, vfloat4 vf4,
+  vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_atan2
+  // LLVM-LABEL: test_builtin_elementwise_atan2
+  // CIR: {{%.*}} = cir.atan2 {{%.*}}, {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.atan2.f32(float {{%.*}}, float {{%.*}})
+  f = __builtin_elementwise_atan2(f, f);
+
+  // CIR: {{%.*}} = cir.atan2 {{%.*}}, {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.atan2.f64(double {{%.*}}, double {{%.*}})
+  d = __builtin_elementwise_atan2(d, d);
+
+  // CIR: {{%.*}} = cir.atan2 {{%.*}}, {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.atan2.v4f32(<4 x float> {{%.*}}, <4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_atan2(vf4, vf4);
+
+  // CIR: {{%.*}} = cir.atan2 {{%.*}}, {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.atan2.v4f64(<4 x double> {{%.*}}, <4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_atan2(vd4, vd4);
+}
+
+void test_builtin_elementwise_exp(float f, double d, vfloat4 vf4,
+                                  vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_exp
+  // LLVM-LABEL: test_builtin_elementwise_exp
+  // CIR: {{%.*}} = cir.exp {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.exp.f32(float {{%.*}})
+  f = __builtin_elementwise_exp(f);
+
+  // CIR: {{%.*}} = cir.exp {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.exp.f64(double {{%.*}})
+  d = __builtin_elementwise_exp(d);
+
+  // CIR: {{%.*}} = cir.exp {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.exp.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_exp(vf4);
+
+  // CIR: {{%.*}} = cir.exp {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.exp.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_exp(vd4);
+}
+
+void test_builtin_elementwise_exp2(float f, double d, vfloat4 vf4,
+                                  vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_exp2
+  // LLVM-LABEL: test_builtin_elementwise_exp2
+  // CIR: {{%.*}} = cir.exp2 {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.exp2.f32(float {{%.*}})
+  f = __builtin_elementwise_exp2(f);
+
+  // CIR: {{%.*}} = cir.exp2 {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.exp2.f64(double {{%.*}})
+  d = __builtin_elementwise_exp2(d);
+
+  // CIR: {{%.*}} = cir.exp2 {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.exp2.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_exp2(vf4);
+
+  // CIR: {{%.*}} = cir.exp2 {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.exp2.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_exp2(vd4);
+}
+
+void test_builtin_elementwise_log(float f, double d, vfloat4 vf4,
+                                  vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_log
+  // LLVM-LABEL: test_builtin_elementwise_log
+  // CIR: {{%.*}} = cir.log {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.log.f32(float {{%.*}})
+  f = __builtin_elementwise_log(f);
+
+  // CIR: {{%.*}} = cir.log {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.log.f64(double {{%.*}})
+  d = __builtin_elementwise_log(d);
+
+  // CIR: {{%.*}} = cir.log {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.log.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_log(vf4);
+
+  // CIR: {{%.*}} = cir.log {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.log.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_log(vd4);
+}
+
+void test_builtin_elementwise_log2(float f, double d, vfloat4 vf4,
+                                    vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_log2
+  // LLVM-LABEL: test_builtin_elementwise_log2
+  // CIR: {{%.*}} = cir.log2 {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.log2.f32(float {{%.*}})
+  f = __builtin_elementwise_log2(f);
+
+  // CIR: {{%.*}} = cir.log2 {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.log2.f64(double {{%.*}})
+  d = __builtin_elementwise_log2(d);
+
+  // CIR: {{%.*}} = cir.log2 {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.log2.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_log2(vf4);
+
+  // CIR: {{%.*}} = cir.log2 {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.log2.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_log2(vd4);
+}
+
+void test_builtin_elementwise_log10(float f, double d, vfloat4 vf4,
+                                     vdouble4  vd4) {
+  // CIR-LABEL: test_builtin_elementwise_log10
+  // LLVM-LABEL: test_builtin_elementwise_log10
+  // CIR: {{%.*}} = cir.log10 {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.log10.f32(float {{%.*}})
+  f = __builtin_elementwise_log10(f);
+
+  // CIR: {{%.*}} = cir.log10 {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.log10.f64(double {{%.*}})
+  d = __builtin_elementwise_log10(d);
+
+  // CIR: {{%.*}} = cir.log10 {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.log10.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_log10(vf4);
+
+  // CIR: {{%.*}} = cir.log10 {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.log10.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_log10(vd4);
+}
+
+void test_builtin_elementwise_cos(float f, double d, vfloat4 vf4,
+                                     vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_cos
+  // LLVM-LABEL: test_builtin_elementwise_cos
+  // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.cos.f32(float {{%.*}})
+  f = __builtin_elementwise_cos(f);
+
+  // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.cos.f64(double {{%.*}})
+  d = __builtin_elementwise_cos(d);
+
+  // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.cos.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_cos(vf4);
+
+  // CIR: {{%.*}} = cir.cos {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.cos.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_cos(vd4);
+}
+
+void test_builtin_elementwise_floor(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_floor
+  // LLVM-LABEL: test_builtin_elementwise_floor
+  // CIR: {{%.*}} = cir.floor {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.floor.f32(float {{%.*}})
+  f = __builtin_elementwise_floor(f);
+
+  // CIR: {{%.*}} = cir.floor {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.floor.f64(double {{%.*}})
+  d = __builtin_elementwise_floor(d);
+
+  // CIR: {{%.*}} = cir.floor {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.floor.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_floor(vf4);
+
+  // CIR: {{%.*}} = cir.floor {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.floor.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_floor(vd4);
+}
+
+void test_builtin_elementwise_round(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_round
+  // LLVM-LABEL: test_builtin_elementwise_round
+  // CIR: {{%.*}} = cir.round {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.round.f32(float {{%.*}})
+  f = __builtin_elementwise_round(f);
+
+  // CIR: {{%.*}} = cir.round {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.round.f64(double {{%.*}})
+  d = __builtin_elementwise_round(d);
+
+  // CIR: {{%.*}} = cir.round {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.round.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_round(vf4);
+
+  // CIR: {{%.*}} = cir.round {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.round.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_round(vd4);
+}
+
+void test_builtin_elementwise_rint(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_rint
+  // LLVM-LABEL: test_builtin_elementwise_rint
+  // CIR: {{%.*}} = cir.rint {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.rint.f32(float {{%.*}})
+  f = __builtin_elementwise_rint(f);
+
+  // CIR: {{%.*}} = cir.rint {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.rint.f64(double {{%.*}})
+  d = __builtin_elementwise_rint(d);
+
+  // CIR: {{%.*}} = cir.rint {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.rint.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_rint(vf4);
+
+  // CIR: {{%.*}} = cir.rint {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.rint.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_rint(vd4);
+}
+
+void test_builtin_elementwise_nearbyint(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_nearbyint
+  // LLVM-LABEL: test_builtin_elementwise_nearbyint
+  // CIR: {{%.*}} = cir.nearbyint {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.nearbyint.f32(float {{%.*}})
+  f = __builtin_elementwise_nearbyint(f);
+
+  // CIR: {{%.*}} = cir.nearbyint {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.nearbyint.f64(double {{%.*}})
+  d = __builtin_elementwise_nearbyint(d);
+
+  // CIR: {{%.*}} = cir.nearbyint {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_nearbyint(vf4);
+
+  // CIR: {{%.*}} = cir.nearbyint {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_nearbyint(vd4);
+}
+
+void test_builtin_elementwise_sin(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_sin
+  // LLVM-LABEL: test_builtin_elementwise_sin
+  // CIR: {{%.*}} = cir.sin {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.sin.f32(float {{%.*}})
+  f = __builtin_elementwise_sin(f);
+
+  // CIR: {{%.*}} = cir.sin {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.sin.f64(double {{%.*}})
+  d = __builtin_elementwise_sin(d);
+
+  // CIR: {{%.*}} = cir.sin {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.sin.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_sin(vf4);
+
+  // CIR: {{%.*}} = cir.sin {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.sin.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_sin(vd4);
+}
+
+void test_builtin_elementwise_sqrt(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_sqrt
+  // LLVM-LABEL: test_builtin_elementwise_sqrt
+  // CIR: {{%.*}} = cir.sqrt {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.sqrt.f32(float {{%.*}})
+  f = __builtin_elementwise_sqrt(f);
+
+  // CIR: {{%.*}} = cir.sqrt {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.sqrt.f64(double {{%.*}})
+  d = __builtin_elementwise_sqrt(d);
+
+  // CIR: {{%.*}} = cir.sqrt {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.sqrt.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_sqrt(vf4);
+
+  // CIR: {{%.*}} = cir.sqrt {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.sqrt.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_sqrt(vd4);
+}
+
+void test_builtin_elementwise_tan(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_tan
+  // LLVM-LABEL: test_builtin_elementwise_tan
+  // CIR: {{%.*}} = cir.tan {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.tan.f32(float {{%.*}})
+  f = __builtin_elementwise_tan(f);
+
+  // CIR: {{%.*}} = cir.tan {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.tan.f64(double {{%.*}})
+  d = __builtin_elementwise_tan(d);
+
+  // CIR: {{%.*}} = cir.tan {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.tan.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_tan(vf4);
+
+  // CIR: {{%.*}} = cir.tan {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.tan.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_tan(vd4);
+}
+
+void test_builtin_elementwise_trunc(float f, double d, vfloat4 vf4,
+                   vdouble4 vd4) {
+  // CIR-LABEL: test_builtin_elementwise_trunc
+  // LLVM-LABEL: test_builtin_elementwise_trunc
+  // CIR: {{%.*}} = cir.trunc {{%.*}} : !cir.float
+  // LLVM: {{%.*}} = call float @llvm.trunc.f32(float {{%.*}})
+  f = __builtin_elementwise_trunc(f);
+
+  // CIR: {{%.*}} = cir.trunc {{%.*}} : !cir.double
+  // LLVM: {{%.*}} = call double @llvm.trunc.f64(double {{%.*}})
+  d = __builtin_elementwise_trunc(d);
+
+  // CIR: {{%.*}} = cir.trunc {{%.*}} : !cir.vector<!cir.float x 4>
+  // LLVM: {{%.*}} = call <4 x float> @llvm.trunc.v4f32(<4 x float> {{%.*}})
+  vf4 = __builtin_elementwise_trunc(vf4);
+
+  // CIR: {{%.*}} = cir.trunc {{%.*}} : !cir.vector<!cir.double x 4>
+  // LLVM: {{%.*}} = call <4 x double> @llvm.trunc.v4f64(<4 x double> {{%.*}})
+  vd4 = __builtin_elementwise_trunc(vd4);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/builtins-memory.c b/clang/test/CIR/Incubator/CodeGen/builtins-memory.c
new file mode 100644
index 0000000000000..74eba362cc576
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtins-memory.c
@@ -0,0 +1,260 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck %s --check-prefix=CIR --input-file=%t.cir
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - \
+// RUN:  | opt -S -passes=instcombine,mem2reg,simplifycfg -o %t.ll
+// RUN: FileCheck  --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - \
+// RUN:  | opt -S -passes=instcombine,mem2reg,simplifycfg -o %t.ll
+// RUN: FileCheck  --check-prefix=OGCG --input-file=%t.ll %s
+
+typedef __SIZE_TYPE__ size_t;
+void test_memcpy_chk(void *dest, const void *src, size_t n) {
+  // CIR-LABEL: cir.func {{.*}} @test_memcpy_chk
+  // CIR:         %[[#DEST:]] = cir.alloca {{.*}} ["dest", init]
+  // CIR:         %[[#SRC:]] = cir.alloca {{.*}} ["src", init]
+  // CIR:         %[[#N:]] = cir.alloca {{.*}} ["n", init]
+
+  // An unchecked memcpy should be emitted when the count and buffer size are
+  // constants and the count is less than or equal to the buffer size.
+
+  // CIR: %[[#DEST_LOAD:]] = cir.load{{.*}} %[[#DEST]]
+  // CIR: %[[#SRC_LOAD:]] = cir.load{{.*}} %[[#SRC]]
+  // CIR: %[[#COUNT:]] = cir.const #cir.int<8>
+  // CIR: cir.libc.memcpy %[[#COUNT]] bytes from %[[#SRC_LOAD]] to %[[#DEST_LOAD]]
+  __builtin___memcpy_chk(dest, src, 8, 10);
+
+  // CIR: %[[#DEST_LOAD:]] = cir.load{{.*}} %[[#DEST]]
+  // CIR: %[[#SRC_LOAD:]] = cir.load{{.*}} %[[#SRC]]
+  // CIR: %[[#COUNT:]] = cir.const #cir.int<10>
+  // CIR: cir.libc.memcpy %[[#COUNT]] bytes from %[[#SRC_LOAD]] to %[[#DEST_LOAD]]
+  __builtin___memcpy_chk(dest, src, 10, 10);
+
+  // __memcpy_chk should be called when the count is greater than the buffer
+  // size, or when either the count or buffer size isn't a constant.
+
+  // CIR: %[[#DEST_LOAD:]] = cir.load{{.*}} %[[#DEST]]
+  // CIR: %[[#SRC_LOAD:]] = cir.load{{.*}} %[[#SRC]]
+  // CIR: %[[#COUNT:]] = cir.const #cir.int<10>
+  // CIR: %[[#SIZE:]] = cir.const #cir.int<8>
+  // CIR: cir.call @__memcpy_chk(%[[#DEST_LOAD]], %[[#SRC_LOAD]], %[[#COUNT]], %[[#SIZE]])
+  __builtin___memcpy_chk(dest, src, 10lu, 8lu);
+
+  // CIR: %[[#DEST_LOAD:]] = cir.load{{.*}} %[[#DEST]]
+  // CIR: %[[#SRC_LOAD:]] = cir.load{{.*}} %[[#SRC]]
+  // CIR: %[[#N_LOAD:]] = cir.load{{.*}} %[[#N]]
+  // CIR: %[[#SIZE:]] = cir.const #cir.int<10>
+  // CIR: cir.call @__memcpy_chk(%[[#DEST_LOAD]], %[[#SRC_LOAD]], %[[#N_LOAD]], %[[#SIZE]])
+  __builtin___memcpy_chk(dest, src, n, 10lu);
+
+  // CIR: %[[#DEST_LOAD:]] = cir.load{{.*}} %[[#DEST]]
+  // CIR: %[[#SRC_LOAD:]] = cir.load{{.*}} %[[#SRC]]
+  // CIR: %[[#COUNT:]] = cir.const #cir.int<10>
+  // CIR: %[[#N_LOAD:]] = cir.load{{.*}} %[[#N]]
+  // CIR: cir.call @__memcpy_chk(%[[#DEST_LOAD]], %[[#SRC_LOAD]], %[[#COUNT]], %[[#N_LOAD]])
+  __builtin___memcpy_chk(dest, src, 10lu, n);
+
+  // CIR: %[[#DEST_LOAD:]] = cir.load{{.*}} %[[#DEST]]
+  // CIR: %[[#SRC_LOAD:]] = cir.load{{.*}} %[[#SRC]]
+  // CIR: %[[#N_LOAD1:]] = cir.load{{.*}} %[[#N]]
+  // CIR: %[[#N_LOAD2:]] = cir.load{{.*}} %[[#N]]
+  // CIR: cir.call @__memcpy_chk(%[[#DEST_LOAD]], %[[#SRC_LOAD]], %[[#N_LOAD1]], %[[#N_LOAD2]])
+  __builtin___memcpy_chk(dest, src, n, n);
+}
+
+void test_memmove_chk(void *dest, const void *src, size_t n) {
+  // CIR-LABEL: cir.func {{.*}} @test_memmove_chk
+  // CIR:         %[[#DEST:]] = cir.alloca {{.*}} ["dest", init]
+  // CIR:         %[[#SRC:]] = cir.alloca {{.*}} ["src", init]
+  // CIR:         %[[#N:]] = cir.alloca {{.*}} ["n", init]
+
+  // LLVM-LABEL: test_memmove_chk
+
+  // An unchecked memmove should be emitted when the count and buffer size are
+  // constants and the count is less than or equal to the buffer size.
+
+  // CIR: %[[#DEST_LOAD:]] = cir.load{{.*}} %[[#DEST]]
+  // CIR: %[[#SRC_LOAD:]] = cir.load{{.*}} %[[#SRC]]
+  // CIR: %[[#COUNT:]] = cir.const #cir.int<8>
+  // CIR: cir.libc.memmove %[[#COUNT]] bytes from %[[#SRC_LOAD]] to %[[#DEST_LOAD]]
+  // LLVM: call void @llvm.memmove.p0.p0.i64(ptr {{%.*}}, ptr {{%.*}}, i64 8, i1 false)
+  // COM: LLVM: call void @llvm.memmove.p0.p0.i64(ptr align 1 {{%.*}}, ptr align 1 {{%.*}}, i64 8, i1 false)
+  __builtin___memmove_chk(dest, src, 8, 10);
+
+  // CIR: %[[#DEST_LOAD:]] = cir.load{{.*}} %[[#DEST]]
+  // CIR: %[[#SRC_LOAD:]] = cir.load{{.*}} %[[#SRC]]
+  // CIR: %[[#COUNT:]] = cir.const #cir.int<10>
+  // CIR: cir.libc.memmove %[[#COUNT]] bytes from %[[#SRC_LOAD]] to %[[#DEST_LOAD]]
+  // LLVM: call void @llvm.memmove.p0.p0.i64(ptr {{%.*}}, ptr {{%.*}}, i64 10, i1 false)
+  // COM: LLVM: call void @llvm.memmove.p0.p0.i64(ptr align 1 {{%.*}}, ptr align 1 {{%.*}}, i64 10, i1 false)
+  __builtin___memmove_chk(dest, src, 10, 10);
+
+  // CIR: %[[#DEST_LOAD:]] = cir.load{{.*}} %[[#DEST]]
+  // CIR: %[[#SRC_LOAD:]] = cir.load{{.*}} %[[#SRC]]
+  // CIR: %[[#COUNT:]] = cir.const #cir.int<10>
+  // CIR: %[[#SIZE:]] = cir.const #cir.int<8>
+  // CIR: cir.call @__memmove_chk(%[[#DEST_LOAD]], %[[#SRC_LOAD]], %[[#COUNT]], %[[#SIZE]])
+  // LLVM: call ptr @__memmove_chk(ptr {{%.*}}, ptr {{%.*}}, i64 10, i64 8)
+  // COM: LLVM: call ptr @__memmove_chk(ptr noundef %4, ptr noundef %5, i64 noundef 10, i64 noundef 8)
+  __builtin___memmove_chk(dest, src, 10lu, 8lu);
+
+  // CIR: %[[#DEST_LOAD:]] = cir.load{{.*}} %[[#DEST]]
+  // CIR: %[[#SRC_LOAD:]] = cir.load{{.*}} %[[#SRC]]
+  // CIR: %[[#N_LOAD:]] = cir.load{{.*}} %[[#N]]
+  // CIR: %[[#SIZE:]] = cir.const #cir.int<10>
+  // CIR: cir.call @__memmove_chk(%[[#DEST_LOAD]], %[[#SRC_LOAD]], %[[#N_LOAD]], %[[#SIZE]])
+  // LLVM: call ptr @__memmove_chk(ptr {{%.*}}, ptr {{%.*}}, i64 {{%.*}}, i64 10)
+  // COM: LLVM: call ptr @__memmove_chk(ptr noundef {{%.*}}, ptr noundef {{%.*}}, i64 noundef {{%.*}}, i64 noundef 10)
+  __builtin___memmove_chk(dest, src, n, 10lu);
+
+  // CIR: %[[#DEST_LOAD:]] = cir.load{{.*}} %[[#DEST]]
+  // CIR: %[[#SRC_LOAD:]] = cir.load{{.*}} %[[#SRC]]
+  // CIR: %[[#COUNT:]] = cir.const #cir.int<10>
+  // CIR: %[[#N_LOAD:]] = cir.load{{.*}} %[[#N]]
+  // CIR: cir.call @__memmove_chk(%[[#DEST_LOAD]], %[[#SRC_LOAD]], %[[#COUNT]], %[[#N_LOAD]])
+  // LLVM: call ptr @__memmove_chk(ptr {{%.*}}, ptr {{%.*}}, i64 10, i64 {{%.*}})
+  // COM: LLVM: call ptr @__memmove_chk(ptr noundef {{%.*}}, ptr noundef {{%.*}}, i64 noundef 10, i64 noundef {{%.*}})
+  __builtin___memmove_chk(dest, src, 10lu, n);
+
+  // CIR: %[[#DEST_LOAD:]] = cir.load{{.*}} %[[#DEST]]
+  // CIR: %[[#SRC_LOAD:]] = cir.load{{.*}} %[[#SRC]]
+  // CIR: %[[#N_LOAD1:]] = cir.load{{.*}} %[[#N]]
+  // CIR: %[[#N_LOAD2:]] = cir.load{{.*}} %[[#N]]
+  // CIR: cir.call @__memmove_chk(%[[#DEST_LOAD]], %[[#SRC_LOAD]], %[[#N_LOAD1]], %[[#N_LOAD2]])
+  // LLVM: call ptr @__memmove_chk(ptr {{%.*}}, ptr {{%.*}}, i64 {{%.*}}, i64 {{%.*}})
+  // COM: LLVM: call ptr @__memmove_chk(ptr noundef {{%.*}}, ptr noundef {{%.*}}, i64 noundef {{%.*}}, i64 noundef {{%.*}})
+  __builtin___memmove_chk(dest, src, n, n);
+}
+
+
+void test_memset_chk(void *dest, int ch, size_t n) {
+  // CIR-LABEL: cir.func {{.*}} @test_memset_chk
+  // CIR:         %[[#DEST:]] = cir.alloca {{.*}} ["dest", init]
+  // CIR:         %[[#CH:]] = cir.alloca {{.*}} ["ch", init]
+  // CIR:         %[[#N:]] = cir.alloca {{.*}} ["n", init]
+
+  // An unchecked memset should be emitted when the count and buffer size are
+  // constants and the count is less than or equal to the buffer size.
+
+  // CIR: %[[#DEST_LOAD:]] = cir.load{{.*}} %[[#DEST]]
+  // CIR: %[[#CH_LOAD:]] = cir.load{{.*}} %[[#CH]]
+  // CIR: %[[#COUNT:]] = cir.const #cir.int<8>
+  // CIR: cir.libc.memset %[[#COUNT]] bytes from %[[#DEST_LOAD]] set to %[[#CH_LOAD]]
+  __builtin___memset_chk(dest, ch, 8, 10);
+
+  // CIR: %[[#DEST_LOAD:]] = cir.load{{.*}} %[[#DEST]]
+  // CIR: %[[#CH_LOAD:]] = cir.load{{.*}} %[[#CH]]
+  // CIR: %[[#COUNT:]] = cir.const #cir.int<10>
+  // CIR: cir.libc.memset %[[#COUNT]] bytes from %[[#DEST_LOAD]] set to %[[#CH_LOAD]]
+  __builtin___memset_chk(dest, ch, 10, 10);
+
+  // __memset_chk should be called when the count is greater than the buffer
+  // size, or when either the count or buffer size isn't a constant.
+
+  // CIR: %[[#DEST_LOAD:]] = cir.load{{.*}} %[[#DEST]]
+  // CIR: %[[#CH_LOAD:]] = cir.load{{.*}} %[[#CH]]
+  // CIR: %[[#COUNT:]] = cir.const #cir.int<10>
+  // CIR: %[[#SIZE:]] = cir.const #cir.int<8>
+  // CIR: cir.call @__memset_chk(%[[#DEST_LOAD]], %[[#CH_LOAD]], %[[#COUNT]], %[[#SIZE]])
+  __builtin___memset_chk(dest, ch, 10lu, 8lu);
+
+  // CIR: %[[#DEST_LOAD:]] = cir.load{{.*}} %[[#DEST]]
+  // CIR: %[[#CH_LOAD:]] = cir.load{{.*}} %[[#CH]]
+  // CIR: %[[#N_LOAD:]] = cir.load{{.*}} %[[#N]]
+  // CIR: %[[#SIZE:]] = cir.const #cir.int<10>
+  // CIR: cir.call @__memset_chk(%[[#DEST_LOAD]], %[[#CH_LOAD]], %[[#N_LOAD]], %[[#SIZE]])
+  __builtin___memset_chk(dest, ch, n, 10lu);
+
+  // CIR: %[[#DEST_LOAD:]] = cir.load{{.*}} %[[#DEST]]
+  // CIR: %[[#CH_LOAD:]] = cir.load{{.*}} %[[#CH]]
+  // CIR: %[[#COUNT:]] = cir.const #cir.int<10>
+  // CIR: %[[#N_LOAD:]] = cir.load{{.*}} %[[#N]]
+  // CIR: cir.call @__memset_chk(%[[#DEST_LOAD]], %[[#CH_LOAD]], %[[#COUNT]], %[[#N_LOAD]])
+  __builtin___memset_chk(dest, ch, 10lu, n);
+
+  // CIR: %[[#DEST_LOAD:]] = cir.load{{.*}} %[[#DEST]]
+  // CIR: %[[#CH_LOAD:]] = cir.load{{.*}} %[[#CH]]
+  // CIR: %[[#N_LOAD1:]] = cir.load{{.*}} %[[#N]]
+  // CIR: %[[#N_LOAD2:]] = cir.load{{.*}} %[[#N]]
+  // CIR: cir.call @__memset_chk(%[[#DEST_LOAD]], %[[#CH_LOAD]], %[[#N_LOAD1]], %[[#N_LOAD2]])
+  __builtin___memset_chk(dest, ch, n, n);
+}
+
+// FIXME: The test should test intrinsic argument alignment, however,
+// currently we lack support for argument attributes.
+// Thus, added `COM: LLVM:` lines so we can easily flip the test
+// when the support of argument attributes is in.
+void test_memcpy_inline(void *dst, const void *src, size_t n) {
+
+  // CIR-LABEL: test_memcpy_inline
+  // CIR: cir.memcpy_inline 0 bytes from {{%.*}} to {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!void>
+
+  // LLVM-LABEL: test_memcpy_inline
+  // LLVM: call void @llvm.memcpy.inline.p0.p0.i64(ptr {{%.*}}, ptr {{%.*}}, i64 0, i1 false)
+  // COM: LLVM: call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 {{%.*}}, ptr align 1 {{%.*}}, i64 0, i1 false)
+  __builtin_memcpy_inline(dst, src, 0);
+
+  // CIR: cir.memcpy_inline 1 bytes from {{%.*}} to {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!void>
+
+  // LLVM: call void @llvm.memcpy.inline.p0.p0.i64(ptr {{%.*}}, ptr {{%.*}}, i64 1, i1 false)
+  // COM: LLVM: call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 {{%.*}}, ptr align 1 {{%.*}}, i64 1, i1 false)
+  __builtin_memcpy_inline(dst, src, 1);
+
+  // CIR: cir.memcpy_inline 4 bytes from {{%.*}} to {{%.*}} : !cir.ptr<!void> -> !cir.ptr<!void>
+
+  // LLVM: call void @llvm.memcpy.inline.p0.p0.i64(ptr {{%.*}}, ptr {{%.*}}, i64 4, i1 false)
+  // COM: LLVM: call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 {{%.*}}, ptr align 1 {{%.*}}, i64 4, i1 false)
+  __builtin_memcpy_inline(dst, src, 4);
+}
+
+void test_memcpy_inline_aligned_buffers(unsigned long long *dst, const unsigned long long *src) {
+
+  // LLVM-LABEL: test_memcpy_inline_aligned_buffers
+  // LLVM: call void @llvm.memcpy.inline.p0.p0.i64(ptr {{%.*}}, ptr {{%.*}}, i64 4, i1 false)
+  // COM: LLVM: call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 {{%.*}}, ptr align 8 {{%.*}}, i64 4, i1 false)
+  __builtin_memcpy_inline(dst, src, 4);
+}
+
+void test_memset_inline(void *dst, int val) {
+
+  // CIR-LABEL: test_memset_inline
+  // CIR: cir.memset_inline 0 bytes from {{%.*}} set to {{%.*}} : !cir.ptr<!void>, !s32i
+
+  // LLVM-LABEL: test_memset_inline
+  // LLVM: call void @llvm.memset.inline.p0.i64(ptr {{%.*}}, i8 {{%.*}}, i64 0, i1 false)
+  __builtin_memset_inline(dst, val, 0);
+
+  // CIR: cir.memset_inline 1 bytes from {{%.*}} set to {{%.*}} : !cir.ptr<!void>, !s32i
+
+  // LLVM: call void @llvm.memset.inline.p0.i64(ptr {{%.*}}, i8 {{%.*}}, i64 1, i1 false)
+  __builtin_memset_inline(dst, val, 1);
+
+  // CIR: cir.memset_inline 4 bytes from {{%.*}} set to {{%.*}} : !cir.ptr<!void>, !s32i
+
+  // LLVM: call void @llvm.memset.inline.p0.i64(ptr {{%.*}}, i8 {{%.*}}, i64 4, i1 false)
+  __builtin_memset_inline(dst, val, 4);
+}
+
+void* test_builtin_mempcpy(void *dest, void *src, size_t n) {
+  // CIR-LABEL: test_builtin_mempcpy
+  // CIR: [[ALLOCA:%.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["__retval"]
+  // CIR: cir.libc.memcpy [[NUM:%.*]] bytes from [[S:.*]] to [[DST:.*]] :
+  // CIR: [[CAST2:%.*]] = cir.cast bitcast [[DST]] : !cir.ptr<!void> -> !cir.ptr<!cir.ptr<!u8i>>
+  // CIR: [[GEP:%.*]] = cir.ptr_stride [[CAST2]], [[NUM]] : (!cir.ptr<!cir.ptr<!u8i>>, !u64i) -> !cir.ptr<!cir.ptr<!u8i>>
+  // CIR: [[CAST3:%.*]] = cir.cast bitcast [[ALLOCA]]
+  // CIR: cir.store{{.*}} [[GEP]], [[CAST3:%.*]]
+  // CIR-NEXT: [[LD:%.*]] = cir.load{{.*}} [[ALLOCA]]
+  // CIR-NEXT: cir.return [[LD]]
+ 
+  // LLVM-LABEL: test_builtin_mempcpy
+  // LLVM: call void @llvm.memcpy.p0.p0.i64(ptr [[DST:%.*]], ptr {{%.*}}, i64 [[NUM:%.*]], i1 false)
+  // LLVM-NEXT: [[GEP:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[NUM]]
+  // LLVM-NEXT: store ptr [[GEP]], ptr [[P:%.*]] 
+  // LLVM-NEXT: [[LD:%.*]] = load ptr, ptr [[P]]
+  // LLVM-NEXT: ret ptr [[LD]]
+
+  // OGCG-LABEL: test_builtin_mempcpy
+  // OGCG: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DST:%.*]], ptr align 1 {{%.*}}, i64 [[NUM:%.*]], i1 false)
+  // OGCG-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[NUM]]
+  // OGCG-NEXT: ret ptr [[GEP]]
+  return __builtin_mempcpy(dest, src, n);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/builtins-overflow.cpp b/clang/test/CIR/Incubator/CodeGen/builtins-overflow.cpp
new file mode 100644
index 0000000000000..9a9999ad1fbd5
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtins-overflow.cpp
@@ -0,0 +1,364 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck %s --check-prefix=CIR --input-file=%t.cir
+
+bool test_add_overflow_uint_uint_uint(unsigned x, unsigned y, unsigned *res) {
+  return __builtin_add_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z32test_add_overflow_uint_uint_uintjjPj
+//      CIR:   %[[#LHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u32i>, !u32i
+// CIR-NEXT:   %[[#RHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u32i>, !u32i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!u32i>>, !cir.ptr<!u32i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#LHS]], %[[#RHS]]) : !u32i, (!u32i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !u32i, !cir.ptr<!u32i>
+//      CIR: }
+
+bool test_add_overflow_int_int_int(int x, int y, int *res) {
+  return __builtin_add_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z29test_add_overflow_int_int_intiiPi
+//      CIR:   %[[#LHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:   %[[#RHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#LHS]], %[[#RHS]]) : !s32i, (!s32i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !s32i, !cir.ptr<!s32i>
+//      CIR: }
+
+bool test_add_overflow_xint31_xint31_xint31(_BitInt(31) x, _BitInt(31) y, _BitInt(31) *res) {
+  return __builtin_add_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z38test_add_overflow_xint31_xint31_xint31DB31_S_PS_
+//      CIR:   %[[#LHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.int<s, 31>>, !cir.int<s, 31>
+// CIR-NEXT:   %[[#RHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.int<s, 31>>, !cir.int<s, 31>
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!cir.int<s, 31>>>, !cir.ptr<!cir.int<s, 31>>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#LHS]], %[[#RHS]]) : <s, 31>, (<s, 31>, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !cir.int<s, 31>, !cir.ptr<!cir.int<s, 31>>
+//      CIR: }
+
+bool test_sub_overflow_uint_uint_uint(unsigned x, unsigned y, unsigned *res) {
+  return __builtin_sub_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z32test_sub_overflow_uint_uint_uintjjPj
+//      CIR:   %[[#LHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u32i>, !u32i
+// CIR-NEXT:   %[[#RHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u32i>, !u32i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!u32i>>, !cir.ptr<!u32i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(sub, %[[#LHS]], %[[#RHS]]) : !u32i, (!u32i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !u32i, !cir.ptr<!u32i>
+//      CIR: }
+
+bool test_sub_overflow_int_int_int(int x, int y, int *res) {
+  return __builtin_sub_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z29test_sub_overflow_int_int_intiiPi
+//      CIR:   %[[#LHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:   %[[#RHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(sub, %[[#LHS]], %[[#RHS]]) : !s32i, (!s32i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !s32i, !cir.ptr<!s32i>
+//      CIR: }
+
+bool test_sub_overflow_xint31_xint31_xint31(_BitInt(31) x, _BitInt(31) y, _BitInt(31) *res) {
+  return __builtin_sub_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z38test_sub_overflow_xint31_xint31_xint31DB31_S_PS_
+//      CIR:   %[[#LHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.int<s, 31>>, !cir.int<s, 31>
+// CIR-NEXT:   %[[#RHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.int<s, 31>>, !cir.int<s, 31>
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!cir.int<s, 31>>>, !cir.ptr<!cir.int<s, 31>>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(sub, %[[#LHS]], %[[#RHS]]) : <s, 31>, (<s, 31>, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !cir.int<s, 31>, !cir.ptr<!cir.int<s, 31>>
+//      CIR: }
+
+bool test_mul_overflow_uint_uint_uint(unsigned x, unsigned y, unsigned *res) {
+  return __builtin_mul_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z32test_mul_overflow_uint_uint_uintjjPj
+//      CIR:   %[[#LHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u32i>, !u32i
+// CIR-NEXT:   %[[#RHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u32i>, !u32i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!u32i>>, !cir.ptr<!u32i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#LHS]], %[[#RHS]]) : !u32i, (!u32i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !u32i, !cir.ptr<!u32i>
+//      CIR: }
+
+bool test_mul_overflow_int_int_int(int x, int y, int *res) {
+  return __builtin_mul_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z29test_mul_overflow_int_int_intiiPi
+//      CIR:   %[[#LHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:   %[[#RHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#LHS]], %[[#RHS]]) : !s32i, (!s32i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !s32i, !cir.ptr<!s32i>
+//      CIR: }
+
+bool test_mul_overflow_xint31_xint31_xint31(_BitInt(31) x, _BitInt(31) y, _BitInt(31) *res) {
+  return __builtin_mul_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z38test_mul_overflow_xint31_xint31_xint31DB31_S_PS_
+//      CIR:   %[[#LHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.int<s, 31>>, !cir.int<s, 31>
+// CIR-NEXT:   %[[#RHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.int<s, 31>>, !cir.int<s, 31>
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!cir.int<s, 31>>>, !cir.ptr<!cir.int<s, 31>>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#LHS]], %[[#RHS]]) : <s, 31>, (<s, 31>, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !cir.int<s, 31>, !cir.ptr<!cir.int<s, 31>>
+//      CIR: }
+
+bool test_mul_overflow_ulong_ulong_long(unsigned long x, unsigned long y, unsigned long *res) {
+  return __builtin_mul_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z34test_mul_overflow_ulong_ulong_longmmPm
+//      CIR:   %[[#LHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u64i>, !u64i
+// CIR-NEXT:   %[[#RHS:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u64i>, !u64i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#LHS]], %[[#RHS]]) : !u64i, (!u64i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !u64i, !cir.ptr<!u64i>
+//      CIR: }
+
+bool test_add_overflow_uint_int_int(unsigned x, int y, int *res) {
+  return __builtin_add_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z30test_add_overflow_uint_int_intjiPi
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u32i>, !u32i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CIR-NEXT:   %[[#PROM_X:]] = cir.cast integral %[[#X]] : !u32i -> !cir.int<s, 33>
+// CIR-NEXT:   %[[#PROM_Y:]] = cir.cast integral %[[#Y]] : !s32i -> !cir.int<s, 33>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#PROM_X]], %[[#PROM_Y]]) : <s, 33>, (!s32i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !s32i, !cir.ptr<!s32i>
+//      CIR: }
+
+bool test_add_overflow_volatile(int x, int y, volatile int *res) {
+  return __builtin_add_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z26test_add_overflow_volatileiiPVi
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#X]], %[[#Y]]) : !s32i, (!s32i, !cir.bool)
+// CIR-NEXT:   cir.store volatile{{.*}} %[[RES]], %[[#RES_PTR]] : !s32i, !cir.ptr<!s32i>
+//      CIR: }
+
+bool test_uadd_overflow(unsigned x, unsigned y, unsigned *res) {
+  return __builtin_uadd_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z18test_uadd_overflowjjPj
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u32i>, !u32i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u32i>, !u32i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!u32i>>, !cir.ptr<!u32i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#X]], %[[#Y]]) : !u32i, (!u32i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !u32i, !cir.ptr<!u32i>
+//      CIR: }
+
+bool test_uaddl_overflow(unsigned long x, unsigned long y, unsigned long *res) {
+  return __builtin_uaddl_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z19test_uaddl_overflowmmPm
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u64i>, !u64i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u64i>, !u64i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#X]], %[[#Y]]) : !u64i, (!u64i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !u64i, !cir.ptr<!u64i>
+//      CIR: }
+
+bool test_uaddll_overflow(unsigned long long x, unsigned long long y, unsigned long long *res) {
+  return __builtin_uaddll_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z20test_uaddll_overflowyyPy
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u64i>, !u64i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u64i>, !u64i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#X]], %[[#Y]]) : !u64i, (!u64i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !u64i, !cir.ptr<!u64i>
+//      CIR: }
+
+bool test_usub_overflow(unsigned x, unsigned y, unsigned *res) {
+  return __builtin_usub_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z18test_usub_overflowjjPj
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u32i>, !u32i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u32i>, !u32i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!u32i>>, !cir.ptr<!u32i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(sub, %[[#X]], %[[#Y]]) : !u32i, (!u32i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !u32i, !cir.ptr<!u32i>
+//      CIR: }
+
+bool test_usubl_overflow(unsigned long x, unsigned long y, unsigned long *res) {
+  return __builtin_usubl_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z19test_usubl_overflowmmPm
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u64i>, !u64i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u64i>, !u64i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(sub, %[[#X]], %[[#Y]]) : !u64i, (!u64i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !u64i, !cir.ptr<!u64i>
+//      CIR: }
+
+bool test_usubll_overflow(unsigned long long x, unsigned long long y, unsigned long long *res) {
+  return __builtin_usubll_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z20test_usubll_overflowyyPy
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u64i>, !u64i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u64i>, !u64i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(sub, %[[#X]], %[[#Y]]) : !u64i, (!u64i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !u64i, !cir.ptr<!u64i>
+//      CIR: }
+
+bool test_umul_overflow(unsigned x, unsigned y, unsigned *res) {
+  return __builtin_umul_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z18test_umul_overflowjjPj
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u32i>, !u32i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u32i>, !u32i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!u32i>>, !cir.ptr<!u32i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#X]], %[[#Y]]) : !u32i, (!u32i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !u32i, !cir.ptr<!u32i>
+//      CIR: }
+
+bool test_umull_overflow(unsigned long x, unsigned long y, unsigned long *res) {
+  return __builtin_umull_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z19test_umull_overflowmmPm
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u64i>, !u64i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u64i>, !u64i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#X]], %[[#Y]]) : !u64i, (!u64i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !u64i, !cir.ptr<!u64i>
+//      CIR: }
+
+bool test_umulll_overflow(unsigned long long x, unsigned long long y, unsigned long long *res) {
+  return __builtin_umulll_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z20test_umulll_overflowyyPy
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u64i>, !u64i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u64i>, !u64i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#X]], %[[#Y]]) : !u64i, (!u64i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !u64i, !cir.ptr<!u64i>
+//      CIR: }
+
+bool test_sadd_overflow(int x, int y, int *res) {
+  return __builtin_sadd_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z18test_sadd_overflowiiPi
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#X]], %[[#Y]]) : !s32i, (!s32i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !s32i, !cir.ptr<!s32i>
+//      CIR: }
+
+bool test_saddl_overflow(long x, long y, long *res) {
+  return __builtin_saddl_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z19test_saddl_overflowllPl
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s64i>, !s64i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s64i>, !s64i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s64i>>, !cir.ptr<!s64i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#X]], %[[#Y]]) : !s64i, (!s64i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !s64i, !cir.ptr<!s64i>
+//      CIR: }
+
+bool test_saddll_overflow(long long x, long long y, long long *res) {
+  return __builtin_saddll_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z20test_saddll_overflowxxPx
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s64i>, !s64i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s64i>, !s64i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s64i>>, !cir.ptr<!s64i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(add, %[[#X]], %[[#Y]]) : !s64i, (!s64i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !s64i, !cir.ptr<!s64i>
+//      CIR: }
+
+bool test_ssub_overflow(int x, int y, int *res) {
+  return __builtin_ssub_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z18test_ssub_overflowiiPi
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(sub, %[[#X]], %[[#Y]]) : !s32i, (!s32i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !s32i, !cir.ptr<!s32i>
+//      CIR: }
+
+bool test_ssubl_overflow(long x, long y, long *res) {
+  return __builtin_ssubl_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z19test_ssubl_overflowllPl
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s64i>, !s64i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s64i>, !s64i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s64i>>, !cir.ptr<!s64i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(sub, %[[#X]], %[[#Y]]) : !s64i, (!s64i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !s64i, !cir.ptr<!s64i>
+//      CIR: }
+
+bool test_ssubll_overflow(long long x, long long y, long long *res) {
+  return __builtin_ssubll_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z20test_ssubll_overflowxxPx
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s64i>, !s64i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s64i>, !s64i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s64i>>, !cir.ptr<!s64i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(sub, %[[#X]], %[[#Y]]) : !s64i, (!s64i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !s64i, !cir.ptr<!s64i>
+//      CIR: }
+
+bool test_smul_overflow(int x, int y, int *res) {
+  return __builtin_smul_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z18test_smul_overflowiiPi
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#X]], %[[#Y]]) : !s32i, (!s32i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !s32i, !cir.ptr<!s32i>
+//      CIR: }
+
+bool test_smull_overflow(long x, long y, long *res) {
+  return __builtin_smull_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z19test_smull_overflowllPl
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s64i>, !s64i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s64i>, !s64i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s64i>>, !cir.ptr<!s64i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#X]], %[[#Y]]) : !s64i, (!s64i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !s64i, !cir.ptr<!s64i>
+//      CIR: }
+
+bool test_smulll_overflow(long long x, long long y, long long *res) {
+  return __builtin_smulll_overflow(x, y, res);
+}
+
+//      CIR: cir.func {{.*}} @_Z20test_smulll_overflowxxPx
+//      CIR:   %[[#X:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s64i>, !s64i
+// CIR-NEXT:   %[[#Y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s64i>, !s64i
+// CIR-NEXT:   %[[#RES_PTR:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s64i>>, !cir.ptr<!s64i>
+// CIR-NEXT:   %[[RES:.+]], %{{.+}} = cir.binop.overflow(mul, %[[#X]], %[[#Y]]) : !s64i, (!s64i, !cir.bool)
+// CIR-NEXT:   cir.store{{.*}} %[[RES]], %[[#RES_PTR]] : !s64i, !cir.ptr<!s64i>
+//      CIR: }
diff --git a/clang/test/CIR/Incubator/CodeGen/builtins.cpp b/clang/test/CIR/Incubator/CodeGen/builtins.cpp
new file mode 100644
index 0000000000000..8d2759bc7e69a
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/builtins.cpp
@@ -0,0 +1,407 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24  -fclangir \
+// RUN:  -emit-llvm -fno-clangir-call-conv-lowering -o - %s \
+// RUN:  | opt -S -passes=instcombine,mem2reg,simplifycfg -o %t.ll 
+// RUN: FileCheck  --check-prefix=LLVM --input-file=%t.ll %s
+
+// This test file is a collection of test cases for all target-independent
+// builtins that are related to memory operations.
+
+int s;
+
+int *test_addressof() {
+  return __builtin_addressof(s);
+  
+  // CIR-LABEL: test_addressof
+  // CIR: [[ADDR:%.*]] = cir.get_global @s : !cir.ptr<!s32i>
+  // CIR: cir.store{{.*}} [[ADDR]], [[SAVE:%.*]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+  // CIR: [[RES:%.*]] = cir.load{{.*}} [[SAVE]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+  // CIR: cir.return [[RES]] : !cir.ptr<!s32i>
+
+  // LLVM-LABEL: test_addressof
+  // LLVM: store ptr @s, ptr [[ADDR:%.*]], align 8
+  // LLVM: [[RES:%.*]] = load ptr, ptr [[ADDR]], align 8
+  // LLVM: ret ptr [[RES]]
+}
+
+namespace std { template<typename T> T *addressof(T &); }
+int *test_std_addressof() {
+  return std::addressof(s);
+  
+  // CIR-LABEL: test_std_addressof
+  // CIR: [[ADDR:%.*]] = cir.get_global @s : !cir.ptr<!s32i>
+  // CIR: cir.store{{.*}} [[ADDR]], [[SAVE:%.*]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+  // CIR: [[RES:%.*]] = cir.load{{.*}} [[SAVE]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+  // CIR: cir.return [[RES]] : !cir.ptr<!s32i>
+
+  // LLVM-LABEL: test_std_addressof
+  // LLVM: store ptr @s, ptr [[ADDR:%.*]], align 8
+  // LLVM: [[RES:%.*]] = load ptr, ptr [[ADDR]], align 8
+  // LLVM: ret ptr [[RES]]
+}
+
+namespace std { template<typename T> T *__addressof(T &); }
+int *test_std_addressof2() {
+  return std::__addressof(s);
+  
+  // CIR-LABEL: test_std_addressof2
+  // CIR: [[ADDR:%.*]] = cir.get_global @s : !cir.ptr<!s32i>
+  // CIR: cir.store{{.*}} [[ADDR]], [[SAVE:%.*]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+  // CIR: [[RES:%.*]] = cir.load{{.*}} [[SAVE]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+  // CIR: cir.return [[RES]] : !cir.ptr<!s32i>
+
+  // LLVM-LABEL: test_std_addressof2
+  // LLVM: store ptr @s, ptr [[ADDR:%.*]], align 8
+  // LLVM: [[RES:%.*]] = load ptr, ptr [[ADDR]], align 8
+  // LLVM: ret ptr [[RES]]
+}
+
+extern "C" char* test_memchr(const char arg[32]) {
+  return __builtin_char_memchr(arg, 123, 32);
+
+  // CIR-LABEL: test_memchr
+  // CIR: [[PATTERN:%.*]] = cir.const #cir.int<123> : !s32i 
+  // CIR: [[LEN:%.*]] = cir.const #cir.int<32> : !s32i 
+  // CIR: [[LEN_U64:%.*]] = cir.cast integral [[LEN]] : !s32i -> !u64i 
+  // CIR: {{%.*}} = cir.libc.memchr({{%.*}}, [[PATTERN]], [[LEN_U64]])
+
+  // LLVM: {{.*}}@test_memchr(ptr{{.*}}[[ARG:%.*]]) 
+  // LLVM: [[TMP0:%.*]] = alloca ptr, i64 1, align 8
+  // LLVM: store ptr [[ARG]], ptr [[TMP0]], align 8
+  // LLVM: [[SRC:%.*]] = load ptr, ptr [[TMP0]], align 8
+  // LLVM: [[RES:%.*]] = call ptr @memchr(ptr [[SRC]], i32 123, i64 32)
+  // LLVM: store ptr [[RES]], ptr [[RET_P:%.*]], align 8
+  // LLVM: [[RET:%.*]] = load ptr, ptr [[RET_P]], align 8
+  // LLVM: ret ptr [[RET]]
+}
+
+extern "C"  wchar_t* test_wmemchr(const wchar_t *wc) {
+  return __builtin_wmemchr(wc, 257u, 32);
+
+  // CIR-LABEL: test_wmemchr
+  // CIR: [[PATTERN:%.*]] = cir.const #cir.int<257> : !u32i 
+  // CIR: [[LEN:%.*]] = cir.const #cir.int<32> : !s32i 
+  // CIR: [[LEN_U64:%.*]] = cir.cast integral [[LEN]] : !s32i -> !u64i 
+  // CIR: cir.call @wmemchr({{%.*}}, [[PATTERN]], [[LEN_U64]]) : (!cir.ptr<!u32i>, !u32i, !u64i) -> !cir.ptr<!u32i>
+
+  // LLVM: {{.*}}@test_wmemchr(ptr{{.*}}[[ARG:%.*]])
+  // LLVM: [[TMP0:%.*]] = alloca ptr, i64 1, align 8
+  // LLVM: store ptr [[ARG]], ptr [[TMP0]], align 8
+  // LLVM: [[SRC:%.*]] = load ptr, ptr [[TMP0]], align 8
+  // LLVM: [[RES:%.*]] = call ptr @wmemchr(ptr [[SRC]], i32 257, i64 32)
+  // LLVM: store ptr [[RES]], ptr [[RET_P:%.*]], align 8
+  // LLVM: [[RET:%.*]] = load ptr, ptr [[RET_P]], align 8
+  // LLVM: ret ptr [[RET]]
+}
+
+extern "C" void *test_return_address(void) {
+  return __builtin_return_address(1);
+
+  // CIR-LABEL: test_return_address
+  // CIR: [[ARG:%.*]] = cir.const #cir.int<1> : !u32i
+  // CIR: {{%.*}} = cir.return_address([[ARG]])
+
+  // LLVM-LABEL: @test_return_address
+  // LLVM: {{%.*}} = call ptr @llvm.returnaddress(i32 1)
+}
+
+extern "C" void *test_frame_address(void) {
+  return __builtin_frame_address(1);
+
+  // CIR-LABEL: test_frame_address
+  // CIR: [[ARG:%.*]] = cir.const #cir.int<1> : !u32i
+  // CIR: {{%.*}} = cir.frame_address([[ARG]])
+
+  // LLVM-LABEL: @test_frame_address
+  // LLVM: {{%.*}} = call ptr @llvm.frameaddress.p0(i32 1)
+}
+
+// Following block of tests are for __builtin_launder
+// FIXME: Once we fully support __builtin_launder by allowing -fstrict-vtable-pointers,
+//        we should move following block of tests to a separate file.
+namespace launder_test {
+//===----------------------------------------------------------------------===//
+//                            Positive Cases
+//===----------------------------------------------------------------------===//
+
+struct TestVirtualFn {
+  virtual void foo() {}
+};
+
+// CIR-LABEL: test_builtin_launder_virtual_fn
+// LLVM: define{{.*}} void @test_builtin_launder_virtual_fn(ptr [[P:%.*]])
+extern "C" void test_builtin_launder_virtual_fn(TestVirtualFn *p) {
+  // CIR: cir.return
+
+  // LLVM: store ptr [[P]], ptr [[P_ADDR:%.*]], align 8
+  // LLVM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+  // LLVM-NEXT: store ptr [[TMP0]], ptr {{%.*}}
+  // LLVM-NEXT: ret void
+  TestVirtualFn *d = __builtin_launder(p);
+}
+
+struct TestPolyBase : TestVirtualFn {
+};
+
+// CIR-LABEL: test_builtin_launder_poly_base
+// LLVM: define{{.*}} void @test_builtin_launder_poly_base(ptr [[P:%.*]])
+extern "C" void test_builtin_launder_poly_base(TestPolyBase *p) {
+  // CIR: cir.return
+
+  // LLVM: store ptr [[P]], ptr [[P_ADDR:%.*]], align 8
+  // LLVM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+  // LLVM-NEXT: store ptr [[TMP0]], ptr {{%.*}}
+  // LLVM-NEXT: ret void
+  TestPolyBase *d = __builtin_launder(p);
+}
+
+struct TestBase {};
+struct TestVirtualBase : virtual TestBase {};
+
+// CIR-LABEL: test_builtin_launder_virtual_base
+// LLVM: define{{.*}} void @test_builtin_launder_virtual_base(ptr [[P:%.*]])
+extern "C" void test_builtin_launder_virtual_base(TestVirtualBase *p) {
+  TestVirtualBase *d = __builtin_launder(p);
+
+  // CIR: cir.return
+
+  // LLVM: store ptr [[P]], ptr [[P_ADDR:%.*]], align 8
+  // LLVM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+  // LLVM-NEXT: store ptr [[TMP0]], ptr {{%.*}}
+  // LLVM-NEXT: ret void
+}
+
+//===----------------------------------------------------------------------===//
+//                            Negative Cases
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: test_builtin_launder_ommitted_one
+// LLVM: define{{.*}} void @test_builtin_launder_ommitted_one(ptr [[P:%.*]])
+extern "C" void test_builtin_launder_ommitted_one(int *p) {
+  int *d = __builtin_launder(p);
+
+  // CIR: cir.return
+
+  // LLVM-NEXT: [[P_ADDR:%.*]] = alloca ptr, i64 1, align 8
+  // LLVM-NEXT: [[D:%.*]] = alloca ptr, i64 1, align 8
+  // LLVM: store ptr [[P]], ptr [[P_ADDR:%.*]], align 8
+  // LLVM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+  // LLVM-NEXT: store ptr [[TMP0]], ptr [[D]]
+  // LLVM-NEXT: ret void
+}
+
+struct TestNoInvariant {
+  int x;
+};
+
+// CIR-LABEL: test_builtin_launder_ommitted_two
+// LLVM: define{{.*}} void @test_builtin_launder_ommitted_two(ptr [[P:%.*]])
+extern "C" void test_builtin_launder_ommitted_two(TestNoInvariant *p) {
+  TestNoInvariant *d = __builtin_launder(p);
+  // CIR: cir.return
+
+  // LLVM-NOT: llvm.launder.invariant.group
+  // LLVM-NEXT: [[P_ADDR:%.*]] = alloca ptr, i64 1, align 8
+  // LLVM-NEXT: [[D:%.*]] = alloca ptr, i64 1, align 8
+  // LLVM: store ptr [[P]], ptr [[P_ADDR:%.*]], align 8
+  // LLVM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+  // LLVM-NEXT: store ptr [[TMP0]], ptr [[D]]
+  // LLVM-NEXT: ret void
+}
+
+struct TestVirtualMember {
+  TestVirtualFn member;
+};
+
+// CIR-LABEL: test_builtin_launder_virtual_member
+// LLVM: define{{.*}} void @test_builtin_launder_virtual_member
+extern "C" void test_builtin_launder_virtual_member(TestVirtualMember *p) {
+  // CIR: cir.return
+
+  // LLVM-NOT: llvm.launder.invariant.group
+  // LLVM: ret void
+  TestVirtualMember *d = __builtin_launder(p);
+}
+
+struct TestVirtualMemberDepth2 {
+  TestVirtualMember member;
+};
+
+// CIR-LABEL: test_builtin_launder_virtual_member_depth_2
+// LLVM: define{{.*}} void @test_builtin_launder_virtual_member_depth_2
+extern "C" void test_builtin_launder_virtual_member_depth_2(TestVirtualMemberDepth2 *p) {
+  // CIR: cir.return
+
+  // LLVM-NOT: llvm.launder.invariant.group
+  // LLVM: ret void
+  TestVirtualMemberDepth2 *d = __builtin_launder(p);
+}
+
+struct TestVirtualReferenceMember {
+  TestVirtualFn &member;
+};
+
+// CIR-LABEL: test_builtin_launder_virtual_reference_member
+// LLVM: define{{.*}} void @test_builtin_launder_virtual_reference_member
+extern "C" void test_builtin_launder_virtual_reference_member(TestVirtualReferenceMember *p) {
+  // CIR: cir.return
+
+  // LLVM-NOT: llvm.launder.invariant.group
+  // LLVM: ret void
+  TestVirtualReferenceMember *d = __builtin_launder(p);
+}
+
+struct TestRecursiveMember {
+  TestRecursiveMember() : member(*this) {}
+  TestRecursiveMember &member;
+};
+
+// CIR-LABEL: test_builtin_launder_recursive_member
+// LLVM: define{{.*}} void @test_builtin_launder_recursive_member
+extern "C" void test_builtin_launder_recursive_member(TestRecursiveMember *p) {
+  // CIR: cir.return
+
+  // LLVM-NOT: llvm.launder.invariant.group
+  // LLVM: ret void
+  TestRecursiveMember *d = __builtin_launder(p);
+}
+
+struct TestVirtualRecursiveMember {
+  TestVirtualRecursiveMember() : member(*this) {}
+  TestVirtualRecursiveMember &member;
+  virtual void foo();
+};
+
+// CIR-LABEL: test_builtin_launder_virtual_recursive_member
+// LLVM: define{{.*}} void @test_builtin_launder_virtual_recursive_member
+extern "C" void test_builtin_launder_virtual_recursive_member(TestVirtualRecursiveMember *p) {
+  // CIR: cir.return
+
+  // LLVM-NOT: llvm.launder.invariant.group
+  // LLVM: ret void
+  TestVirtualRecursiveMember *d = __builtin_launder(p);
+}
+
+// CIR-LABEL: test_builtin_launder_array
+// LLVM: define{{.*}} void @test_builtin_launder_array
+extern "C" void test_builtin_launder_array(TestVirtualFn (&Arr)[5]) {
+  // CIR: cir.return
+
+  // LLVM-NOT: llvm.launder.invariant.group
+  // LLVM: ret void
+  TestVirtualFn *d = __builtin_launder(Arr);
+}
+
+// CIR-LABEL: test_builtin_launder_array_nested
+// LLVM: define{{.*}} void @test_builtin_launder_array_nested
+extern "C" void test_builtin_launder_array_nested(TestVirtualFn (&Arr)[5][2]) {
+  // CIR: cir.return
+
+  // LLVM-NOT: llvm.launder.invariant.group
+  // LLVM: ret void
+  using RetTy = TestVirtualFn(*)[2];
+  RetTy d = __builtin_launder(Arr);
+}
+
+// CIR-LABEL: test_builtin_launder_array_no_invariant
+// LLVM: define{{.*}} void @test_builtin_launder_array_no_invariant
+extern "C" void test_builtin_launder_array_no_invariant(TestNoInvariant (&Arr)[5]) {
+  // CIR: cir.return
+
+  // LLVM-NOT: llvm.launder.invariant.group
+  // LLVM: ret void
+  TestNoInvariant *d = __builtin_launder(Arr);
+}
+
+// CIR-LABEL: test_builtin_launder_array_nested_no_invariant
+// LLVM: define{{.*}} void @test_builtin_launder_array_nested_no_invariant
+extern "C" void test_builtin_launder_array_nested_no_invariant(TestNoInvariant (&Arr)[5][2]) {
+  // CIR: cir.return
+
+  // LLVM-NOT: llvm.launder.invariant.group
+  // LLVM: ret void
+  using RetTy = TestNoInvariant(*)[2];
+  RetTy d = __builtin_launder(Arr);
+}
+
+template <class Member>
+struct WithMember {
+  Member mem;
+};
+
+template struct WithMember<TestVirtualFn[5]>;
+
+// CIR-LABEL: test_builtin_launder_member_array
+// LLVM: define{{.*}} void @test_builtin_launder_member_array
+extern "C" void test_builtin_launder_member_array(WithMember<TestVirtualFn[5]> *p) {
+  // CIR: cir.return
+
+  // LLVM-NOT: llvm.launder.invariant.group
+  // LLVM: ret void
+  auto *d = __builtin_launder(p);
+}
+
+template struct WithMember<TestVirtualFn[5][2]>;
+
+// CIR-LABEL: test_builtin_launder_member_array_nested
+// LLVM: define{{.*}} void @test_builtin_launder_member_array_nested
+extern "C" void test_builtin_launder_member_array_nested(WithMember<TestVirtualFn[5][2]> *p) {
+  // CIR: cir.return
+
+  // LLVM-NOT: llvm.launder.invariant.group
+  // LLVM: ret void
+  auto *d = __builtin_launder(p);
+}
+
+template struct WithMember<TestNoInvariant[5]>;
+
+// CIR-LABEL: test_builtin_launder_member_array_no_invariant
+// LLVM: define{{.*}} void @test_builtin_launder_member_array_no_invariant
+extern "C" void test_builtin_launder_member_array_no_invariant(WithMember<TestNoInvariant[5]> *p) {
+  // CIR: cir.return
+
+  // LLVM-NOT: llvm.launder.invariant.group
+  // LLVM: ret void
+  auto *d = __builtin_launder(p);
+}
+
+template struct WithMember<TestNoInvariant[5][2]>;
+
+// CIR-LABEL: test_builtin_launder_member_array_nested_no_invariant
+// LLVM: define{{.*}} void @test_builtin_launder_member_array_nested_no_invariant
+extern "C" void test_builtin_launder_member_array_nested_no_invariant(WithMember<TestNoInvariant[5][2]> *p) {
+  // CIR: cir.return
+
+  // LLVM-NOT: llvm.launder.invariant.group
+  // LLVM: ret void
+  auto *d = __builtin_launder(p);
+}
+
+template <class T>
+struct WithBase : T {};
+
+template struct WithBase<TestNoInvariant>;
+
+// CIR-LABEL: test_builtin_launder_base_no_invariant
+// LLVM: define{{.*}} void @test_builtin_launder_base_no_invariant
+extern "C" void test_builtin_launder_base_no_invariant(WithBase<TestNoInvariant> *p) {
+  // CIR: cir.return
+
+  // LLVM-NOT: llvm.launder.invariant.group
+  // LLVM: ret void
+  auto *d = __builtin_launder(p);
+}
+
+template struct WithBase<TestVirtualFn>;
+
+// CIR-LABEL: test_builtin_launder_base
+// LLVM: define{{.*}} void @test_builtin_launder_base
+extern "C" void test_builtin_launder_base(WithBase<TestVirtualFn> *p) {
+  // CIR: cir.return
+
+  // LLVM-NOT: llvm.launder.invariant.group
+  // LLVM: ret void
+  auto *d = __builtin_launder(p);
+}
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/c11atomic.c b/clang/test/CIR/Incubator/CodeGen/c11atomic.c
new file mode 100644
index 0000000000000..50b99092baf5c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/c11atomic.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 %s -triple aarch64-none-linux-android21 -fclangir -emit-cir -std=c11 -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 %s -triple aarch64-none-linux-android21 -fclangir -emit-llvm -std=c11 -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// CIR-DAG: ![[PS:.*]] = !cir.record<struct "PS" {!s16i, !s16i, !s16i}
+// CIR-DAG: ![[ANON:.*]] = !cir.record<struct  {![[PS]], !cir.array<!u8i x 2>}>
+// CIR-DAG: cir.global external @testPromotedStructGlobal = #cir.const_record<{#cir.const_record<{#cir.int<1> : !s16i, #cir.int<2> : !s16i, #cir.int<3> : !s16i}> : ![[PS]], #cir.zero : !cir.array<!u8i x 2>}> : ![[ANON]]
+
+// LLVM-DAG: %[[PS:.*]] = type { i16, i16, i16 }
+// LLVM-DAG: @testPromotedStructGlobal = global { %[[PS]], [2 x i8] } { %[[PS]] { i16 1, i16 2, i16 3 }, [2 x i8] zeroinitializer }
+typedef struct { short x, y, z; } PS;
+_Atomic PS testPromotedStructGlobal = (PS){1, 2, 3};
diff --git a/clang/test/CIR/Incubator/CodeGen/c89-implicit-int.c b/clang/test/CIR/Incubator/CodeGen/c89-implicit-int.c
new file mode 100644
index 0000000000000..9882d2cf4c05e
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/c89-implicit-int.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c89 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Implicit int return type.
+test = 0;
+// CHECK: cir.global external @test = #cir.int<0> : !s32i
+func (void) {
+// CHECK: cir.func {{.*}} @func() -> !s32i
+  return 0;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/call-extra-attrs.cpp b/clang/test/CIR/Incubator/CodeGen/call-extra-attrs.cpp
new file mode 100644
index 0000000000000..1176a46929d31
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/call-extra-attrs.cpp
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+__attribute__((nothrow))
+int s0(int a, int b) {
+  int x = a + b;
+  return x;
+}
+
+__attribute__((noinline))
+int s1(int a, int b) {
+  return s0(a,b);
+}
+
+int s2(int a, int b) {
+  return s1(a, b);
+}
+
+// CIR: #fn_attr = #cir<extra({nothrow = #cir.nothrow})>
+
+// CIR: cir.func{{.*}} no_inline optnone {{.*}} @_Z2s0ii(%{{.*}}, %{{.*}}) -> {{.*}} extra(#fn_attr)
+// CIR: cir.func{{.*}} no_inline optnone {{.*}} @_Z2s1ii(%{{.*}}, %{{.*}}) -> {{.*}} extra(#fn_attr)
+// CIR: cir.call @_Z2s0ii(%{{.*}}, %{{.*}}) : ({{.*}}, {{.*}}) -> {{.*}} extra(#fn_attr)
+// CIR: cir.func {{.*}} optnone {{.*}} @_Z2s2ii(%{{.*}}, %{{.*}}) -> {{.*}}
+// CIR-NOT: cir.call @_Z2s1ii(%{{.*}}, %{{.*}}) : ({{.*}}, {{.*}}) -> {{.*}} extra(#fn_attr)
+
+// LLVM: define dso_local i32 @_Z2s0ii(i32 %0, i32 %1) #[[#ATTR1:]]
+// LLVM: define dso_local i32 @_Z2s1ii(i32 %0, i32 %1) #[[#ATTR1:]]
+// LLVM: define dso_local i32 @_Z2s2ii(i32 %0, i32 %1) #[[#ATTR1:]]
+
+// LLVM: attributes #[[#ATTR1]] = {{.*}} noinline nounwind optnone
diff --git a/clang/test/CIR/Incubator/CodeGen/call-side-effect.cpp b/clang/test/CIR/Incubator/CodeGen/call-side-effect.cpp
new file mode 100644
index 0000000000000..a29b1c1063f59
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/call-side-effect.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir --check-prefix=CIR %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll --check-prefix=LLVM %s
+
+[[gnu::pure]] int pure_func(int x);
+[[gnu::const]] int const_func(int x);
+
+int test(int x) {
+  int y1 = pure_func(x);
+  int y2 = const_func(x);
+  return y1 + y2;
+}
+
+// CIR-LABEL: @_Z4testi
+// CIR:   %{{.+}} = cir.call @_Z9pure_funci(%{{.+}}) : (!s32i) -> !s32i side_effect(pure)
+// CIR:   %{{.+}} = cir.call @_Z10const_funci(%{{.+}}) : (!s32i) -> !s32i side_effect(const)
+// CIR: }
+
+// LLVM-LABEL: @_Z4testi(i32 %0)
+// LLVM:   %{{.+}} = call i32 @_Z9pure_funci(i32 %{{.+}}) #[[#meta_pure:]]
+// LLVM:   %{{.+}} = call i32 @_Z10const_funci(i32 %{{.+}}) #[[#meta_const:]]
+// LLVM: }
+// LLVM: attributes #[[#meta_pure]] = { nounwind willreturn memory(read, errnomem: none) }
+// LLVM: attributes #[[#meta_const]] = { nounwind willreturn memory(none) }
diff --git a/clang/test/CIR/Incubator/CodeGen/call-via-class-member-funcptr.cpp b/clang/test/CIR/Incubator/CodeGen/call-via-class-member-funcptr.cpp
new file mode 100644
index 0000000000000..4631ef3642c9a
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/call-via-class-member-funcptr.cpp
@@ -0,0 +1,57 @@
+// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+
+class a {
+public:
+  static char *b(int);
+};
+int h=0;
+class f {
+public:
+  const char *b();
+  a g;
+};
+const char *f::b() { return g.b(h); }
+void fn1() { f f1; }
+
+// CIR: !rec_a = !cir.record<class "a" padded {!u8i} #cir.record.decl.ast>
+// CIR: !rec_f = !cir.record<class "f" {!rec_a}>
+
+// CIR: cir.global external @h = #cir.int<0>
+// CIR: cir.func {{.*}} @_ZN1a1bEi(!s32i) -> !cir.ptr<!s8i>
+
+// CIR: cir.func {{.*}} @_ZN1f1bEv(%arg0: !cir.ptr<!rec_f> loc{{.*}}) -> !cir.ptr<!s8i>
+// CIR: [[H_PTR:%.*]] = cir.get_global @h : !cir.ptr<!s32i>
+// CIR: [[H_VAL:%.*]] = cir.load{{.*}} [[H_PTR]] : !cir.ptr<!s32i>, !s32i
+// CIR: [[RET1_VAL:%.*]] = cir.call @_ZN1a1bEi([[H_VAL]]) : (!s32i) -> !cir.ptr<!s8i>
+// CIR: cir.store{{.*}} [[RET1_VAL]], [[RET1_P:%.*]] : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+// CIR: [[RET1_VAL2:%.*]] = cir.load{{.*}} [[RET1_P]] : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
+// The return value is reloaded from the temporary slot before being returned.
+// CIR: cir.return [[RET1_VAL2]] : !cir.ptr<!s8i>
+
+// CIR: cir.func {{.*}} @_Z3fn1v()
+// CIR: [[CLS_F:%.*]] = cir.alloca !rec_f, !cir.ptr<!rec_f>, ["f1"] {alignment = 1 : i64}
+// CIR: cir.return
+
+// LLVM: %class.f = type { %class.a }
+// LLVM:  %class.a = type { i8 }
+// LLVM: @h = global i32 0
+// LLVM: declare ptr @_ZN1a1bEi(i32)
+
+// LLVM: define dso_local ptr @_ZN1f1bEv(ptr [[ARG0:%.*]])
+// LLVM: [[ARG0_SAVE:%.*]] = alloca ptr, i64 1, align 8
+// LLVM: [[RET_SAVE:%.*]] = alloca ptr, i64 1, align 8
+// LLVM: store ptr [[ARG0]], ptr [[ARG0_SAVE]], align 8
+// LLVM: [[ARG0_LOAD:%.*]] = load ptr, ptr [[ARG0_SAVE]], align 8
+// LLVM: [[FUNC_PTR:%.*]] = getelementptr %class.f, ptr [[ARG0_LOAD]], i32 0, i32 0
+// LLVM: [[VAR_H:%.*]] = load i32, ptr @h, align 4
+// LLVM: [[RET_VAL:%.*]] = call ptr @_ZN1a1bEi(i32 [[VAR_H]])
+// LLVM: store ptr [[RET_VAL]], ptr [[RET_SAVE]], align 8
+// LLVM: [[RET_VAL2:%.*]] = load ptr, ptr [[RET_SAVE]], align 8
+// LLVM: ret ptr [[RET_VAL2]]
+
+// LLVM: define dso_local void @_Z3fn1v()
+// LLVM: [[FUNC_PTR:%.*]] = alloca %class.f, i64 1, align 1
+// LLVM: ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/call.c b/clang/test/CIR/Incubator/CodeGen/call.c
new file mode 100644
index 0000000000000..5f185a0d0d20d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/call.c
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s --check-prefix=CXX
+
+void a(void) {}
+int b(int a, int b) {
+  return a + b;
+}
+double c(double a, double b) {
+  return a + b;
+}
+
+void d(void) {
+  a();
+  b(0, 1);
+}
+
+// CHECK: module {{.*}} {
+// CHECK:   cir.func {{.*}} @a()
+// CHECK:     cir.return
+// CHECK:   }
+// CHECK:   cir.func {{.*}} @b(%arg0: !s32i {{.*}}, %arg1: !s32i {{.*}}) -> !s32i
+// CHECK:     %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init]
+// CHECK:     %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init]
+// CHECK:     %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+// CHECK:     cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK:     cir.store %arg1, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK:     %3 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CHECK:     %4 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// CHECK:     %5 = cir.binop(add, %3, %4) nsw : !s32i
+// CHECK:     cir.store{{.*}} %5, %2 : !s32i, !cir.ptr<!s32i>
+// CHECK:     %6 = cir.load{{.*}} %2 : !cir.ptr<!s32i>, !s32i
+// CHECK:     cir.return %6
+// CHECK:   }
+// CHECK:   cir.func {{.*}} @c(%arg0: !cir.double {{.*}}, %arg1: !cir.double {{.*}}) -> !cir.double
+// CHECK:     %0 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["a", init]
+// CHECK:     %1 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["b", init]
+// CHECK:     %2 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["__retval"]
+// CHECK:     cir.store %arg0, %0 : !cir.double, !cir.ptr<!cir.double>
+// CHECK:     cir.store %arg1, %1 : !cir.double, !cir.ptr<!cir.double>
+// CHECK:     %3 = cir.load{{.*}} %0 : !cir.ptr<!cir.double>, !cir.double
+// CHECK:     %4 = cir.load{{.*}} %1 : !cir.ptr<!cir.double>, !cir.double
+// CHECK:     %5 = cir.binop(add, %3, %4) : !cir.double
+// CHECK:     cir.store{{.*}} %5, %2 : !cir.double, !cir.ptr<!cir.double>
+// CHECK:     %6 = cir.load{{.*}} %2 : !cir.ptr<!cir.double>, !cir.double
+// CHECK:     cir.return %6 : !cir.double
+// CHECK:   }
+// CHECK:   cir.func {{.*}} @d()
+// CHECK:     call @a() : () -> ()
+// CHECK:     %0 = cir.const #cir.int<0> : !s32i
+// CHECK:     %1 = cir.const #cir.int<1> : !s32i
+// CHECK:     call @b(%0, %1) : (!s32i, !s32i) -> !s32i
+// CHECK:     cir.return
+// CHECK:   }
+//
+// CXX: module {{.*}} {
+// CXX-NEXT:   cir.func {{.*}} @_Z1av()
+// CXX-NEXT:     cir.return
+// CXX-NEXT:   }
+// CXX-NEXT:   cir.func {{.*}} @_Z1bii(%arg0: !s32i {{.*}}, %arg1: !s32i {{.*}}) -> !s32i
+// CXX-NEXT:     %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init]
+// CXX-NEXT:     %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init]
+// CXX-NEXT:     %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+// CXX-NEXT:     cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+// CXX-NEXT:     cir.store %arg1, %1 : !s32i, !cir.ptr<!s32i>
+// CXX-NEXT:     %3 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CXX-NEXT:     %4 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// CXX-NEXT:     %5 = cir.binop(add, %3, %4) nsw : !s32i
+// CXX-NEXT:     cir.store{{.*}} %5, %2 : !s32i, !cir.ptr<!s32i>
+// CXX-NEXT:     %6 = cir.load{{.*}} %2 : !cir.ptr<!s32i>, !s32i
+// CXX-NEXT:     cir.return %6
+// CXX-NEXT:   }
+// CXX-NEXT:   cir.func {{.*}} @_Z1cdd(%arg0: !cir.double {{.*}}, %arg1: !cir.double {{.*}}) -> !cir.double
+// CXX-NEXT:     %0 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["a", init]
+// CXX-NEXT:     %1 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["b", init]
+// CXX-NEXT:     %2 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["__retval"]
+// CXX-NEXT:     cir.store %arg0, %0 : !cir.double, !cir.ptr<!cir.double>
+// CXX-NEXT:     cir.store %arg1, %1 : !cir.double, !cir.ptr<!cir.double>
+// CXX-NEXT:     %3 = cir.load{{.*}} %0 : !cir.ptr<!cir.double>, !cir.double
+// CXX-NEXT:     %4 = cir.load{{.*}} %1 : !cir.ptr<!cir.double>, !cir.double
+// CXX-NEXT:     %5 = cir.binop(add, %3, %4) : !cir.double
+// CXX-NEXT:     cir.store{{.*}} %5, %2 : !cir.double, !cir.ptr<!cir.double>
+// CXX-NEXT:     %6 = cir.load{{.*}} %2 : !cir.ptr<!cir.double>, !cir.double
+// CXX-NEXT:     cir.return %6 : !cir.double
+// CXX-NEXT:   }
+// CXX-NEXT:   cir.func {{.*}} @_Z1dv()
+// CXX-NEXT:     call @_Z1av() : () -> ()
+// CXX-NEXT:     %0 = cir.const #cir.int<0> : !s32i
+// CXX-NEXT:     %1 = cir.const #cir.int<1> : !s32i
+// CXX-NEXT:     call @_Z1bii(%0, %1) : (!s32i, !s32i) -> !s32i
+// CXX-NEXT:     cir.return
+// CXX-NEXT:   }
diff --git a/clang/test/CIR/Incubator/CodeGen/call.cpp b/clang/test/CIR/Incubator/CodeGen/call.cpp
new file mode 100644
index 0000000000000..b8464f71106fa
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/call.cpp
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+int& p();
+int f() {
+  return p() - 22;
+}
+
+// CHECK: cir.func {{.*}} @_Z1fv() -> !s32i
+// CHECK:   %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CHECK:   %1 = cir.call @_Z1pv() : () -> !cir.ptr<!s32i>
+// CHECK:   %2 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// CHECK:   %3 = cir.const #cir.int<22> : !s32i
+// CHECK:   %4 = cir.binop(sub, %2, %3) nsw : !s32i
diff --git a/clang/test/CIR/Incubator/CodeGen/cast-lvalue.cpp b/clang/test/CIR/Incubator/CodeGen/cast-lvalue.cpp
new file mode 100644
index 0000000000000..75dd7691d34a3
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/cast-lvalue.cpp
@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir --check-prefix=CIR %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll --check-prefix=LLVM %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t-og.ll
+// RUN: FileCheck --input-file=%t-og.ll --check-prefix=OGCG %s
+
+// Test that we handle C++ cast expressions as lvalues correctly.
+// This used to assert with "Use emitCastLValue below, remove me when adding testcase"
+// at CIRGenExpr.cpp:2720
+
+// Test reinterpret_cast as lvalue
+void test_reinterpret_cast_lvalue() {
+  int x = 42;
+  reinterpret_cast<char&>(x) = 'A';
+}
+
+// CIR-LABEL: cir.func{{.*}}@_Z28test_reinterpret_cast_lvaluev()
+// CIR: cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init]
+// CIR: cir.cast bitcast{{.*}}!cir.ptr<!s32i>{{.*}}!cir.ptr<!s8i>
+// CIR: cir.store{{.*}}!s8i, !cir.ptr<!s8i>
+
+// LLVM-LABEL: define{{.*}}@_Z28test_reinterpret_cast_lvaluev()
+// LLVM: alloca i32
+// LLVM: store i8 65, ptr
+
+// OGCG-LABEL: define{{.*}}@_Z28test_reinterpret_cast_lvaluev()
+// OGCG: alloca i32
+// OGCG: store i8 65, ptr
+
+// Test const_cast as lvalue
+void test_const_cast_lvalue() {
+  const int x = 0;
+  const_cast<int&>(x) = 1;
+}
+
+// CIR-LABEL: cir.func{{.*}}@_Z22test_const_cast_lvaluev()
+// CIR: cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const]
+// CIR: cir.store{{.*}}!s32i, !cir.ptr<!s32i>
+
+// LLVM-LABEL: define{{.*}}@_Z22test_const_cast_lvaluev()
+// LLVM: alloca i32
+// LLVM: store i32 1, ptr
+
+// OGCG-LABEL: define{{.*}}@_Z22test_const_cast_lvaluev()
+// OGCG: alloca i32
+// OGCG: store i32 1, ptr
+
+// Test const_cast in template context (from crashes/const-cast-lvalue.cpp)
+int global_a;
+struct S {
+  using type = int;
+  static type foo() { return const_cast<int &>(global_a); }
+};
+template <typename> struct T {
+  static bool bar() { return S::foo(); }
+};
+template <typename... Args, typename H> void baz(H) { (T<Args>::bar() || ...); }
+class C {
+  int member;
+public:
+  void method() { baz<int>(member); }
+};
+void caller() {
+  C obj;
+  obj.method();
+}
+
+// CIR-LABEL: cir.func{{.*}}@_ZN1S3fooEv()
+// CIR: cir.get_global @global_a
+
+// LLVM-LABEL: define{{.*}}@_ZN1S3fooEv()
+// LLVM: load{{.*}}@global_a
+
+// OGCG-LABEL: define{{.*}}@_ZN1S3fooEv()
+// OGCG: load{{.*}}@global_a
diff --git a/clang/test/CIR/Incubator/CodeGen/cast.c b/clang/test/CIR/Incubator/CodeGen/cast.c
new file mode 100644
index 0000000000000..9839998f7db7b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/cast.c
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s
+
+typedef struct {
+  int x;
+} A;
+
+int cstyle_cast_lvalue(A a) {
+  return ((A)(a)).x;
+}
+
+// CHECK:  cir.func {{.*}} @cstyle_cast_lvalue(%arg0: !rec_A loc({{.*}}))
+// CHECK:    [[ALLOC_A:%.*]] = cir.alloca !rec_A, !cir.ptr<!rec_A>, ["a", init] {alignment = 4 : i64}
+// CHECK:    [[ALLOC_RET:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CHECK:    [[REF_TMP:%.*]] = cir.alloca !rec_A, !cir.ptr<!rec_A>, ["ref.tmp0"] {alignment = 4 : i64}
+// CHECK:    cir.copy [[ALLOC_A]] to [[REF_TMP]] : !cir.ptr<!rec_A>
+// CHECK:    [[X_ADDR:%.*]] = cir.get_member [[REF_TMP]][0] {name = "x"} : !cir.ptr<!rec_A> -> !cir.ptr<!s32i>
+// CHECK:    [[X:%.*]] = cir.load{{.*}} [[X_ADDR]] : !cir.ptr<!s32i>, !s32i
+// CHECK:    cir.store{{.*}} [[X]], [[ALLOC_RET]] : !s32i, !cir.ptr<!s32i>
+// CHECK:    [[RET:%.*]] = cir.load{{.*}} [[ALLOC_RET]] : !cir.ptr<!s32i>, !s32i
+// CHECK:    cir.return [[RET]] : !s32i
diff --git a/clang/test/CIR/Incubator/CodeGen/cast.cpp b/clang/test/CIR/Incubator/CodeGen/cast.cpp
new file mode 100644
index 0000000000000..ca8bcf77ae39b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/cast.cpp
@@ -0,0 +1,143 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+unsigned char cxxstaticcast_0(unsigned int x) {
+  return static_cast<unsigned char>(x);
+}
+
+// CHECK: cir.func {{.*}} @_Z15cxxstaticcast_0j
+// CHECK:    %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["x", init] {alignment = 4 : i64}
+// CHECK:    %1 = cir.alloca !u8i, !cir.ptr<!u8i>, ["__retval"] {alignment = 1 : i64}
+// CHECK:    cir.store{{.*}} %arg0, %0 : !u32i, !cir.ptr<!u32i>
+// CHECK:    %2 = cir.load{{.*}} %0 : !cir.ptr<!u32i>, !u32i
+// CHECK:    %3 = cir.cast integral %2 : !u32i -> !u8i
+// CHECK:    cir.store{{.*}} %3, %1 : !u8i, !cir.ptr<!u8i>
+// CHECK:    %4 = cir.load{{.*}} %1 : !cir.ptr<!u8i>, !u8i
+// CHECK:    cir.return %4 : !u8i
+// CHECK:  }
+
+
+int cStyleCasts_0(unsigned x1, int x2, float x3, short x4, double x5) {
+// CHECK: cir.func {{.*}} @_{{.*}}cStyleCasts_0{{.*}}
+
+  char a = (char)x1; // truncate
+  // CHECK: %{{[0-9]+}} = cir.cast integral %{{[0-9]+}} : !u32i -> !s8i
+
+  short b = (short)x2; // truncate with sign
+  // CHECK: %{{[0-9]+}} = cir.cast integral %{{[0-9]+}} : !s32i -> !s16i
+
+  long long c = (long long)x1; // zero extend
+  // CHECK: %{{[0-9]+}} = cir.cast integral %{{[0-9]+}} : !u32i -> !s64i
+
+  long long d = (long long)x2; // sign extend
+  // CHECK: %{{[0-9]+}} = cir.cast integral %{{[0-9]+}} : !s32i -> !s64i
+
+  unsigned ui = (unsigned)x2; // sign drop
+  // CHECK: %{{[0-9]+}} = cir.cast integral %{{[0-9]+}} : !s32i -> !u32i
+
+  int si = (int)x1; // sign add
+  // CHECK: %{{[0-9]+}} = cir.cast integral %{{[0-9]+}} : !u32i -> !s32i
+
+  unsigned uu = (unsigned)x1; // should not be generated
+  // CHECK-NOT: %{{[0-9]+}} = cir.cast integral %{{[0-9]+}} : !u32i -> !u32i
+
+  int arr[3];
+  int* e = (int*)arr; // explicit pointer decay
+  // CHECK: %{{[0-9]+}} = cir.cast array_to_ptrdecay %{{[0-9]+}} : !cir.ptr<!cir.array<!s32i x 3>> -> !cir.ptr<!s32i>
+
+  int f = (int)x3;
+  // CHECK: %{{[0-9]+}} = cir.cast float_to_int %{{[0-9]+}} : !cir.float -> !s32i
+
+  double g = (double)x3; // FP extension
+  // CHECK: %{{[0-9]+}} = cir.cast floating %{{[0-9]+}} : !cir.float -> !cir.double
+
+  long l = (long)(void*)x4; // Must sign extend before casting to pointer
+  // CHECK: %[[TMP:[0-9]+]] = cir.cast integral %{{[0-9]+}} : !s16i -> !u64i
+  // CHECK: %[[TMP2:[0-9]+]] = cir.cast int_to_ptr %[[TMP]] : !u64i -> !cir.ptr<!void>
+  // CHECK: %{{[0-9]+}} = cir.cast ptr_to_int %[[TMP2]] : !cir.ptr<!void> -> !s64i
+
+  float sitofp = (float)x2; // Signed integer to floating point
+  // CHECK: %{{.+}} = cir.cast int_to_float %{{[0-9]+}} : !s32i -> !cir.float
+
+  float uitofp = (float)x1; // Unsigned integer to floating point
+  // CHECK: %{{.+}} = cir.cast int_to_float %{{[0-9]+}} : !u32i -> !cir.float
+
+  int fptosi = (int)x3; // Floating point to signed integer
+  // CHECK: %{{.+}} = cir.cast float_to_int %{{[0-9]+}} : !cir.float -> !s32i
+
+  unsigned fptoui = (unsigned)x3; // Floating point to unsigned integer
+  // CHECK: %{{.+}} = cir.cast float_to_int %{{[0-9]+}} : !cir.float -> !u32i
+
+  bool ib = (bool)x1; // No checking, because this isn't a regular cast.
+
+  int bi = (int)ib; // bool to int
+  // CHECK: %{{[0-9]+}} = cir.cast bool_to_int %{{[0-9]+}} : !cir.bool -> !s32i
+
+  float bf = (float)ib; // bool to float
+  // CHECK: %{{[0-9]+}} = cir.cast bool_to_float %{{[0-9]+}} : !cir.bool -> !cir.float
+
+  void* bpv = (void*)ib; // bool to pointer, which is done in two steps
+  // CHECK: %[[TMP:[0-9]+]] = cir.cast bool_to_int  %{{[0-9]+}} : !cir.bool -> !u64i
+  // CHECK: %{{[0-9]+}} = cir.cast int_to_ptr %[[TMP]] : !u64i -> !cir.ptr<!void>
+
+  float dptofp = (float)x5;
+  // CHECK: %{{.+}} = cir.cast floating %{{[0-9]+}} : !cir.double -> !cir.float
+
+  return 0;
+}
+
+bool cptr(void *d) {
+  bool x = d;
+  return x;
+}
+
+// CHECK: cir.func {{.*}} @_Z4cptrPv(%arg0: !cir.ptr<!void>
+// CHECK:   %0 = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["d", init] {alignment = 8 : i64}
+
+// CHECK:   %3 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CHECK:   %4 = cir.cast ptr_to_bool %3 : !cir.ptr<!void> -> !cir.bool
+
+void call_cptr(void *d) {
+  if (!cptr(d)) {
+  }
+}
+
+// CHECK: cir.func {{.*}} @_Z9call_cptrPv(%arg0: !cir.ptr<!void>
+// CHECK:   %0 = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["d", init] {alignment = 8 : i64}
+
+// CHECK:   cir.scope {
+// CHECK:     %1 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CHECK:     %2 = cir.call @_Z4cptrPv(%1) : (!cir.ptr<!void>) -> !cir.bool
+// CHECK:     %3 = cir.unary(not, %2) : !cir.bool, !cir.bool
+// CHECK:     cir.if %3 {
+
+void lvalue_cast(int x) {
+  *(int *)&x = 42;
+}
+
+// CHECK: cir.func {{.*}} @_Z11lvalue_cast
+// CHECK:   %1 = cir.const #cir.int<42> : !s32i
+// CHECK:   cir.store{{.*}} %1, %0 : !s32i, !cir.ptr<!s32i>
+
+struct A { int x; };
+
+void null_cast(long ptr) {
+  *(int *)0 = 0;
+  ((A *)0)->x = 0;
+}
+
+// CHECK: cir.func {{.*}} @_Z9null_castl
+// CHECK:   %[[ADDR:[0-9]+]] = cir.const #cir.ptr<null> : !cir.ptr<!s32i>
+// CHECK:   cir.store{{.*}} %{{[0-9]+}}, %[[ADDR]] : !s32i, !cir.ptr<!s32i>
+// CHECK:   %[[BASE:[0-9]+]] = cir.const #cir.ptr<null> : !cir.ptr<!rec_A>
+// CHECK:   %[[FIELD:[0-9]+]] = cir.get_member %[[BASE]][0] {name = "x"} : !cir.ptr<!rec_A> -> !cir.ptr<!s32i>
+// CHECK:   cir.store{{.*}} %{{[0-9]+}}, %[[FIELD]] : !s32i, !cir.ptr<!s32i>
+
+void int_cast(long ptr) {
+  ((A *)ptr)->x = 0;
+}
+
+// CHECK: cir.func {{.*}} @_Z8int_castl
+// CHECK:   %[[BASE:[0-9]+]] = cir.cast int_to_ptr %{{[0-9]+}} : !u64i -> !cir.ptr<!rec_A>
+// CHECK:   %[[FIELD:[0-9]+]] = cir.get_member %[[BASE]][0] {name = "x"} : !cir.ptr<!rec_A> -> !cir.ptr<!s32i>
+// CHECK:   cir.store{{.*}} %{{[0-9]+}}, %[[FIELD]] : !s32i, !cir.ptr<!s32i>
diff --git a/clang/test/CIR/Incubator/CodeGen/class_cast.cpp b/clang/test/CIR/Incubator/CodeGen/class_cast.cpp
new file mode 100644
index 0000000000000..9bdf7e6dc7dd0
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/class_cast.cpp
@@ -0,0 +1,89 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - 2>&1 | FileCheck %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=OGCG
+class Base {
+  // CIR-LABEL: _ZN4BaseaSERS_
+  // CIR-SAME: ([[ARG0:%.*]]: !cir.ptr{{.*}}, [[ARG1:%.*]]: !cir.ptr{{.*}})
+  // CIR: [[ALLOCA_0:%.*]] =  cir.alloca
+  // CIR: [[ALLOCA_1:%.*]] =  cir.alloca
+  // CIR: [[ALLOCA_2:%.*]] =  cir.alloca
+  // CIR: cir.store [[ARG0]], [[ALLOCA_0]]
+  // CIR: cir.store [[ARG1]], [[ALLOCA_1]]
+  // CIR: [[LD_0:%.*]] = cir.load deref [[ALLOCA_0]]
+  // CIR: cir.store align(8) [[LD_0]], [[ALLOCA_2]]
+  // CIR: [[LD_1:%.*]] = cir.load{{.*}} [[ALLOCA_2]]
+  // CIR: cir.return [[LD_1]]
+
+  // LLVM-LABEL: _ZN4BaseaSERS_
+  // LLVM-SAME: (ptr [[ARG0:%.*]], ptr [[ARG1:%.*]])
+  // LLVM:       [[TMP3:%.*]] = alloca ptr, i64 1, align 8
+  // LLVM-NEXT:  [[TMP4:%.*]] = alloca ptr, i64 1, align 8
+  // LLVM-NEXT:  [[TMP5:%.*]] = alloca ptr, i64 1, align 8
+  // LLVM-NEXT:  store ptr [[ARG0]], ptr [[TMP3]], align 8
+  // LLVM-NEXT:  store ptr [[ARG1]], ptr [[TMP4]], align 8
+  // LLVM-NEXT:  [[TMP6:%.*]] = load ptr, ptr [[TMP3]], align 8
+  // LLVM-NEXT:  store ptr [[TMP6]], ptr [[TMP5]], align 8
+  // LLVM-NEXT:  [[TMP7:%.*]] = load ptr, ptr [[TMP5]], align 8
+  // LLVM-NEXT:  ret ptr [[TMP7]]
+
+public:
+  Base &operator=(Base &b) {
+    return *this;
+  }
+};
+
+class Derived : Base {
+  Derived &operator=(Derived &);
+};
+Derived &Derived::operator=(Derived &B) {
+  // CIR-LABEL: _ZN7DerivedaSERS_
+  // CIR-SAME: [[ARG0:%.*]]: !cir.ptr{{.*}}, [[ARG1:%.*]]: !cir.ptr{{.*}}
+  // CIR: cir.store [[ARG0]], [[ALLOCA_0:%.*]] :
+  // CIR: cir.store [[ARG1]], [[ALLOCA_1:%.*]] :
+  // CIR: [[LD_0:%.*]] = cir.load [[ALLOCA_0]]
+  // CIR: [[BASE_ADDR_0:%.*]] = cir.base_class_addr [[LD_0]]
+  // CIR: [[LD_1:%.*]] = cir.load [[ALLOCA_1]]
+  // CIR: [[BASE_ADDR_1:%.*]] = cir.base_class_addr [[LD_1]]
+  // CIR: [[CALL:%.*]] = cir.call @_ZN4BaseaSERS_
+  // CIR: [[DERIVED_ADDR:%.*]] = cir.derived_class_addr [[CALL]]
+  // CIR: cir.store{{.*}} [[DERIVED_ADDR]], [[ALLOCA_2:%.*]] :
+  // CIR: [[LD_2:%.*]] = cir.load{{.*}} [[ALLOCA_2]]
+  // CIR: cir.return [[LD_2]]
+
+  // LLVM-LABEL: _ZN7DerivedaSERS_
+  // LLVM-SAME: (ptr [[ARG0:%.*]], ptr [[ARG1:%.*]])
+  // LLVM:       [[TMP3:%.*]] = alloca ptr, i64 1, align 8
+  // LLVM-NEXT:  [[TMP4:%.*]] = alloca ptr, i64 1, align 8
+  // LLVM-NEXT:  [[TMP5:%.*]] = alloca ptr, i64 1, align 8
+  // LLVM-NEXT:  store ptr [[ARG0]], ptr [[TMP3]], align 8
+  // LLVM-NEXT:  store ptr [[ARG1]], ptr [[TMP4]], align 8
+  // LLVM-NEXT:  [[TMP6:%.*]] = load ptr, ptr [[TMP3]], align 8
+  // LLVM-NEXT:  [[TMP7:%.*]] = load ptr, ptr [[TMP4]], align 8
+  // LLVM-NEXT:  [[TMP8:%.*]] = call ptr @_ZN4BaseaSERS_(ptr [[TMP6]], ptr [[TMP7]])
+  // LLVM-NEXT:  [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0
+  // LLVM-NEXT:  store ptr [[TMP9]], ptr [[TMP5]], align 8
+  // LLVM-NEXT:  [[TMP10:%.*]] = load ptr, ptr [[TMP5]], align 8
+  // LLVM-NEXT:  ret ptr [[TMP10]]
+
+  // OGCG-LABEL: @_ZN7DerivedaSERS_
+  // OGCG-SAME: (ptr{{.*}}[[ARG0:%.*]], ptr{{.*}}[[ARG1:%.*]])
+  // OGCG:       [[TMP3:%.*]] = alloca ptr, align 8
+  // OGCG-NEXT:  [[TMP4:%.*]] = alloca ptr, align 8
+  // OGCG-NEXT:  store ptr [[ARG0]], ptr [[TMP3]], align 8
+  // OGCG-NEXT:  store ptr [[ARG1]], ptr [[TMP4]], align 8
+  // OGCG-NEXT:  [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
+  // OGCG-NEXT:  [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8
+  // OGCG-NEXT:  [[TMP7:%.*]] = call{{.*}}ptr @_ZN4BaseaSERS_(ptr{{.*}}[[TMP5]], ptr{{.*}}[[TMP6]])
+  // OGCG-NEXT:  ret ptr [[TMP7]]
+  return (Derived &)Base::operator=(B);
+}
+
+// OGCG-LABEL: define{{.*}}@_ZN4BaseaSERS_
+// OGCG-SAME: (ptr{{.*}}[[BASE_ARG0:%.*]], ptr{{.*}}[[BASE_ARG1:%.*]])
+// OGCG:       [[TMP3:%.*]] = alloca ptr, align 8
+// OGCG-NEXT:  [[TMP4:%.*]] = alloca ptr, align 8
+// OGCG-NEXT:  store ptr [[BASE_ARG0]], ptr [[TMP3]], align 8
+// OGCG-NEXT:  store ptr [[BASE_ARG1]], ptr [[TMP4]], align 8
+// OGCG-NEXT:  [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8
+// OGCG-NEXT:  ret ptr [[TMP5]]
+
diff --git a/clang/test/CIR/Incubator/CodeGen/clear_cache.c b/clang/test/CIR/Incubator/CodeGen/clear_cache.c
new file mode 100644
index 0000000000000..b4123a6f9b5f8
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/clear_cache.c
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -fclangir -emit-cir -o %t.cir
+// RUN: FileCheck --input-file=%t.cir -check-prefix=CIR %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -fclangir -emit-llvm -o %t.ll
+// RUN: FileCheck --input-file=%t.ll -check-prefix=LLVM %s
+
+char buffer[32] = "This is a largely unused buffer";
+
+// __builtin___clear_cache always maps to @llvm.clear_cache, but what
+// each back-end produces is different, and this is tested in LLVM
+
+// CIR-LABEL: main
+// CIR:  %[[VAL_1:.*]] = cir.get_global @buffer : !cir.ptr<!cir.array<!s8i x 32>>
+// CIR:  %[[VAL_2:.*]] = cir.cast array_to_ptrdecay %[[VAL_1]] : !cir.ptr<!cir.array<!s8i x 32>> -> !cir.ptr<!s8i>
+// CIR:  %[[VAL_3:.*]] = cir.cast bitcast %[[VAL_2]] : !cir.ptr<!s8i> -> !cir.ptr<!void>
+// CIR:  %[[VAL_4:.*]] = cir.get_global @buffer : !cir.ptr<!cir.array<!s8i x 32>>
+// CIR:  %[[VAL_6:.*]] = cir.const #cir.int<32> : !s32i
+// CIR:  %[[VAL_7:.*]] = cir.get_element %[[VAL_4]][%[[VAL_6]]] : (!cir.ptr<!cir.array<!s8i x 32>>, !s32i) -> !cir.ptr<!s8i>
+// CIR:  %[[VAL_8:.*]] = cir.cast bitcast %[[VAL_7]] : !cir.ptr<!s8i> -> !cir.ptr<!void>
+// CIR:  cir.clear_cache %[[VAL_3]] : !cir.ptr<!void>, %[[VAL_8]],
+
+// LLVM-LABEL: main
+// LLVM:  call void @llvm.clear_cache(ptr @buffer, ptr getelementptr inbounds nuw (i8, ptr @buffer, i64 32))
+
+int main(void) {
+  __builtin___clear_cache(buffer, buffer+32);
+  return 0;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/cmp.cpp b/clang/test/CIR/Incubator/CodeGen/cmp.cpp
new file mode 100644
index 0000000000000..3bca55e78d13c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/cmp.cpp
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+void c0(int a, int b) {
+  bool x = a > b;
+  x = a < b;
+  x = a <= b;
+  x = a >= b;
+  x = a != b;
+  x = a == b;
+}
+
+// CHECK: = cir.cmp(gt, %3, %4) : !s32i, !cir.bool
+// CHECK: = cir.cmp(lt, %6, %7) : !s32i, !cir.bool
+// CHECK: = cir.cmp(le, %9, %10) : !s32i, !cir.bool
+// CHECK: = cir.cmp(ge, %12, %13) : !s32i, !cir.bool
+// CHECK: = cir.cmp(ne, %15, %16) : !s32i, !cir.bool
+// CHECK: = cir.cmp(eq, %18, %19) : !s32i, !cir.bool
diff --git a/clang/test/CIR/Incubator/CodeGen/cold-attr.cpp b/clang/test/CIR/Incubator/CodeGen/cold-attr.cpp
new file mode 100644
index 0000000000000..0c8e0b395b081
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/cold-attr.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t-og.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t-og.ll %s
+
+// Test: Function with cold attribute
+// CIR: cir.func{{.*}}cold{{.*}}@_Z9cold_funcv
+__attribute__((cold))
+void cold_func() {}
+
+// LLVM-LABEL: define{{.*}}@_Z9cold_funcv
+// LLVM-SAME: [[COLD_ATTRS:#[0-9]+]]
+// LLVM: attributes [[COLD_ATTRS]] = {{{.*}}cold{{.*}}}
+
+// OGCG-LABEL: define{{.*}}@_Z9cold_funcv
+// OGCG-SAME: [[COLD_ATTRS_OG:#[0-9]+]]
+// OGCG: attributes [[COLD_ATTRS_OG]] = {{{.*}}cold{{.*}}}
diff --git a/clang/test/CIR/Incubator/CodeGen/comma.cpp b/clang/test/CIR/Incubator/CodeGen/comma.cpp
new file mode 100644
index 0000000000000..d0a997616ae05
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/comma.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -Wno-unused-value -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+int c0() {
+    int a = 1;
+    int b = 2;
+    return b + 1, a;
+}
+
+// CHECK: cir.func {{.*}} @_Z2c0v() -> !s32i
+// CHECK: %[[#RET:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+// CHECK: %[[#A:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init]
+// CHECK: %[[#B:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init]
+// CHECK: %[[#LOADED_B:]] = cir.load{{.*}} %[[#B]] : !cir.ptr<!s32i>, !s32i
+// CHECK: %[[#]] = cir.binop(add, %[[#LOADED_B]], %[[#]]) nsw : !s32i
+// CHECK: %[[#LOADED_A:]] = cir.load{{.*}} %[[#A]] : !cir.ptr<!s32i>, !s32i
+// CHECK: cir.store{{.*}} %[[#LOADED_A]], %[[#RET]] : !s32i, !cir.ptr<!s32i>
+
+int &foo1();
+int &foo2();
+
+void c1() {
+    int &x = (foo1(), foo2());
+}
+
+// CHECK: cir.func {{.*}} @_Z2c1v()
+// CHECK: %0 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CHECK: %1 = cir.call @_Z4foo1v() : () -> !cir.ptr<!s32i>
+// CHECK: %2 = cir.call @_Z4foo2v() : () -> !cir.ptr<!s32i>
+// CHECK: cir.store{{.*}} %2, %0 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
diff --git a/clang/test/CIR/Incubator/CodeGen/complex-arithmetic.c b/clang/test/CIR/Incubator/CodeGen/complex-arithmetic.c
new file mode 100644
index 0000000000000..5e7b125e8e09f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/complex-arithmetic.c
@@ -0,0 +1,965 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=basic -fclangir -clangir-disable-passes -emit-cir -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir --check-prefixes=CLANG,CIRGEN,CIRGEN-BASIC,CHECK %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -complex-range=basic -fclangir -clangir-disable-passes -emit-cir -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir --check-prefixes=CPPLANG,CIRGEN,CIRGEN-BASIC,CHECK %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=improved -fclangir -clangir-disable-passes -emit-cir -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir --check-prefixes=CLANG,CIRGEN,CIRGEN-IMPROVED,CHECK %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -complex-range=improved -fclangir -clangir-disable-passes -emit-cir -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir --check-prefixes=CPPLANG,CIRGEN,CIRGEN-IMPROVED,CHECK %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=full -fclangir -clangir-disable-passes -emit-cir -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir --check-prefixes=CLANG,CIRGEN,CIRGEN-FULL,CHECK %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -complex-range=full -fclangir -clangir-disable-passes -emit-cir -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir --check-prefixes=CPPLANG,CIRGEN,CIRGEN-FULL,CHECK %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=basic -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir --check-prefixes=CLANG,CIR,CIR-BASIC,CHECK %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -complex-range=basic -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir --check-prefixes=CPPLANG,CIR,CIR-BASIC,CHECK %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=improved -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir --check-prefixes=CLANG,CIR,CIR-IMPROVED,CHECK %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -complex-range=improved -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir --check-prefixes=CPPLANG,CIR,CIR-IMPROVED,CHECK %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=full -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir --check-prefixes=CLANG,CIR,CIR-FULL,CHECK %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -complex-range=full -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir --check-prefixes=CPPLANG,CIR,CIR-FULL,CHECK %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=basic -fclangir -emit-llvm -fno-clangir-call-conv-lowering -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll --check-prefixes=CLANG,LLVM,LLVM-BASIC,CHECK %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -complex-range=basic -fclangir -emit-llvm -fno-clangir-call-conv-lowering -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll --check-prefixes=CPPLANG,LLVM,LLVM-BASIC,CHECK %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=improved -fclangir -emit-llvm -fno-clangir-call-conv-lowering -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll --check-prefixes=CLANG,LLVM,LLVM-IMPROVED,CHECK %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -complex-range=improved -fclangir -emit-llvm -fno-clangir-call-conv-lowering -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll --check-prefixes=CPPLANG,LLVM,LLVM-IMPROVED,CHECK %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=full -fclangir -emit-llvm -fno-clangir-call-conv-lowering -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll --check-prefixes=CLANG,LLVM,LLVM-FULL,CHECK %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -complex-range=full -fclangir -emit-llvm -fno-clangir-call-conv-lowering -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll --check-prefixes=CPPLANG,LLVM,LLVM-FULL,CHECK %s
+
+double _Complex cd1, cd2;
+int _Complex ci1, ci2;
+
+void add() {
+  cd1 = cd1 + cd2;
+  ci1 = ci1 + ci2;
+}
+
+// CLANG:   @add
+// CPPLANG: @_Z3addv
+
+// CIRGEN: %{{.+}} = cir.binop(add, %{{.+}}, %{{.+}}) : !cir.complex<!cir.double>
+// CIRGEN: %{{.+}} = cir.binop(add, %{{.+}}, %{{.+}}) : !cir.complex<!s32i>
+
+//      CIR: %[[#LHS_REAL:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#LHS_IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#RHS_REAL:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#RHS_IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#RES_REAL:]] = cir.binop(add, %[[#LHS_REAL]], %[[#RHS_REAL]]) : !cir.double
+// CIR-NEXT: %[[#RES_IMAG:]] = cir.binop(add, %[[#LHS_IMAG]], %[[#RHS_IMAG]]) : !cir.double
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#RES_REAL]], %[[#RES_IMAG]] : !cir.double -> !cir.complex<!cir.double>
+
+//      CIR: %[[#LHS_REAL:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#LHS_IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#RHS_REAL:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#RHS_IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#RES_REAL:]] = cir.binop(add, %[[#LHS_REAL]], %[[#RHS_REAL]]) : !s32i
+// CIR-NEXT: %[[#RES_IMAG:]] = cir.binop(add, %[[#LHS_IMAG]], %[[#RHS_IMAG]]) : !s32i
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#RES_REAL]], %[[#RES_IMAG]] : !s32i -> !cir.complex<!s32i>
+
+//      LLVM: %[[#LHS_REAL:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-NEXT: %[[#LHS_IMAG:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-NEXT: %[[#RHS_REAL:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-NEXT: %[[#RHS_IMAG:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-NEXT: %[[#RES_REAL:]] = fadd double %[[#LHS_REAL]], %[[#RHS_REAL]]
+// LLVM-NEXT: %[[#RES_IMAG:]] = fadd double %[[#LHS_IMAG]], %[[#RHS_IMAG]]
+// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#RES_REAL]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#RES_IMAG]], 1
+
+//      LLVM: %[[#LHS_REAL:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-NEXT: %[[#LHS_IMAG:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-NEXT: %[[#RHS_REAL:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-NEXT: %[[#RHS_IMAG:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-NEXT: %[[#RES_REAL:]] = add i32 %[[#LHS_REAL]], %[[#RHS_REAL]]
+// LLVM-NEXT: %[[#RES_IMAG:]] = add i32 %[[#LHS_IMAG]], %[[#RHS_IMAG]]
+// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#RES_REAL]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 %[[#RES_IMAG]], 1
+
+// CHECK: }
+
+void sub() {
+  cd1 = cd1 - cd2;
+  ci1 = ci1 - ci2;
+}
+
+// CLANG:   @sub
+// CPPLANG: @_Z3subv
+
+// CIRGEN: %{{.+}} = cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.complex<!cir.double>
+// CIRGEN: %{{.+}} = cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.complex<!s32i>
+
+//      CIR: %[[#LHS_REAL:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#LHS_IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#RHS_REAL:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#RHS_IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#RES_REAL:]] = cir.binop(sub, %[[#LHS_REAL]], %[[#RHS_REAL]]) : !cir.double
+// CIR-NEXT: %[[#RES_IMAG:]] = cir.binop(sub, %[[#LHS_IMAG]], %[[#RHS_IMAG]]) : !cir.double
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#RES_REAL]], %[[#RES_IMAG]] : !cir.double -> !cir.complex<!cir.double>
+
+//      CIR: %[[#LHS_REAL:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#LHS_IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#RHS_REAL:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#RHS_IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#RES_REAL:]] = cir.binop(sub, %[[#LHS_REAL]], %[[#RHS_REAL]]) : !s32i
+// CIR-NEXT: %[[#RES_IMAG:]] = cir.binop(sub, %[[#LHS_IMAG]], %[[#RHS_IMAG]]) : !s32i
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#RES_REAL]], %[[#RES_IMAG]] : !s32i -> !cir.complex<!s32i>
+
+//      LLVM: %[[#LHS_REAL:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-NEXT: %[[#LHS_IMAG:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-NEXT: %[[#RHS_REAL:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-NEXT: %[[#RHS_IMAG:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-NEXT: %[[#RES_REAL:]] = fsub double %[[#LHS_REAL]], %[[#RHS_REAL]]
+// LLVM-NEXT: %[[#RES_IMAG:]] = fsub double %[[#LHS_IMAG]], %[[#RHS_IMAG]]
+// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#RES_REAL]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#RES_IMAG]], 1
+
+//      LLVM: %[[#LHS_REAL:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-NEXT: %[[#LHS_IMAG:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-NEXT: %[[#RHS_REAL:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-NEXT: %[[#RHS_IMAG:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-NEXT: %[[#RES_REAL:]] = sub i32 %[[#LHS_REAL]], %[[#RHS_REAL]]
+// LLVM-NEXT: %[[#RES_IMAG:]] = sub i32 %[[#LHS_IMAG]], %[[#RHS_IMAG]]
+// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#RES_REAL]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 %[[#RES_IMAG]], 1
+
+// CHECK: }
+
+void mul() {
+  cd1 = cd1 * cd2;
+  ci1 = ci1 * ci2;
+}
+
+// CLANG:   @mul
+// CPPLANG: @_Z3mulv
+
+// CIRGEN-BASIC: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(basic) : !cir.complex<!cir.double>
+// CIRGEN-BASIC: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(basic) : !cir.complex<!s32i>
+
+//      CIR-BASIC: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-BASIC-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-BASIC-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-BASIC-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-BASIC-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !cir.double
+// CIR-BASIC-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !cir.double
+// CIR-BASIC-NEXT: %[[#C:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !cir.double
+// CIR-BASIC-NEXT: %[[#D:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !cir.double
+// CIR-BASIC-NEXT: %[[#E:]] = cir.binop(sub, %[[#A]], %[[#B]]) : !cir.double
+// CIR-BASIC-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !cir.double
+// CIR-BASIC-NEXT: %{{.+}} = cir.complex.create %[[#E]], %[[#F]] : !cir.double -> !cir.complex<!cir.double>
+
+//      CIR-BASIC: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-BASIC-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-BASIC-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-BASIC-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-BASIC-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !s32i
+// CIR-BASIC-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !s32i
+// CIR-BASIC-NEXT: %[[#C:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !s32i
+// CIR-BASIC-NEXT: %[[#D:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !s32i
+// CIR-BASIC-NEXT: %[[#E:]] = cir.binop(sub, %[[#A]], %[[#B]]) : !s32i
+// CIR-BASIC-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !s32i
+// CIR-BASIC-NEXT: %{{.+}} = cir.complex.create %[[#E]], %[[#F]] : !s32i -> !cir.complex<!s32i>
+
+//      LLVM-BASIC: %[[#LHSR:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-BASIC-NEXT: %[[#LHSI:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-BASIC-NEXT: %[[#RHSR:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-BASIC-NEXT: %[[#RHSI:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-BASIC-NEXT: %[[#A:]] = fmul double %[[#LHSR]], %[[#RHSR]]
+// LLVM-BASIC-NEXT: %[[#B:]] = fmul double %[[#LHSI]], %[[#RHSI]]
+// LLVM-BASIC-NEXT: %[[#C:]] = fmul double %[[#LHSR]], %[[#RHSI]]
+// LLVM-BASIC-NEXT: %[[#D:]] = fmul double %[[#LHSI]], %[[#RHSR]]
+// LLVM-BASIC-NEXT: %[[#E:]] = fsub double %[[#A]], %[[#B]]
+// LLVM-BASIC-NEXT: %[[#F:]] = fadd double %[[#C]], %[[#D]]
+// LLVM-BASIC-NEXT: %[[#G:]] = insertvalue { double, double } undef, double %[[#E]], 0
+// LLVM-BASIC-NEXT: %{{.+}} = insertvalue { double, double } %[[#G]], double %[[#F]], 1
+
+//      LLVM-BASIC: %[[#LHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-BASIC-NEXT: %[[#LHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-BASIC-NEXT: %[[#RHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-BASIC-NEXT: %[[#RHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-BASIC-NEXT: %[[#A:]] = mul i32 %[[#LHSR]], %[[#RHSR]]
+// LLVM-BASIC-NEXT: %[[#B:]] = mul i32 %[[#LHSI]], %[[#RHSI]]
+// LLVM-BASIC-NEXT: %[[#C:]] = mul i32 %[[#LHSR]], %[[#RHSI]]
+// LLVM-BASIC-NEXT: %[[#D:]] = mul i32 %[[#LHSI]], %[[#RHSR]]
+// LLVM-BASIC-NEXT: %[[#E:]] = sub i32 %[[#A]], %[[#B]]
+// LLVM-BASIC-NEXT: %[[#F:]] = add i32 %[[#C]], %[[#D]]
+// LLVM-BASIC-NEXT: %[[#G:]] = insertvalue { i32, i32 } undef, i32 %[[#E]], 0
+// LLVM-BASIC-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#G]], i32 %[[#F]], 1
+
+// CIRGEN-IMPROVED: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(improved) : !cir.complex<!cir.double>
+// CIRGEN-IMPROVED: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(improved) : !cir.complex<!s32i>
+
+//      CIR-IMPROVED: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-IMPROVED-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-IMPROVED-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-IMPROVED-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-IMPROVED-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !cir.double
+// CIR-IMPROVED-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !cir.double
+// CIR-IMPROVED-NEXT: %[[#C:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !cir.double
+// CIR-IMPROVED-NEXT: %[[#D:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !cir.double
+// CIR-IMPROVED-NEXT: %[[#E:]] = cir.binop(sub, %[[#A]], %[[#B]]) : !cir.double
+// CIR-IMPROVED-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !cir.double
+// CIR-IMPROVED-NEXT: %{{.+}} = cir.complex.create %[[#E]], %[[#F]] : !cir.double -> !cir.complex<!cir.double>
+
+//      CIR-IMPROVED: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-IMPROVED-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-IMPROVED-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-IMPROVED-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-IMPROVED-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !s32i
+// CIR-IMPROVED-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !s32i
+// CIR-IMPROVED-NEXT: %[[#C:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !s32i
+// CIR-IMPROVED-NEXT: %[[#D:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !s32i
+// CIR-IMPROVED-NEXT: %[[#E:]] = cir.binop(sub, %[[#A]], %[[#B]]) : !s32i
+// CIR-IMPROVED-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !s32i
+// CIR-IMPROVED-NEXT: %{{.+}} = cir.complex.create %[[#E]], %[[#F]] : !s32i -> !cir.complex<!s32i>
+
+//      LLVM-IMPROVED: %[[#LHSR:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-IMPROVED-NEXT: %[[#LHSI:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-IMPROVED-NEXT: %[[#RHSR:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-IMPROVED-NEXT: %[[#RHSI:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-IMPROVED-NEXT: %[[#A:]] = fmul double %[[#LHSR]], %[[#RHSR]]
+// LLVM-IMPROVED-NEXT: %[[#B:]] = fmul double %[[#LHSI]], %[[#RHSI]]
+// LLVM-IMPROVED-NEXT: %[[#C:]] = fmul double %[[#LHSR]], %[[#RHSI]]
+// LLVM-IMPROVED-NEXT: %[[#D:]] = fmul double %[[#LHSI]], %[[#RHSR]]
+// LLVM-IMPROVED-NEXT: %[[#E:]] = fsub double %[[#A]], %[[#B]]
+// LLVM-IMPROVED-NEXT: %[[#F:]] = fadd double %[[#C]], %[[#D]]
+// LLVM-IMPROVED-NEXT: %[[#G:]] = insertvalue { double, double } undef, double %[[#E]], 0
+// LLVM-IMPROVED-NEXT: %{{.+}} = insertvalue { double, double } %[[#G]], double %[[#F]], 1
+
+//      LLVM-IMPROVED: %[[#LHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-IMPROVED-NEXT: %[[#LHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-IMPROVED-NEXT: %[[#RHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-IMPROVED-NEXT: %[[#RHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-IMPROVED-NEXT: %[[#A:]] = mul i32 %[[#LHSR]], %[[#RHSR]]
+// LLVM-IMPROVED-NEXT: %[[#B:]] = mul i32 %[[#LHSI]], %[[#RHSI]]
+// LLVM-IMPROVED-NEXT: %[[#C:]] = mul i32 %[[#LHSR]], %[[#RHSI]]
+// LLVM-IMPROVED-NEXT: %[[#D:]] = mul i32 %[[#LHSI]], %[[#RHSR]]
+// LLVM-IMPROVED-NEXT: %[[#E:]] = sub i32 %[[#A]], %[[#B]]
+// LLVM-IMPROVED-NEXT: %[[#F:]] = add i32 %[[#C]], %[[#D]]
+// LLVM-IMPROVED-NEXT: %[[#G:]] = insertvalue { i32, i32 } undef, i32 %[[#E]], 0
+// LLVM-IMPROVED-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#G]], i32 %[[#F]], 1
+
+// CIRGEN-FULL: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(full) : !cir.complex<!cir.double>
+// CIRGEN-FULL: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(full) : !cir.complex<!s32i>
+
+//      CIR-FULL: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-FULL-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-FULL-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-FULL-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-FULL-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !cir.double
+// CIR-FULL-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !cir.double
+// CIR-FULL-NEXT: %[[#C:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !cir.double
+// CIR-FULL-NEXT: %[[#D:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !cir.double
+// CIR-FULL-NEXT: %[[#E:]] = cir.binop(sub, %[[#A]], %[[#B]]) : !cir.double
+// CIR-FULL-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !cir.double
+// CIR-FULL-NEXT: %[[#RES:]] = cir.complex.create %[[#E]], %[[#F]] : !cir.double -> !cir.complex<!cir.double>
+// CIR-FULL-NEXT: %[[#COND:]] = cir.cmp(ne, %[[#E]], %[[#E]]) : !cir.double, !cir.bool
+// CIR-FULL-NEXT: %[[#COND2:]] = cir.cmp(ne, %[[#F]], %[[#F]]) : !cir.double, !cir.bool
+// CIR-FULL-NEXT: %[[#G:]] = cir.const #false
+// CIR-FULL-NEXT: %[[#H:]] = cir.select if %[[#COND]] then %[[#COND2]] else %[[#G]] : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
+// CIR-FULL-NEXT: %{{.+}} = cir.ternary(%[[#H]], true {
+// CIR-FULL-NEXT:   %[[#RES2:]] = cir.call @__muldc3(%[[#LHSR]], %[[#LHSI]], %[[#RHSR]], %[[#RHSI]]) : (!cir.double, !cir.double, !cir.double, !cir.double) -> !cir.complex<!cir.double>
+// CIR-FULL-NEXT:   cir.yield %[[#RES2]] : !cir.complex<!cir.double>
+// CIR-FULL-NEXT: }, false {
+// CIR-FULL-NEXT:   cir.yield %[[#RES]] : !cir.complex<!cir.double>
+// CIR-FULL-NEXT: }) : (!cir.bool) -> !cir.complex<!cir.double>
+
+//      CIR-FULL: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-FULL-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-FULL-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-FULL-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-FULL-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !s32i
+// CIR-FULL-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !s32i
+// CIR-FULL-NEXT: %[[#C:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !s32i
+// CIR-FULL-NEXT: %[[#D:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !s32i
+// CIR-FULL-NEXT: %[[#E:]] = cir.binop(sub, %[[#A]], %[[#B]]) : !s32i
+// CIR-FULL-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !s32i
+// CIR-FULL-NEXT: %{{.+}} = cir.complex.create %[[#E]], %[[#F]] : !s32i -> !cir.complex<!s32i>
+
+//      LLVM-FULL:   %[[#LHSR:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-FULL-NEXT:   %[[#LHSI:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-FULL-NEXT:   %[[#RHSR:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-FULL-NEXT:   %[[#RHSI:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-FULL-NEXT:   %[[#A:]] = fmul double %[[#LHSR]], %[[#RHSR]]
+// LLVM-FULL-NEXT:   %[[#B:]] = fmul double %[[#LHSI]], %[[#RHSI]]
+// LLVM-FULL-NEXT:   %[[#C:]] = fmul double %[[#LHSR]], %[[#RHSI]]
+// LLVM-FULL-NEXT:   %[[#D:]] = fmul double %[[#LHSI]], %[[#RHSR]]
+// LLVM-FULL-NEXT:   %[[#E:]] = fsub double %[[#A]], %[[#B]]
+// LLVM-FULL-NEXT:   %[[#F:]] = fadd double %[[#C]], %[[#D]]
+// LLVM-FULL-NEXT:   %[[#G:]] = insertvalue { double, double } undef, double %[[#E]], 0
+// LLVM-FULL-NEXT:   %[[#RES:]] = insertvalue { double, double } %[[#G]], double %[[#F]], 1
+// LLVM-FULL-NEXT:   %[[#COND:]] = fcmp une double %[[#E]], %[[#E]]
+// LLVM-FULL-NEXT:   %[[#COND2:]] = fcmp une double %[[#F]], %[[#F]]
+// LLVM-FULL-NEXT:   %[[#COND3:]] = and i1 %[[#COND]], %[[#COND2]]
+//      LLVM-FULL: {{.+}}:
+// LLVM-FULL-NEXT:   %{{.+}} = call { double, double } @__muldc3(double %[[#LHSR]], double %[[#LHSI]], double %[[#RHSR]], double %[[#RHSI]])
+// LLVM-FULL-NEXT:   br label %{{.+}}
+//      LLVM-FULL: {{.+}}:
+// LLVM-FULL-NEXT:   br label %{{.+}}
+
+//      LLVM-FULL: %[[#LHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-FULL-NEXT: %[[#LHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-FULL-NEXT: %[[#RHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-FULL-NEXT: %[[#RHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-FULL-NEXT: %[[#A:]] = mul i32 %[[#LHSR]], %[[#RHSR]]
+// LLVM-FULL-NEXT: %[[#B:]] = mul i32 %[[#LHSI]], %[[#RHSI]]
+// LLVM-FULL-NEXT: %[[#C:]] = mul i32 %[[#LHSR]], %[[#RHSI]]
+// LLVM-FULL-NEXT: %[[#D:]] = mul i32 %[[#LHSI]], %[[#RHSR]]
+// LLVM-FULL-NEXT: %[[#E:]] = sub i32 %[[#A]], %[[#B]]
+// LLVM-FULL-NEXT: %[[#F:]] = add i32 %[[#C]], %[[#D]]
+// LLVM-FULL-NEXT: %[[#G:]] = insertvalue { i32, i32 } undef, i32 %[[#E]], 0
+// LLVM-FULL-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#G]], i32 %[[#F]], 1
+
+// CHECK: }
+
+void div() {
+  cd1 = cd1 / cd2;
+  ci1 = ci1 / ci2;
+}
+
+// CLANG:   @div
+// CPPLANG: @_Z3divv
+
+// CIRGEN-BASIC: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(basic) : !cir.complex<!cir.double>
+// CIRGEN-BASIC: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(basic) : !cir.complex<!s32i>
+
+//      CIR-BASIC: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-BASIC-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-BASIC-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-BASIC-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-BASIC-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !cir.double
+// CIR-BASIC-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !cir.double
+// CIR-BASIC-NEXT: %[[#C:]] = cir.binop(mul, %[[#RHSR]], %[[#RHSR]]) : !cir.double
+// CIR-BASIC-NEXT: %[[#D:]] = cir.binop(mul, %[[#RHSI]], %[[#RHSI]]) : !cir.double
+// CIR-BASIC-NEXT: %[[#E:]] = cir.binop(add, %[[#A]], %[[#B]]) : !cir.double
+// CIR-BASIC-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !cir.double
+// CIR-BASIC-NEXT: %[[#G:]] = cir.binop(div, %[[#E]], %[[#F]]) : !cir.double
+// CIR-BASIC-NEXT: %[[#H:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !cir.double
+// CIR-BASIC-NEXT: %[[#I:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !cir.double
+// CIR-BASIC-NEXT: %[[#J:]] = cir.binop(sub, %[[#H]], %[[#I]]) : !cir.double
+// CIR-BASIC-NEXT: %[[#K:]] = cir.binop(div, %[[#J]], %[[#F]]) : !cir.double
+// CIR-BASIC-NEXT: %{{.+}} = cir.complex.create %[[#G]], %[[#K]] : !cir.double -> !cir.complex<!cir.double>
+
+//      CIR-BASIC: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-BASIC-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-BASIC-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-BASIC-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-BASIC-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !s32i
+// CIR-BASIC-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !s32i
+// CIR-BASIC-NEXT: %[[#C:]] = cir.binop(mul, %[[#RHSR]], %[[#RHSR]]) : !s32i
+// CIR-BASIC-NEXT: %[[#D:]] = cir.binop(mul, %[[#RHSI]], %[[#RHSI]]) : !s32i
+// CIR-BASIC-NEXT: %[[#E:]] = cir.binop(add, %[[#A]], %[[#B]]) : !s32i
+// CIR-BASIC-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !s32i
+// CIR-BASIC-NEXT: %[[#G:]] = cir.binop(div, %[[#E]], %[[#F]]) : !s32i
+// CIR-BASIC-NEXT: %[[#H:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !s32i
+// CIR-BASIC-NEXT: %[[#I:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !s32i
+// CIR-BASIC-NEXT: %[[#J:]] = cir.binop(sub, %[[#H]], %[[#I]]) : !s32i
+// CIR-BASIC-NEXT: %[[#K:]] = cir.binop(div, %[[#J]], %[[#F]]) : !s32i
+// CIR-BASIC-NEXT: %{{.+}} = cir.complex.create %[[#G]], %[[#K]] : !s32i -> !cir.complex<!s32i>
+
+//      LLVM-BASIC: %[[#LHSR:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-BASIC-NEXT: %[[#LHSI:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-BASIC-NEXT: %[[#RHSR:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-BASIC-NEXT: %[[#RHSI:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-BASIC-NEXT: %[[#A:]] = fmul double %[[#LHSR]], %[[#RHSR]]
+// LLVM-BASIC-NEXT: %[[#B:]] = fmul double %[[#LHSI]], %[[#RHSI]]
+// LLVM-BASIC-NEXT: %[[#C:]] = fmul double %[[#RHSR]], %[[#RHSR]]
+// LLVM-BASIC-NEXT: %[[#D:]] = fmul double %[[#RHSI]], %[[#RHSI]]
+// LLVM-BASIC-NEXT: %[[#E:]] = fadd double %[[#A]], %[[#B]]
+// LLVM-BASIC-NEXT: %[[#F:]] = fadd double %[[#C]], %[[#D]]
+// LLVM-BASIC-NEXT: %[[#G:]] = fdiv double %[[#E]], %[[#F]]
+// LLVM-BASIC-NEXT: %[[#H:]] = fmul double %[[#LHSI]], %[[#RHSR]]
+// LLVM-BASIC-NEXT: %[[#I:]] = fmul double %[[#LHSR]], %[[#RHSI]]
+// LLVM-BASIC-NEXT: %[[#J:]] = fsub double %[[#H]], %[[#I]]
+// LLVM-BASIC-NEXT: %[[#K:]] = fdiv double %[[#J]], %[[#F]]
+// LLVM-BASIC-NEXT: %[[#L:]] = insertvalue { double, double } undef, double %[[#G]], 0
+// LLVM-BASIC-NEXT: %{{.+}} = insertvalue { double, double } %[[#L]], double %[[#K]], 1
+
+//      LLVM-BASIC: %[[#LHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-BASIC-NEXT: %[[#LHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-BASIC-NEXT: %[[#RHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-BASIC-NEXT: %[[#RHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-BASIC-NEXT: %[[#A:]] = mul i32 %[[#LHSR]], %[[#RHSR]]
+// LLVM-BASIC-NEXT: %[[#B:]] = mul i32 %[[#LHSI]], %[[#RHSI]]
+// LLVM-BASIC-NEXT: %[[#C:]] = mul i32 %[[#RHSR]], %[[#RHSR]]
+// LLVM-BASIC-NEXT: %[[#D:]] = mul i32 %[[#RHSI]], %[[#RHSI]]
+// LLVM-BASIC-NEXT: %[[#E:]] = add i32 %[[#A]], %[[#B]]
+// LLVM-BASIC-NEXT: %[[#F:]] = add i32 %[[#C]], %[[#D]]
+// LLVM-BASIC-NEXT: %[[#G:]] = sdiv i32 %[[#E]], %[[#F]]
+// LLVM-BASIC-NEXT: %[[#H:]] = mul i32 %[[#LHSI]], %[[#RHSR]]
+// LLVM-BASIC-NEXT: %[[#I:]] = mul i32 %[[#LHSR]], %[[#RHSI]]
+// LLVM-BASIC-NEXT: %[[#J:]] = sub i32 %[[#H]], %[[#I]]
+// LLVM-BASIC-NEXT: %[[#K:]] = sdiv i32 %[[#J]], %[[#F]]
+// LLVM-BASIC-NEXT: %[[#L:]] = insertvalue { i32, i32 } undef, i32 %[[#G]], 0
+// LLVM-BASIC-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#L]], i32 %[[#K]], 1
+
+// CIRGEN-IMPROVED: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(improved) : !cir.complex<!cir.double>
+// CIRGEN-IMPROVED: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(improved) : !cir.complex<!s32i>
+
+//      CIR-IMPROVED: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-IMPROVED-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-IMPROVED-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-IMPROVED-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-IMPROVED-NEXT: %[[#A:]] = cir.fabs %[[#RHSR]] : !cir.double
+// CIR-IMPROVED-NEXT: %[[#B:]] = cir.fabs %[[#RHSI]] : !cir.double
+// CIR-IMPROVED-NEXT: %[[#C:]] = cir.cmp(ge, %[[#A]], %[[#B]]) : !cir.double, !cir.bool
+// CIR-IMPROVED-NEXT: %{{.+}} = cir.ternary(%[[#C]], true {
+// CIR-IMPROVED-NEXT:   %[[#D:]] = cir.binop(div, %[[#RHSI]], %[[#RHSR]]) : !cir.double
+// CIR-IMPROVED-NEXT:   %[[#E:]] = cir.binop(mul, %[[#D]], %[[#RHSI]]) : !cir.double
+// CIR-IMPROVED-NEXT:   %[[#F:]] = cir.binop(add, %[[#RHSR]], %[[#E]]) : !cir.double
+// CIR-IMPROVED-NEXT:   %[[#G:]] = cir.binop(mul, %[[#LHSI]], %[[#D]]) : !cir.double
+// CIR-IMPROVED-NEXT:   %[[#H:]] = cir.binop(add, %[[#LHSR]], %[[#G]]) : !cir.double
+// CIR-IMPROVED-NEXT:   %[[#I:]] = cir.binop(div, %[[#H]], %[[#F]]) : !cir.double
+// CIR-IMPROVED-NEXT:   %[[#J:]] = cir.binop(mul, %[[#LHSR]], %[[#D]]) : !cir.double
+// CIR-IMPROVED-NEXT:   %[[#K:]] = cir.binop(sub, %[[#LHSI]], %[[#J]]) : !cir.double
+// CIR-IMPROVED-NEXT:   %[[#L:]] = cir.binop(div, %[[#K]], %[[#F]]) : !cir.double
+// CIR-IMPROVED-NEXT:   %[[#M:]] = cir.complex.create %[[#I]], %[[#L]] : !cir.double -> !cir.complex<!cir.double>
+// CIR-IMPROVED-NEXT:   cir.yield %[[#M]] : !cir.complex<!cir.double>
+// CIR-IMPROVED-NEXT: }, false {
+// CIR-IMPROVED-NEXT:   %[[#D:]] = cir.binop(div, %[[#RHSR]], %[[#RHSI]]) : !cir.double
+// CIR-IMPROVED-NEXT:   %[[#E:]] = cir.binop(mul, %[[#D]], %[[#RHSR]]) : !cir.double
+// CIR-IMPROVED-NEXT:   %[[#F:]] = cir.binop(add, %[[#RHSI]], %[[#E]]) : !cir.double
+// CIR-IMPROVED-NEXT:   %[[#G:]] = cir.binop(mul, %[[#LHSR]], %[[#D]]) : !cir.double
+// CIR-IMPROVED-NEXT:   %[[#H:]] = cir.binop(add, %[[#G]], %[[#LHSI]]) : !cir.double
+// CIR-IMPROVED-NEXT:   %[[#I:]] = cir.binop(div, %[[#H]], %[[#F]]) : !cir.double
+// CIR-IMPROVED-NEXT:   %[[#J:]] = cir.binop(mul, %[[#LHSI]], %[[#D]]) : !cir.double
+// CIR-IMPROVED-NEXT:   %[[#K:]] = cir.binop(sub, %[[#J]], %[[#LHSR]]) : !cir.double
+// CIR-IMPROVED-NEXT:   %[[#L:]] = cir.binop(div, %[[#K]], %[[#F]]) : !cir.double
+// CIR-IMPROVED-NEXT:   %[[#M:]] = cir.complex.create %[[#I]], %[[#L]] : !cir.double -> !cir.complex<!cir.double>
+// CIR-IMPROVED-NEXT:   cir.yield %[[#M]] : !cir.complex<!cir.double>
+// CIR-IMPROVED-NEXT: }) : (!cir.bool) -> !cir.complex<!cir.double>
+
+//      CIR-IMPROVED: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-IMPROVED-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-IMPROVED-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-IMPROVED-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-IMPROVED-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !s32i
+// CIR-IMPROVED-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !s32i
+// CIR-IMPROVED-NEXT: %[[#C:]] = cir.binop(mul, %[[#RHSR]], %[[#RHSR]]) : !s32i
+// CIR-IMPROVED-NEXT: %[[#D:]] = cir.binop(mul, %[[#RHSI]], %[[#RHSI]]) : !s32i
+// CIR-IMPROVED-NEXT: %[[#E:]] = cir.binop(add, %[[#A]], %[[#B]]) : !s32i
+// CIR-IMPROVED-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !s32i
+// CIR-IMPROVED-NEXT: %[[#G:]] = cir.binop(div, %[[#E]], %[[#F]]) : !s32i
+// CIR-IMPROVED-NEXT: %[[#H:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !s32i
+// CIR-IMPROVED-NEXT: %[[#I:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !s32i
+// CIR-IMPROVED-NEXT: %[[#J:]] = cir.binop(sub, %[[#H]], %[[#I]]) : !s32i
+// CIR-IMPROVED-NEXT: %[[#K:]] = cir.binop(div, %[[#J]], %[[#F]]) : !s32i
+// CIR-IMPROVED-NEXT: %{{.+}} = cir.complex.create %[[#G]], %[[#K]] : !s32i -> !cir.complex<!s32i>
+
+//      LLVM-IMPROVED: %[[#LHSR:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-IMPROVED-NEXT: %[[#LHSI:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-IMPROVED-NEXT: %[[#RHSR:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-IMPROVED-NEXT: %[[#RHSI:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-IMPROVED-NEXT: %[[#A:]] = call double @llvm.fabs.f64(double %[[#RHSR]])
+// LLVM-IMPROVED-NEXT: %[[#B:]] = call double @llvm.fabs.f64(double %[[#RHSI]])
+// LLVM-IMPROVED-NEXT: %[[#C:]] = fcmp oge double %[[#A]], %[[#B]]
+// LLVM-IMPROVED-NEXT: br i1 %[[#C]], label %[[#LA:]], label %[[#LB:]]
+//      LLVM-IMPROVED: [[#LA]]:
+// LLVM-IMPROVED-NEXT: %[[#D:]] = fdiv double %[[#RHSI]], %[[#RHSR]]
+// LLVM-IMPROVED-NEXT: %[[#E:]] = fmul double %[[#D]], %[[#RHSI]]
+// LLVM-IMPROVED-NEXT: %[[#F:]] = fadd double %[[#RHSR]], %[[#E]]
+// LLVM-IMPROVED-NEXT: %[[#G:]] = fmul double %[[#LHSI]], %[[#D]]
+// LLVM-IMPROVED-NEXT: %[[#H:]] = fadd double %[[#LHSR]], %[[#G]]
+// LLVM-IMPROVED-NEXT: %[[#I:]] = fdiv double %[[#H]], %[[#F]]
+// LLVM-IMPROVED-NEXT: %[[#J:]] = fmul double %[[#LHSR]], %[[#D]]
+// LLVM-IMPROVED-NEXT: %[[#K:]] = fsub double %[[#LHSI]], %[[#J]]
+// LLVM-IMPROVED-NEXT: %[[#L:]] = fdiv double %[[#K]], %[[#F]]
+// LLVM-IMPROVED-NEXT: %[[#M:]] = insertvalue { double, double } undef, double %[[#I]], 0
+// LLVM-IMPROVED-NEXT: %[[#N1:]] = insertvalue { double, double } %[[#M]], double %[[#L]], 1
+// LLVM-IMPROVED-NEXT: br label %[[#LC:]]
+//      LLVM-IMPROVED: [[#LB]]:
+// LLVM-IMPROVED-NEXT: %[[#D:]] = fdiv double %[[#RHSR]], %[[#RHSI]]
+// LLVM-IMPROVED-NEXT: %[[#E:]] = fmul double %[[#D]], %[[#RHSR]]
+// LLVM-IMPROVED-NEXT: %[[#F:]] = fadd double %[[#RHSI]], %[[#E]]
+// LLVM-IMPROVED-NEXT: %[[#G:]] = fmul double %[[#LHSR]], %[[#D]]
+// LLVM-IMPROVED-NEXT: %[[#H:]] = fadd double %[[#G]], %[[#LHSI]]
+// LLVM-IMPROVED-NEXT: %[[#I:]] = fdiv double %[[#H]], %[[#F]]
+// LLVM-IMPROVED-NEXT: %[[#J:]] = fmul double %[[#LHSI]], %[[#D]]
+// LLVM-IMPROVED-NEXT: %[[#K:]] = fsub double %[[#J]], %[[#LHSR]]
+// LLVM-IMPROVED-NEXT: %[[#L:]] = fdiv double %[[#K]], %[[#F]]
+// LLVM-IMPROVED-NEXT: %[[#M:]] = insertvalue { double, double } undef, double %[[#I]], 0
+// LLVM-IMPROVED-NEXT: %[[#N2:]] = insertvalue { double, double } %[[#M]], double %[[#L]], 1
+// LLVM-IMPROVED-NEXT: br label %[[#LC]]
+//      LLVM-IMPROVED: [[#LC]]:
+// LLVM-IMPROVED-NEXT: %{{.+}} = phi { double, double } [ %[[#N2]], %[[#LB]] ], [ %[[#N1]], %[[#LA]] ]
+
+//      LLVM-IMPROVED: %[[#LHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-IMPROVED-NEXT: %[[#LHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-IMPROVED-NEXT: %[[#RHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-IMPROVED-NEXT: %[[#RHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-IMPROVED-NEXT: %[[#A:]] = mul i32 %[[#LHSR]], %[[#RHSR]]
+// LLVM-IMPROVED-NEXT: %[[#B:]] = mul i32 %[[#LHSI]], %[[#RHSI]]
+// LLVM-IMPROVED-NEXT: %[[#C:]] = mul i32 %[[#RHSR]], %[[#RHSR]]
+// LLVM-IMPROVED-NEXT: %[[#D:]] = mul i32 %[[#RHSI]], %[[#RHSI]]
+// LLVM-IMPROVED-NEXT: %[[#E:]] = add i32 %[[#A]], %[[#B]]
+// LLVM-IMPROVED-NEXT: %[[#F:]] = add i32 %[[#C]], %[[#D]]
+// LLVM-IMPROVED-NEXT: %[[#G:]] = sdiv i32 %[[#E]], %[[#F]]
+// LLVM-IMPROVED-NEXT: %[[#H:]] = mul i32 %[[#LHSI]], %[[#RHSR]]
+// LLVM-IMPROVED-NEXT: %[[#I:]] = mul i32 %[[#LHSR]], %[[#RHSI]]
+// LLVM-IMPROVED-NEXT: %[[#J:]] = sub i32 %[[#H]], %[[#I]]
+// LLVM-IMPROVED-NEXT: %[[#K:]] = sdiv i32 %[[#J]], %[[#F]]
+// LLVM-IMPROVED-NEXT: %[[#L:]] = insertvalue { i32, i32 } undef, i32 %[[#G]], 0
+// LLVM-IMPROVED-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#L]], i32 %[[#K]], 1
+
+// CIRGEN-FULL: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(full) : !cir.complex<!cir.double>
+// CIRGEN-FULL: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(full) : !cir.complex<!s32i>
+
+//      CIR-FULL: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-FULL-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-FULL-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-FULL-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-FULL-NEXT: %{{.+}} = cir.call @__divdc3(%[[#LHSR]], %[[#LHSI]], %[[#RHSR]], %[[#RHSI]]) : (!cir.double, !cir.double, !cir.double, !cir.double) -> !cir.complex<!cir.double>
+
+//      CIR-FULL: %[[#LHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-FULL-NEXT: %[[#LHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-FULL-NEXT: %[[#RHSR:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-FULL-NEXT: %[[#RHSI:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-FULL-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSR]]) : !s32i
+// CIR-FULL-NEXT: %[[#B:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSI]]) : !s32i
+// CIR-FULL-NEXT: %[[#C:]] = cir.binop(mul, %[[#RHSR]], %[[#RHSR]]) : !s32i
+// CIR-FULL-NEXT: %[[#D:]] = cir.binop(mul, %[[#RHSI]], %[[#RHSI]]) : !s32i
+// CIR-FULL-NEXT: %[[#E:]] = cir.binop(add, %[[#A]], %[[#B]]) : !s32i
+// CIR-FULL-NEXT: %[[#F:]] = cir.binop(add, %[[#C]], %[[#D]]) : !s32i
+// CIR-FULL-NEXT: %[[#G:]] = cir.binop(div, %[[#E]], %[[#F]]) : !s32i
+// CIR-FULL-NEXT: %[[#H:]] = cir.binop(mul, %[[#LHSI]], %[[#RHSR]]) : !s32i
+// CIR-FULL-NEXT: %[[#I:]] = cir.binop(mul, %[[#LHSR]], %[[#RHSI]]) : !s32i
+// CIR-FULL-NEXT: %[[#J:]] = cir.binop(sub, %[[#H]], %[[#I]]) : !s32i
+// CIR-FULL-NEXT: %[[#K:]] = cir.binop(div, %[[#J]], %[[#F]]) : !s32i
+// CIR-FULL-NEXT: %{{.+}} = cir.complex.create %[[#G]], %[[#K]] : !s32i -> !cir.complex<!s32i>
+
+//      LLVM-FULL: %[[#LHSR:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-FULL-NEXT: %[[#LHSI:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-FULL-NEXT: %[[#RHSR:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-FULL-NEXT: %[[#RHSI:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-FULL-NEXT: %{{.+}} = call { double, double } @__divdc3(double %[[#LHSR]], double %[[#LHSI]], double %[[#RHSR]], double %[[#RHSI]])
+
+//      LLVM-FULL: %[[#LHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-FULL-NEXT: %[[#LHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-FULL-NEXT: %[[#RHSR:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-FULL-NEXT: %[[#RHSI:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-FULL-NEXT: %[[#A:]] = mul i32 %[[#LHSR]], %[[#RHSR]]
+// LLVM-FULL-NEXT: %[[#B:]] = mul i32 %[[#LHSI]], %[[#RHSI]]
+// LLVM-FULL-NEXT: %[[#C:]] = mul i32 %[[#RHSR]], %[[#RHSR]]
+// LLVM-FULL-NEXT: %[[#D:]] = mul i32 %[[#RHSI]], %[[#RHSI]]
+// LLVM-FULL-NEXT: %[[#E:]] = add i32 %[[#A]], %[[#B]]
+// LLVM-FULL-NEXT: %[[#F:]] = add i32 %[[#C]], %[[#D]]
+// LLVM-FULL-NEXT: %[[#G:]] = sdiv i32 %[[#E]], %[[#F]]
+// LLVM-FULL-NEXT: %[[#H:]] = mul i32 %[[#LHSI]], %[[#RHSR]]
+// LLVM-FULL-NEXT: %[[#I:]] = mul i32 %[[#LHSR]], %[[#RHSI]]
+// LLVM-FULL-NEXT: %[[#J:]] = sub i32 %[[#H]], %[[#I]]
+// LLVM-FULL-NEXT: %[[#K:]] = sdiv i32 %[[#J]], %[[#F]]
+// LLVM-FULL-NEXT: %[[#L:]] = insertvalue { i32, i32 } undef, i32 %[[#G]], 0
+// LLVM-FULL-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#L]], i32 %[[#K]], 1
+
+// CHECK: }
+
+void add_assign() { // Compound += on a complex-double and a complex-int lvalue (see CIRGEN checks below).
+  cd1 += cd2;
+  ci1 += ci2;
+}
+
+// CLANG:   @add_assign
+// CPPLANG: @_Z10add_assignv
+
+// CIRGEN: %{{.+}} = cir.binop(add, %{{.+}}, %{{.+}}) : !cir.complex<!cir.double>
+// CIRGEN: %{{.+}} = cir.binop(add, %{{.+}}, %{{.+}}) : !cir.complex<!s32i>
+
+// CHECK: }
+
+void add_assign_float16() { // _Float16 _Complex += : per the checks below, both operands are widened to float before the add.
+  _Float16 _Complex a;
+  _Float16 _Complex b;
+  a += b;
+}
+
+//   CLANG: @add_assign_float16
+// CPPLANG: @_Z18add_assign_float16v
+
+// FIXME(review): "CIRGRN" is a typo'd prefix, so this check never runs. Intended prefix is
+// likely CIRGEN, but verify the element type before enabling it: the CIR checks below show
+// this add performed on !cir.complex<!cir.float> (after f16 promotion), not !cir.f16.
+// CIRGRN: %{{.*}} = cir.binop(add, %{{.*}}, %{{.*}}) : !cir.complex<!cir.f16>
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>, ["b"]
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
+// CIR: %[[B_REAL:.*]] = cir.complex.real %[[TMP_B]] : !cir.complex<!cir.f16> -> !cir.f16
+// CIR: %[[B_IMAG:.*]] = cir.complex.imag %[[TMP_B]] : !cir.complex<!cir.f16> -> !cir.f16
+// CIR: %[[B_REAL_F32:.*]] = cir.cast floating %[[B_REAL]] : !cir.f16 -> !cir.float
+// CIR: %[[B_IMAG_F32:.*]] = cir.cast floating %[[B_IMAG]] : !cir.f16 -> !cir.float
+// CIR: %[[B_F32_COMPLEX:.*]] = cir.complex.create %[[B_REAL_F32]], %[[B_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
+// CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
+// CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
+// CIR: %[[A_REAL_F32:.*]] = cir.cast floating %[[A_REAL]] : !cir.f16 -> !cir.float
+// CIR: %[[A_IMAG_F32:.*]] = cir.cast floating %[[A_IMAG]] : !cir.f16 -> !cir.float
+// CIR: %[[A_F32_COMPLEX:.*]] = cir.complex.create %[[A_REAL_F32]], %[[A_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
+// CIR: %[[A_F32_REAL:.*]] = cir.complex.real %[[A_F32_COMPLEX]] : !cir.complex<!cir.float> -> !cir.float
+// CIR: %[[A_F32_IMAG:.*]] = cir.complex.imag %[[A_F32_COMPLEX]] : !cir.complex<!cir.float> -> !cir.float
+// CIR: %[[B_F32_REAL:.*]] = cir.complex.real %[[B_F32_COMPLEX]] : !cir.complex<!cir.float> -> !cir.float
+// CIR: %[[B_F32_IMAG:.*]] = cir.complex.imag %[[B_F32_COMPLEX]] : !cir.complex<!cir.float> -> !cir.float
+// CIR: %[[ADD_REAL:.*]] = cir.binop(add, %[[A_F32_REAL]], %[[B_F32_REAL]]) : !cir.float
+// CIR: %[[ADD_IMAG:.*]] = cir.binop(add, %[[A_F32_IMAG]], %[[B_F32_IMAG]]) : !cir.float
+// CIR: %[[RESULT:.*]] = cir.complex.create %[[ADD_REAL]], %[[ADD_IMAG]] : !cir.float -> !cir.complex<!cir.float>
+
+// LLVM: %[[A_ADDR:.*]] = alloca { half, half }, i64 1, align 2
+// LLVM: %[[B_ADDR:.*]] = alloca { half, half }, i64 1, align 2
+// LLVM: %[[TMP_B:.*]] = load { half, half }, ptr %[[B_ADDR]], align 2
+// LLVM: %[[B_REAL:.*]] = extractvalue { half, half } %[[TMP_B]], 0
+// LLVM: %[[B_IMAG:.*]] = extractvalue { half, half } %[[TMP_B]], 1
+// LLVM: %[[B_REAL_F32:.*]] = fpext half %[[B_REAL]] to float
+// LLVM: %[[B_IMAG_F32:.*]] = fpext half %[[B_IMAG]] to float
+// LLVM: %[[TMP_B_COMPLEX_F32:.*]] = insertvalue { float, float } {{.*}}, float %[[B_REAL_F32]], 0
+// LLVM: %[[B_COMPLEX_F32:.*]] = insertvalue { float, float } %[[TMP_B_COMPLEX_F32]], float %[[B_IMAG_F32]], 1
+// LLVM: %[[TMP_A:.*]] = load { half, half }, ptr %[[A_ADDR]], align 2
+// LLVM: %[[A_REAL:.*]] = extractvalue { half, half } %[[TMP_A]], 0
+// LLVM: %[[A_IMAG:.*]] = extractvalue { half, half } %[[TMP_A]], 1
+// LLVM: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float
+// LLVM: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float
+// LLVM: %[[TMP_A_COMPLEX_F32:.*]] = insertvalue { float, float } {{.*}}, float %[[A_REAL_F32]], 0
+// LLVM: %[[A_COMPLEX_F32:.*]] = insertvalue { float, float } %[[TMP_A_COMPLEX_F32]], float %[[A_IMAG_F32]], 1
+// LLVM: %[[RESULT_REAL_F32:.*]] = fadd float %[[A_REAL_F32]], %[[B_REAL_F32]]
+// LLVM: %[[RESULT_IMAG_F32:.*]] = fadd float %[[A_IMAG_F32]], %[[B_IMAG_F32]]
+// LLVM: %[[TMP_RESULT:.*]] = insertvalue { float, float } {{.*}}, float %[[RESULT_REAL_F32]], 0
+// LLVM: %[[RESULT:.*]] = insertvalue { float, float } %[[TMP_RESULT]], float %[[RESULT_IMAG_F32]], 1
+// LLVM: %[[RESULT_REAL_F16:.*]] = fptrunc float %[[RESULT_REAL_F32]] to half
+// LLVM: %[[RESULT_IMAG_F16:.*]] = fptrunc float %[[RESULT_IMAG_F32]] to half
+// LLVM: %[[TMP_RESULT_F16:.*]] = insertvalue { half, half } {{.*}}, half %[[RESULT_REAL_F16]], 0
+// LLVM: %[[RESULT_F16:.*]] = insertvalue { half, half } %[[TMP_RESULT_F16]], half %[[RESULT_IMAG_F16]], 1
+// LLVM: store { half, half } %[[RESULT_F16]], ptr %[[A_ADDR]], align 2
+
+// CHECK: }
+
+void sub_assign() { // Compound -= on a complex-double and a complex-int lvalue.
+  cd1 -= cd2;
+  ci1 -= ci2;
+}
+
+//   CLANG: @sub_assign
+// CPPLANG: @_Z10sub_assignv
+
+// CIRGEN: %{{.+}} = cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.complex<!cir.double>
+// CIRGEN: %{{.+}} = cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.complex<!s32i>
+
+// CHECK: }
+
+void mul_assign() { // Compound *= ; lowering differs per range mode, checked under the BASIC, IMPROVED and FULL prefixes.
+  cd1 *= cd2;
+  ci1 *= ci2;
+}
+
+//   CLANG: @mul_assign
+// CPPLANG: @_Z10mul_assignv
+
+// CIRGEN-BASIC: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(basic) : !cir.complex<!cir.double>
+// CIRGEN-BASIC: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(basic) : !cir.complex<!s32i>
+
+// CIRGEN-IMPROVED: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(improved) : !cir.complex<!cir.double>
+// CIRGEN-IMPROVED: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(improved) : !cir.complex<!s32i>
+
+// CIRGEN-FULL: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(full) : !cir.complex<!cir.double>
+// CIRGEN-FULL: %{{.+}} = cir.complex.binop mul %{{.+}}, %{{.+}} range(full) : !cir.complex<!s32i>
+
+// CHECK: }
+
+void div_assign() { // Compound /= ; lowering differs per range mode, checked under the BASIC, IMPROVED and FULL prefixes.
+  cd1 /= cd2;
+  ci1 /= ci2;
+}
+
+//   CLANG: @div_assign
+// CPPLANG: @_Z10div_assignv
+
+// CIRGEN-BASIC: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(basic) : !cir.complex<!cir.double>
+// CIRGEN-BASIC: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(basic) : !cir.complex<!s32i>
+
+// CIRGEN-IMPROVED: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(improved) : !cir.complex<!cir.double>
+// CIRGEN-IMPROVED: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(improved) : !cir.complex<!s32i>
+
+// CIRGEN-FULL: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(full) : !cir.complex<!cir.double>
+// CIRGEN-FULL: %{{.+}} = cir.complex.binop div %{{.+}}, %{{.+}} range(full) : !cir.complex<!s32i>
+
+// CHECK: }
+
+void unary_plus() { // Unary + is applied elementwise to the real and imaginary parts per the checks below.
+  cd1 = +cd1;
+  ci1 = +ci1;
+}
+
+//   CLANG: @unary_plus
+// CPPLANG: @_Z10unary_plusv
+
+// CIRGEN: %{{.+}} = cir.unary(plus, %{{.+}}) : !cir.complex<!cir.double>, !cir.complex<!cir.double>
+// CIRGEN: %{{.+}} = cir.unary(plus, %{{.+}}) : !cir.complex<!s32i>, !cir.complex<!s32i>
+
+//      CIR: %[[#OPR:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#OPI:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#RESR:]] = cir.unary(plus, %[[#OPR]]) : !cir.double, !cir.double
+// CIR-NEXT: %[[#RESI:]] = cir.unary(plus, %[[#OPI]]) : !cir.double, !cir.double
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#RESR]], %[[#RESI]] : !cir.double -> !cir.complex<!cir.double>
+
+//      CIR: %[[#OPR:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#OPI:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#RESR:]] = cir.unary(plus, %[[#OPR]]) : !s32i, !s32i
+// CIR-NEXT: %[[#RESI:]] = cir.unary(plus, %[[#OPI]]) : !s32i, !s32i
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#RESR]], %[[#RESI]] : !s32i -> !cir.complex<!s32i>
+
+//      LLVM: %[[#OPR:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-NEXT: %[[#OPI:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#OPR]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#OPI]], 1
+
+//      LLVM: %[[#OPR:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-NEXT: %[[#OPI:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#OPR]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 %[[#OPI]], 1
+
+// CHECK: }
+
+void unary_minus() { // Unary - negates both the real and imaginary parts per the checks below.
+  cd1 = -cd1;
+  ci1 = -ci1;
+}
+
+//   CLANG: @unary_minus
+// CPPLANG: @_Z11unary_minusv
+
+// CIRGEN: %{{.+}} = cir.unary(minus, %{{.+}}) : !cir.complex<!cir.double>, !cir.complex<!cir.double>
+// CIRGEN: %{{.+}} = cir.unary(minus, %{{.+}}) : !cir.complex<!s32i>, !cir.complex<!s32i>
+
+//      CIR: %[[#OPR:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#OPI:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#RESR:]] = cir.unary(minus, %[[#OPR]]) : !cir.double, !cir.double
+// CIR-NEXT: %[[#RESI:]] = cir.unary(minus, %[[#OPI]]) : !cir.double, !cir.double
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#RESR]], %[[#RESI]] : !cir.double -> !cir.complex<!cir.double>
+
+//      CIR: %[[#OPR:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#OPI:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#RESR:]] = cir.unary(minus, %[[#OPR]]) : !s32i, !s32i
+// CIR-NEXT: %[[#RESI:]] = cir.unary(minus, %[[#OPI]]) : !s32i, !s32i
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#RESR]], %[[#RESI]] : !s32i -> !cir.complex<!s32i>
+
+//      LLVM: %[[#OPR:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-NEXT: %[[#OPI:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-NEXT: %[[#RESR:]] = fneg double %[[#OPR]]
+// LLVM-NEXT: %[[#RESI:]] = fneg double %[[#OPI]]
+// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#RESR]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#RESI]], 1
+
+//      LLVM: %[[#OPR:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-NEXT: %[[#OPI:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-NEXT: %[[#RESR:]] = sub i32 0, %[[#OPR]]
+// LLVM-NEXT: %[[#RESI:]] = sub i32 0, %[[#OPI]]
+// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#RESR]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 %[[#RESI]], 1
+
+// CHECK: }
+
+void unary_not() { // ~ on a complex value is conjugation: checks show only the imaginary part is negated.
+  cd1 = ~cd1;
+  ci1 = ~ci1;
+}
+
+//   CLANG: @unary_not
+// CPPLANG: @_Z9unary_notv
+
+// CIRGEN: %{{.+}} = cir.unary(not, %{{.+}}) : !cir.complex<!cir.double>, !cir.complex<!cir.double>
+// CIRGEN: %{{.+}} = cir.unary(not, %{{.+}}) : !cir.complex<!s32i>, !cir.complex<!s32i>
+
+//      CIR: %[[#OPR:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#OPI:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#RESI:]] = cir.unary(minus, %[[#OPI]]) : !cir.double, !cir.double
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#OPR]], %[[#RESI]] : !cir.double -> !cir.complex<!cir.double>
+
+//      CIR: %[[#OPR:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#OPI:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#RESI:]] = cir.unary(minus, %[[#OPI]]) : !s32i, !s32i
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#OPR]], %[[#RESI]] : !s32i -> !cir.complex<!s32i>
+
+//      LLVM: %[[#OPR:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-NEXT: %[[#OPI:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-NEXT: %[[#RESI:]] = fneg double %[[#OPI]]
+// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#OPR]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#RESI]], 1
+
+//      LLVM: %[[#OPR:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-NEXT: %[[#OPI:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-NEXT: %[[#RESI:]] = sub i32 0, %[[#OPI]]
+// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#OPR]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 %[[#RESI]], 1
+
+// CHECK: }
+
+void builtin_conj() { // __builtin_conj: checks show the real part passed through, imaginary part negated.
+  cd1 = __builtin_conj(cd1);
+}
+
+//   CLANG: @builtin_conj
+// CPPLANG: @_Z12builtin_conjv
+
+// CIRGEN: %{{.+}} = cir.unary(not, %{{.+}}) : !cir.complex<!cir.double>, !cir.complex<!cir.double>
+
+//      CIR: %[[#OPR:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#OPI:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#RESI:]] = cir.unary(minus, %[[#OPI]]) : !cir.double, !cir.double
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#OPR]], %[[#RESI]] : !cir.double -> !cir.complex<!cir.double>
+
+//      LLVM: %[[#OPR:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-NEXT: %[[#OPI:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-NEXT: %[[#RESI:]] = fneg double %[[#OPI]]
+// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#OPR]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#RESI]], 1
+
+// CHECK: }
+
+void pre_increment() { // Pre-increment: checks show only the real part is incremented; imaginary part is untouched.
+  ++cd1;
+  ++ci1;
+}
+
+//   CLANG: @pre_increment
+// CPPLANG: @_Z13pre_incrementv
+
+// CIRGEN: %{{.+}} = cir.unary(inc, %{{.+}}) : !cir.complex<!cir.double>, !cir.complex<!cir.double>
+// CIRGEN: %{{.+}} = cir.unary(inc, %{{.+}}) : !cir.complex<!s32i>, !cir.complex<!s32i>
+
+//      CIR: %[[#R:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#I:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#IR:]] = cir.unary(inc, %[[#R]]) : !cir.double, !cir.double
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#IR]], %[[#I]] : !cir.double -> !cir.complex<!cir.double>
+
+//      CIR: %[[#R:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#I:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#IR:]] = cir.unary(inc, %[[#R]]) : !s32i, !s32i
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#IR]], %[[#I]] : !s32i -> !cir.complex<!s32i>
+
+//      LLVM: %[[#R:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-NEXT: %[[#I:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-NEXT: %[[#IR:]] = fadd double 1.000000e+00, %[[#R]]
+// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#IR]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#I]], 1
+
+//      LLVM: %[[#R:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-NEXT: %[[#I:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-NEXT: %[[#IR:]] = add i32 %[[#R]], 1
+// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#IR]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 %[[#I]], 1
+
+// CHECK: }
+
+void post_increment() { // Post-increment: same lowering as pre-increment here — only the real part is incremented.
+  cd1++;
+  ci1++;
+}
+
+//   CLANG: @post_increment
+// CPPLANG: @_Z14post_incrementv
+
+// CIRGEN: %{{.+}} = cir.unary(inc, %{{.+}}) : !cir.complex<!cir.double>, !cir.complex<!cir.double>
+// CIRGEN: %{{.+}} = cir.unary(inc, %{{.+}}) : !cir.complex<!s32i>, !cir.complex<!s32i>
+
+//      CIR: %[[#R:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#I:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#IR:]] = cir.unary(inc, %[[#R]]) : !cir.double, !cir.double
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#IR]], %[[#I]] : !cir.double -> !cir.complex<!cir.double>
+
+//      CIR: %[[#R:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#I:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#IR:]] = cir.unary(inc, %[[#R]]) : !s32i, !s32i
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#IR]], %[[#I]] : !s32i -> !cir.complex<!s32i>
+
+//      LLVM: %[[#R:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-NEXT: %[[#I:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-NEXT: %[[#IR:]] = fadd double 1.000000e+00, %[[#R]]
+// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#IR]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#I]], 1
+
+//      LLVM: %[[#R:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-NEXT: %[[#I:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-NEXT: %[[#IR:]] = add i32 %[[#R]], 1
+// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#IR]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 %[[#I]], 1
+
+// CHECK: }
+
+void pre_decrement() { // Pre-decrement: checks show only the real part is decremented (fadd -1.0 / sub 1).
+  --cd1;
+  --ci1;
+}
+
+//   CLANG: @pre_decrement
+// CPPLANG: @_Z13pre_decrementv
+
+// CIRGEN: %{{.+}} = cir.unary(dec, %{{.+}}) : !cir.complex<!cir.double>, !cir.complex<!cir.double>
+// CIRGEN: %{{.+}} = cir.unary(dec, %{{.+}}) : !cir.complex<!s32i>, !cir.complex<!s32i>
+
+//      CIR: %[[#R:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#I:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#IR:]] = cir.unary(dec, %[[#R]]) : !cir.double, !cir.double
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#IR]], %[[#I]] : !cir.double -> !cir.complex<!cir.double>
+
+//      CIR: %[[#R:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#I:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#IR:]] = cir.unary(dec, %[[#R]]) : !s32i, !s32i
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#IR]], %[[#I]] : !s32i -> !cir.complex<!s32i>
+
+//      LLVM: %[[#R:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-NEXT: %[[#I:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-NEXT: %[[#IR:]] = fadd double -1.000000e+00, %[[#R]]
+// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#IR]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#I]], 1
+
+//      LLVM: %[[#R:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-NEXT: %[[#I:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-NEXT: %[[#IR:]] = sub i32 %[[#R]], 1
+// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#IR]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 %[[#I]], 1
+
+// CHECK: }
+
+void post_decrement() { // Post-decrement: same lowering as pre-decrement here — only the real part is decremented.
+  cd1--;
+  ci1--;
+}
+
+//   CLANG: @post_decrement
+// CPPLANG: @_Z14post_decrementv
+
+// CIRGEN: %{{.+}} = cir.unary(dec, %{{.+}}) : !cir.complex<!cir.double>, !cir.complex<!cir.double>
+// CIRGEN: %{{.+}} = cir.unary(dec, %{{.+}}) : !cir.complex<!s32i>, !cir.complex<!s32i>
+
+//      CIR: %[[#R:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#I:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-NEXT: %[[#IR:]] = cir.unary(dec, %[[#R]]) : !cir.double, !cir.double
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#IR]], %[[#I]] : !cir.double -> !cir.complex<!cir.double>
+
+//      CIR: %[[#R:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#I:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-NEXT: %[[#IR:]] = cir.unary(dec, %[[#R]]) : !s32i, !s32i
+// CIR-NEXT: %{{.+}} = cir.complex.create %[[#IR]], %[[#I]] : !s32i -> !cir.complex<!s32i>
+
+//      LLVM: %[[#R:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-NEXT: %[[#I:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-NEXT: %[[#IR:]] = fadd double -1.000000e+00, %[[#R]]
+// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } undef, double %[[#IR]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double %[[#I]], 1
+
+//      LLVM: %[[#R:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-NEXT: %[[#I:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-NEXT: %[[#IR:]] = sub i32 %[[#R]], 1
+// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } undef, i32 %[[#IR]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 %[[#I]], 1
+
+// CHECK: }
diff --git a/clang/test/CIR/Incubator/CodeGen/complex-builtins.cpp b/clang/test/CIR/Incubator/CodeGen/complex-builtins.cpp
new file mode 100644
index 0000000000000..d0f47751a4d0f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/complex-builtins.cpp
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+
+void complex_choose_expr() { // With a constant-true condition, __builtin_choose_expr selects 'a'; checks show only 'a' is loaded.
+ int _Complex a;
+ int _Complex b;
+ int _Complex r = __builtin_choose_expr(true, a, b);
+}
+
+// CIR: %[[COMPLEX_A:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["a"]
+// CIR: %[[RESULT:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["r", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[COMPLEX_A]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR: cir.store{{.*}} %[[TMP_A]], %[[RESULT]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+
+// LLVM: %[[COMPLEX_A:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[COMPLEX_B:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[RESULT:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load { i32, i32 }, ptr %[[COMPLEX_A]], align 4
+// LLVM: store { i32, i32 } %[[TMP_A]], ptr %[[RESULT]], align 4
diff --git a/clang/test/CIR/Incubator/CodeGen/complex-cast.c b/clang/test/CIR/Incubator/CodeGen/complex-cast.c
new file mode 100644
index 0000000000000..a9d67a17795b2
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/complex-cast.c
@@ -0,0 +1,264 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare -o %t.cir %s 2>&1 | FileCheck --check-prefixes=CIR-BEFORE,CHECK %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare -o %t.cir %s 2>&1 | FileCheck --check-prefixes=CIR-AFTER,CHECK %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll --check-prefixes=LLVM,CHECK %s
+
+#include <stdbool.h>
+
+volatile double _Complex cd;
+volatile float _Complex cf;
+volatile int _Complex ci;
+volatile short _Complex cs;
+volatile double sd;
+volatile int si;
+volatile bool b;
+
+void scalar_to_complex() {
+  cd = sd;
+  ci = si;
+  cd = si;
+  ci = sd;
+}
+
+// CHECK-LABEL: @scalar_to_complex()
+
+// CIR-BEFORE: %{{.+}} = cir.cast float_to_complex %{{.+}} : !cir.double -> !cir.complex<!cir.double>
+
+//      CIR-AFTER: %[[#REAL:]] = cir.load volatile{{.*}}  %{{.+}} : !cir.ptr<!cir.double>, !cir.double
+// CIR-AFTER-NEXT: %[[#IMAG:]] = cir.const #cir.fp<0.000000e+00> : !cir.double
+// CIR-AFTER-NEXT: %{{.+}} = cir.complex.create %[[#REAL]], %[[#IMAG]] : !cir.double -> !cir.complex<!cir.double>
+
+// CIR-BEFORE: %{{.+}} = cir.cast int_to_complex %{{.+}} : !s32i -> !cir.complex<!s32i>
+
+//      CIR-AFTER: %[[#REAL:]] = cir.load volatile{{.*}}  %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-AFTER-NEXT: %[[#IMAG:]] = cir.const #cir.int<0> : !s32i
+// CIR-AFTER-NEXT: %{{.+}} = cir.complex.create %[[#REAL]], %[[#IMAG]] : !s32i -> !cir.complex<!s32i>
+
+//      CIR-BEFORE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.double
+// CIR-BEFORE-NEXT: %{{.+}} = cir.cast float_to_complex %[[#A]] : !cir.double -> !cir.complex<!cir.double>
+
+//      CIR-AFTER: %[[#A:]] = cir.load volatile{{.*}}  %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR-AFTER-NEXT: %[[#REAL:]] = cir.cast int_to_float %[[#A]] : !s32i -> !cir.double
+// CIR-AFTER-NEXT: %[[#IMAG:]] = cir.const #cir.fp<0.000000e+00> : !cir.double
+// CIR-AFTER-NEXT: %{{.+}} = cir.complex.create %[[#REAL]], %[[#IMAG]] : !cir.double -> !cir.complex<!cir.double>
+
+//      CIR-BEFORE: %[[#A:]] = cir.cast float_to_int %{{.+}} : !cir.double -> !s32i
+// CIR-BEFORE-NEXT: %{{.+}} = cir.cast int_to_complex %[[#A]] : !s32i -> !cir.complex<!s32i>
+
+//      CIR-AFTER: %[[#A:]] = cir.load volatile{{.*}}  %{{.+}} : !cir.ptr<!cir.double>, !cir.double
+// CIR-AFTER-NEXT: %[[#REAL:]] = cir.cast float_to_int %[[#A]] : !cir.double -> !s32i
+// CIR-AFTER-NEXT: %[[#IMAG:]] = cir.const #cir.int<0> : !s32i
+// CIR-AFTER-NEXT: %{{.+}} = cir.complex.create %[[#REAL]], %[[#IMAG]] : !s32i -> !cir.complex<!s32i>
+
+//      LLVM: %[[#REAL:]] = load volatile double, ptr @sd, align 8
+// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } {{.*}}, double %[[#REAL]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double 0.000000e+00, 1
+
+//      LLVM: %[[#REAL:]] = load volatile i32, ptr @si, align 4
+// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } {{.*}}, i32 %[[#REAL]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 0, 1
+
+//      LLVM: %[[#A:]] = load volatile i32, ptr @si, align 4
+// LLVM-NEXT: %[[#REAL:]] = sitofp i32 %[[#A]] to double
+// LLVM-NEXT: %[[#B:]] = insertvalue { double, double } {{.*}}, double %[[#REAL]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#B]], double 0.000000e+00, 1
+
+//      LLVM: %[[#A:]] = load volatile double, ptr @sd, align 8
+// LLVM-NEXT: %[[#REAL:]] = fptosi double %[[#A]] to i32
+// LLVM-NEXT: %[[#B:]] = insertvalue { i32, i32 } {{.*}}, i32 %[[#REAL]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#B]], i32 0, 1
+
+// CHECK: }
+
+void scalar_to_complex_explicit() {
+  cd = (double _Complex)sd;
+  ci = (int _Complex)si;
+  cd = (double _Complex)si;
+  ci = (int _Complex)sd;
+}
+
+// CHECK-LABEL: @scalar_to_complex_explicit()
+
+// CIR-BEFORE: %{{.+}} = cir.cast float_to_complex %{{.+}} : !cir.double -> !cir.complex<!cir.double>
+
+//      CIR-AFTER: %[[#IMAG:]] = cir.const #cir.fp<0.000000e+00> : !cir.double
+// CIR-AFTER-NEXT: %{{.+}} = cir.complex.create %{{.+}}, %[[#IMAG]] : !cir.double -> !cir.complex<!cir.double>
+
+//      LLVM: %[[#A:]] = insertvalue { double, double } {{.*}}, double %{{.+}}, 0
+// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double 0.000000e+00, 1
+
+// CIR-BEFORE: %{{.+}} = cir.cast int_to_complex %{{.+}} : !s32i -> !cir.complex<!s32i>
+
+//      CIR-AFTER: %[[#IMAG:]] = cir.const #cir.int<0> : !s32i
+// CIR-AFTER-NEXT: %{{.+}} = cir.complex.create %{{.+}}, %[[#IMAG]] : !s32i -> !cir.complex<!s32i>
+
+//      LLVM: %[[#A:]] = insertvalue { i32, i32 } {{.*}}, i32 %{{.+}}, 0
+// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 0, 1
+
+//      CIR-BEFORE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.double
+// CIR-BEFORE-NEXT: %{{.+}} = cir.cast float_to_complex %[[#A]] : !cir.double -> !cir.complex<!cir.double>
+
+//      CIR-AFTER: %[[#REAL:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.double
+// CIR-AFTER-NEXT: %[[#IMAG:]] = cir.const #cir.fp<0.000000e+00> : !cir.double
+// CIR-AFTER-NEXT: %{{.+}} = cir.complex.create %[[#REAL]], %[[#IMAG]] : !cir.double -> !cir.complex<!cir.double>
+
+//      LLVM: %[[#REAL:]] = sitofp i32 %{{.+}} to double
+// LLVM-NEXT: %[[#A:]] = insertvalue { double, double } {{.*}}, double %[[#REAL]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { double, double } %[[#A]], double 0.000000e+00, 1
+
+//      CIR-BEFORE: %[[#A:]] = cir.cast float_to_int %{{.+}} : !cir.double -> !s32i
+// CIR-BEFORE-NEXT: %{{.+}} = cir.cast int_to_complex %[[#A]] : !s32i -> !cir.complex<!s32i>
+
+//      CIR-AFTER: %[[#REAL:]] = cir.cast float_to_int %{{.+}} : !cir.double -> !s32i
+// CIR-AFTER-NEXT: %[[#IMAG:]] = cir.const #cir.int<0> : !s32i
+// CIR-AFTER-NEXT: %{{.+}} = cir.complex.create %[[#REAL]], %[[#IMAG]] : !s32i -> !cir.complex<!s32i>
+
+//      LLVM: %[[#REAL:]] = fptosi double %{{.+}} to i32
+// LLVM-NEXT: %[[#A:]] = insertvalue { i32, i32 } {{.*}}, i32 %[[#REAL]], 0
+// LLVM-NEXT: %{{.+}} = insertvalue { i32, i32 } %[[#A]], i32 0, 1
+
+// CHECK: }
+
+void complex_to_scalar() {
+  sd = (double)cd;
+  si = (int)ci;
+  sd = (double)ci;
+  si = (int)cd;
+}
+
+// CHECK-LABEL: @complex_to_scalar()
+
+// CIR-BEFORE: %{{.+}} = cir.cast float_complex_to_real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+
+// CIR-AFTER: %{{.+}} = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+
+// LLVM: %{{.+}} = extractvalue { double, double } %{{.+}}, 0
+
+// CIR-BEFORE: %{{.+}} = cir.cast int_complex_to_real %{{.+}} : !cir.complex<!s32i> -> !s32i
+
+// CIR-AFTER: %{{.+}} = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+
+// LLVM: %{{.+}} = extractvalue { i32, i32 } %{{.+}}, 0
+
+//      CIR-BEFORE: %[[#A:]] = cir.cast int_complex_to_real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-BEFORE-NEXT: %{{.+}} = cir.cast int_to_float %[[#A]] : !s32i -> !cir.double
+
+//      CIR-AFTER: %[[#A:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-AFTER-NEXT: %{{.+}} = cir.cast int_to_float %[[#A]] : !s32i -> !cir.double
+
+//      LLVM: %[[#A:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-NEXT: %{{.+}} = sitofp i32 %[[#A]] to double
+
+//      CIR-BEFORE: %[[#A:]] = cir.cast float_complex_to_real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-BEFORE-NEXT: %{{.+}} = cir.cast float_to_int %[[#A]] : !cir.double -> !s32i
+
+//      CIR-AFTER: %[[#A:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-AFTER-NEXT: %{{.+}} = cir.cast float_to_int %[[#A]] : !cir.double -> !s32i
+
+//      LLVM: %[[#A:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-NEXT: %{{.+}} = fptosi double %[[#A]] to i32
+
+// CHECK: }
+
+void complex_to_bool() {
+  b = (bool)cd;
+  b = (bool)ci;
+}
+
+// CHECK-LABEL: @complex_to_bool()
+
+// CIR-BEFORE: %{{.+}} = cir.cast float_complex_to_bool %{{.+}} : !cir.complex<!cir.double> -> !cir.bool
+
+//      CIR-AFTER: %[[#REAL:]] = cir.complex.real %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-AFTER-NEXT: %[[#IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-AFTER-NEXT: %[[#RB:]] = cir.cast float_to_bool %[[#REAL]] : !cir.double -> !cir.bool
+// CIR-AFTER-NEXT: %[[#IB:]] = cir.cast float_to_bool %[[#IMAG]] : !cir.double -> !cir.bool
+// CIR-AFTER-NEXT: %[[#A:]] = cir.const #true
+// CIR-AFTER-NEXT: %{{.+}} = cir.select if %[[#RB]] then %[[#A]] else %[[#IB]] : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
+
+//      LLVM:   %[[#REAL:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-NEXT:   %[[#IMAG:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-NEXT:   %[[#RB:]] = fcmp une double %[[#REAL]], 0.000000e+00
+// LLVM-NEXT:   %[[#IB:]] = fcmp une double %[[#IMAG]], 0.000000e+00
+// LLVM-NEXT:   %{{.+}} = or i1 %[[#RB]], %[[#IB]]
+
+// CIR-BEFORE: %{{.+}} = cir.cast int_complex_to_bool %{{.+}} : !cir.complex<!s32i> -> !cir.bool
+
+//      CIR-AFTER: %[[#REAL:]] = cir.complex.real %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-AFTER-NEXT: %[[#IMAG:]] = cir.complex.imag %{{.+}} : !cir.complex<!s32i> -> !s32i
+// CIR-AFTER-NEXT: %[[#RB:]] = cir.cast int_to_bool %[[#REAL]] : !s32i -> !cir.bool
+// CIR-AFTER-NEXT: %[[#IB:]] = cir.cast int_to_bool %[[#IMAG]] : !s32i -> !cir.bool
+// CIR-AFTER-NEXT: %[[#A:]] = cir.const #true
+// CIR-AFTER-NEXT: %{{.+}} = cir.select if %[[#RB]] then %[[#A]] else %[[#IB]] : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
+
+//      LLVM:   %[[#REAL:]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-NEXT:   %[[#IMAG:]] = extractvalue { i32, i32 } %{{.+}}, 1
+// LLVM-NEXT:   %[[#RB:]] = icmp ne i32 %[[#REAL]], 0
+// LLVM-NEXT:   %[[#IB:]] = icmp ne i32 %[[#IMAG]], 0
+// LLVM-NEXT:   %{{.+}} = or i1 %[[#RB]], %[[#IB]]
+
+// CHECK: }
+
+struct CX {
+  double real;
+  double imag;
+};
+
+void lvalue_to_rvalue_bitcast() {
+   struct CX a;
+   double _Complex b = __builtin_bit_cast(double _Complex, a);
+}
+
+// CHECK-LABEL: @lvalue_to_rvalue_bitcast()
+
+// CIR-BEFORE: %{{.+}} = cir.cast bitcast %{{.+}} : !cir.ptr<!rec_CX> -> !cir.ptr<!cir.complex<!cir.double>>
+
+// CIR-AFTER: %{{.+}} = cir.cast bitcast %{{.+}} : !cir.ptr<!rec_CX> -> !cir.ptr<!cir.complex<!cir.double>>
+
+// LLVM: %[[PTR_ADDR:.*]] = alloca %struct.CX, i64 1, align 8
+// LLVM: %[[COMPLEX_ADDR:.*]] = alloca { double, double }, i64 1, align 8
+// LLVM: %[[PTR_TO_COMPLEX:.*]] = load { double, double }, ptr %[[PTR_ADDR]], align 8
+// LLVM: store { double, double } %[[PTR_TO_COMPLEX]], ptr %[[COMPLEX_ADDR]], align 8
+
+// CHECK: }
+
+void complex_to_complex_cast() {
+  cd = cf;
+  ci = cs;
+}
+
+// CIR-BEFORE: %[[TMP:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR-BEFORE: %[[FP_COMPLEX:.*]] = cir.cast float_complex %[[TMP]] : !cir.complex<!cir.float> -> !cir.complex<!cir.double>
+
+// CIR-AFTER: %[[#REAL:]] = cir.complex.real %{{.*}} : !cir.complex<!cir.float> -> !cir.float
+// CIR-AFTER: %[[#IMAG:]] = cir.complex.imag %{{.*}} : !cir.complex<!cir.float> -> !cir.float
+// CIR-AFTER: %[[#REAL_FP_CAST:]] = cir.cast floating %[[#REAL]] : !cir.float -> !cir.double
+// CIR-AFTER: %[[#IMAG_FP_CAST:]] = cir.cast floating %[[#IMAG]] : !cir.float -> !cir.double
+// CIR-AFTER: %{{.*}} = cir.complex.create %[[#REAL_FP_CAST]], %[[#IMAG_FP_CAST]] : !cir.double -> !cir.complex<!cir.double>
+
+// LLVM: %[[#REAL:]] = extractvalue { float, float } %{{.*}}, 0
+// LLVM: %[[#IMAG:]] = extractvalue { float, float } %{{.*}}, 1
+// LLVM: %[[#REAL_FP_CAST:]] = fpext float %[[#REAL]] to double
+// LLVM: %[[#IMAG_FP_CAST:]] = fpext float %[[#IMAG]] to double
+// LLVM: %[[TMP:.*]] = insertvalue { double, double } {{.*}}, double %[[#REAL_FP_CAST]], 0
+// LLVM: %{{.*}} = insertvalue { double, double } %[[TMP]], double %[[#IMAG_FP_CAST]], 1
+
+// CIR-BEFORE: %[[TMP:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.complex<!s16i>>, !cir.complex<!s16i>
+// CIR-BEFORE: %[[INT_COMPLEX:.*]] = cir.cast int_complex %[[TMP]] : !cir.complex<!s16i> -> !cir.complex<!s32i>
+
+// CIR-AFTER: %[[#REAL:]] = cir.complex.real %{{.*}} : !cir.complex<!s16i> -> !s16i
+// CIR-AFTER: %[[#IMAG:]] = cir.complex.imag %{{.*}} : !cir.complex<!s16i> -> !s16i
+// CIR-AFTER: %[[#REAL_INT_CAST:]] = cir.cast integral %[[#REAL]] : !s16i -> !s32i
+// CIR-AFTER: %[[#IMAG_INT_CAST:]] = cir.cast integral %[[#IMAG]] : !s16i -> !s32i
+// CIR-AFTER: %{{.*}} = cir.complex.create %[[#REAL_INT_CAST]], %[[#IMAG_INT_CAST]] : !s32i -> !cir.complex<!s32i>
+
+// LLVM: %[[#REAL:]] = extractvalue { i16, i16 } %{{.*}}, 0
+// LLVM: %[[#IMAG:]] = extractvalue { i16, i16 } %{{.*}}, 1
+// LLVM: %[[#REAL_INT_CAST:]] = sext i16 %[[#REAL]] to i32
+// LLVM: %[[#IMAG_INT_CAST:]] = sext i16 %[[#IMAG]] to i32
+// LLVM: %[[TMP:.*]] = insertvalue { i32, i32 } {{.*}}, i32 %[[#REAL_INT_CAST]], 0
+// LLVM: %{{.*}} = insertvalue { i32, i32 } %[[TMP]], i32 %[[#IMAG_INT_CAST]], 1
+
+void promotion() {
+  cd = cf + cf;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/complex-cast.cpp b/clang/test/CIR/Incubator/CodeGen/complex-cast.cpp
new file mode 100644
index 0000000000000..7ac024a3076c8
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/complex-cast.cpp
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CIR-BEFORE %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CIR-AFTER %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll --check-prefixes=LLVM %s
+
+struct CX {
+  double real;
+  double imag;
+};
+
+void complex_lvalue_bitcast() {
+  struct CX a;
+  (double _Complex &)a = {};
+}
+
+// CIR-BEFORE: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.ptr<!rec_CX> -> !cir.ptr<!cir.complex<!cir.double>>
+
+// CIR-AFTER: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.ptr<!rec_CX> -> !cir.ptr<!cir.complex<!cir.double>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca %struct.CX, i64 1, align 8
+// LLVM: store { double, double } zeroinitializer, ptr %[[A_ADDR]], align 8
+
+void complex_user_defined_cast() {
+  struct Point {
+    int x;
+    int y;
+    operator int _Complex() const { return {x, y}; }
+  };
+
+  Point p{1, 2};
+  int _Complex c = p;
+}
+
+// CIR-AFTER: %[[P_ADDR:.*]] = cir.alloca !rec_Point, !cir.ptr<!rec_Point>, ["p", init]
+// CIR-AFTER: %[[C_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["c", init]
+// CIR-AFTER: %[[P_CONST:.*]] = cir.const #cir.const_record<{#cir.int<1> : !s32i, #cir.int<2> : !s32i}> : !rec_Point
+// CIR-AFTER: cir.store{{.*}} %[[P_CONST]], %[[P_ADDR]] : !rec_Point, !cir.ptr<!rec_Point>
+// CIR-AFTER: %[[POINT_TO_COMPLEX:.*]] = cir.call @_ZZ25complex_user_defined_castvENK5PointcvCiEv(%[[P_ADDR]]) : (!cir.ptr<!rec_Point>) -> !cir.complex<!s32i>
+// CIR-AFTER: cir.store{{.*}} %[[POINT_TO_COMPLEX]], %[[C_ADDR]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+
+// LLVM: %[[P_ADDR:.*]] = alloca %struct.Point, i64 1, align 4
+// LLVM: %[[C_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: store %struct.Point { i32 1, i32 2 }, ptr %[[P_ADDR]], align 4
+// LLVM: %[[POINT_TO_COMPLEX:.*]] = call { i32, i32 } @_ZZ25complex_user_defined_castvENK5PointcvCiEv(ptr %[[P_ADDR]])
+// LLVM: store { i32, i32 } %[[POINT_TO_COMPLEX]], ptr %[[C_ADDR]], align 4
diff --git a/clang/test/CIR/Incubator/CodeGen/complex-compound-assignment.cpp b/clang/test/CIR/Incubator/CodeGen/complex-compound-assignment.cpp
new file mode 100644
index 0000000000000..69fdb47842f04
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/complex-compound-assignment.cpp
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=C_CIR
+
+// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=C_LLVM
+
+// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=C_OGCG
+
+#ifndef __cplusplus
+void foo() {
+  float _Complex a;
+  float b;
+  b += a;
+}
+#endif
+
+// C_CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// C_CIR: %[[B_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["b"]
+// C_CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// C_CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.float>, !cir.float
+// C_CIR: %[[CONST_ZERO:.*]] = cir.const #cir.fp<0.000000e+00> : !cir.float
+// C_CIR: %[[COMPLEX_B:.*]] = cir.complex.create %[[TMP_B]], %[[CONST_ZERO]] : !cir.float -> !cir.complex<!cir.float>
+// C_CIR: %[[B_REAL:.*]] = cir.complex.real %[[COMPLEX_B]] : !cir.complex<!cir.float> -> !cir.float
+// C_CIR: %[[B_IMAG:.*]] = cir.complex.imag %[[COMPLEX_B]] : !cir.complex<!cir.float> -> !cir.float
+// C_CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float
+// C_CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float
+// C_CIR: %[[ADD_REAL:.*]] = cir.binop(add, %[[B_REAL]], %[[A_REAL]]) : !cir.float
+// C_CIR: %[[ADD_IMAG:.*]] = cir.binop(add, %[[B_IMAG]], %[[A_IMAG]]) : !cir.float
+// C_CIR: %[[RESULT_COMPLEX:.*]] = cir.complex.create %[[ADD_REAL]], %[[ADD_IMAG]] : !cir.float -> !cir.complex<!cir.float>
+// C_CIR: %[[RESULT_REAL:.*]] = cir.complex.real %[[RESULT_COMPLEX]] : !cir.complex<!cir.float> -> !cir.float
+// C_CIR: cir.store{{.*}} %[[RESULT_REAL]], %[[B_ADDR]] : !cir.float, !cir.ptr<!cir.float>
+
+// C_LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// C_LLVM: %[[B_ADDR:.*]] = alloca float, i64 1, align 4
+// C_LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4
+// C_LLVM: %[[TMP_B:.*]] = load float, ptr %[[B_ADDR]], align 4
+// C_LLVM: %[[TMP_COMPLEX_B:.*]] = insertvalue { float, float } {{.*}}, float %[[TMP_B]], 0
+// C_LLVM: %[[COMPLEX_B:.*]] = insertvalue { float, float } %[[TMP_COMPLEX_B]], float 0.000000e+00, 1
+// C_LLVM: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0
+// C_LLVM: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1
+// C_LLVM: %[[RESULT_REAL:.*]] = fadd float %[[TMP_B]], %[[A_REAL]]
+// C_LLVM: %[[RESULT_IMAG:.*]] = fadd float 0.000000e+00, %[[A_IMAG]]
+// C_LLVM: %[[TMP_RESULT:.*]] = insertvalue { float, float } {{.*}}, float %[[RESULT_REAL]], 0
+// C_LLVM: %[[RESULT:.*]] = insertvalue { float, float } %[[TMP_RESULT]], float %[[RESULT_IMAG]], 1
+// C_LLVM: store float %[[RESULT_REAL]], ptr %[[B_ADDR]], align 4
+
+// C_OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 4
+// C_OGCG: %[[B_ADDR:.*]] = alloca float, align 4
+// C_OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0
+// C_OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
+// C_OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
+// C_OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
+// C_OGCG: %[[TMP_B:.*]] = load float, ptr %[[B_ADDR]], align 4
+// C_OGCG: %[[ADD_REAL:.*]] = fadd float %[[TMP_B]], %[[A_REAL]]
+// C_OGCG: store float %[[ADD_REAL]], ptr %[[B_ADDR]], align 4
diff --git a/clang/test/CIR/Incubator/CodeGen/complex-init-list.c b/clang/test/CIR/Incubator/CodeGen/complex-init-list.c
new file mode 100644
index 0000000000000..77d7d9b328e0b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/complex-init-list.c
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+
+void foo() {
+   int _Complex c = (int _Complex){1, 2};
+}
+
+// CIR: %[[INIT:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["c", init]
+// CIR: %[[COMPOUND:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, [".compoundliteral", init]
+// CIR: %[[COMPLEX:.*]] = cir.const #cir.complex<#cir.int<1> : !s32i, #cir.int<2> : !s32i> : !cir.complex<!s32i>
+// CIR: cir.store{{.*}} %[[COMPLEX]], %[[COMPOUND]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+// CIR: %[[TMP:.*]] = cir.load{{.*}} %[[COMPOUND]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR: cir.store{{.*}} %[[TMP]], %[[INIT]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+
+// LLVM: %[[INIT:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[COMPOUND:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: store { i32, i32 } { i32 1, i32 2 }, ptr %[[COMPOUND]], align 4
+// LLVM: %[[TMP:.*]] = load { i32, i32 }, ptr %[[COMPOUND]], align 4
+// LLVM: store { i32, i32 } %[[TMP]], ptr %[[INIT]], align 4
+
+void foo2(float a, float b) {
+  float _Complex c = (float _Complex){a, b};
+}
+
+// CIR: %[[INIT:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["c", init]
+// CIR: %[[COMPOUND:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, [".compoundliteral", init]
+// CIR: %[[A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.float>, !cir.float
+// CIR: %[[B:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.float>, !cir.float
+// CIR: %[[COMPLEX:.*]] = cir.complex.create %[[A]], %[[B]] : !cir.float -> !cir.complex<!cir.float>
+// CIR: cir.store{{.*}} %[[COMPLEX]], %[[COMPOUND]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+// CIR: %[[TMP:.*]] = cir.load{{.*}} %[[COMPOUND]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: cir.store{{.*}} %[[TMP]], %[[INIT]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// LLVM: %[[INIT:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[COMPOUND:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[A:.*]] = load float, ptr {{.*}}, align 4
+// LLVM: %[[B:.*]] = load float, ptr {{.*}}, align 4
+// LLVM: %[[INSERT:.*]] = insertvalue { float, float } {{.*}}, float %[[A]], 0
+// LLVM: %[[INSERT_2:.*]] = insertvalue { float, float } %[[INSERT]], float %[[B]], 1
+// LLVM: store { float, float } %[[INSERT_2]], ptr %[[COMPOUND]], align 4
+// LLVM: %[[TMP:.*]] = load { float, float }, ptr %[[COMPOUND]], align 4
+// LLVM: store { float, float } %[[TMP]], ptr %[[INIT]], align 4
diff --git a/clang/test/CIR/Incubator/CodeGen/complex.c b/clang/test/CIR/Incubator/CodeGen/complex.c
new file mode 100644
index 0000000000000..505cce00b230e
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/complex.c
@@ -0,0 +1,414 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CHECK-BEFORE %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CHECK-BEFORE %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CHECK-AFTER %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -x c++ -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CHECK-AFTER %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll --check-prefixes=LLVM %s
+
+double _Complex c, c2;
+int _Complex ci, ci2;
+
+volatile double _Complex vc, vc2;
+volatile int _Complex vci, vci2;
+
+void list_init() {
+  double _Complex c1 = {1.0, 2.0};
+  int _Complex c2 = {1, 2};
+}
+
+//      CHECK-BEFORE: cir.func
+//      CHECK-BEFORE:   %[[#REAL:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+// CHECK-BEFORE-NEXT:   %[[#IMAG:]] = cir.const #cir.fp<2.000000e+00> : !cir.double
+// CHECK-BEFORE-NEXT:   %{{.+}} = cir.complex.create %[[#REAL]], %[[#IMAG]] : !cir.double -> !cir.complex<!cir.double>
+//      CHECK-BEFORE:   %[[#REAL:]] = cir.const #cir.int<1> : !s32i
+// CHECK-BEFORE-NEXT:   %[[#IMAG:]] = cir.const #cir.int<2> : !s32i
+// CHECK-BEFORE-NEXT:   %{{.+}} = cir.complex.create %[[#REAL]], %[[#IMAG]] : !s32i -> !cir.complex<!s32i>
+//      CHECK-BEFORE: }
+
+// CHECK-AFTER: cir.func
+// CHECK-AFTER:   %{{.+}} = cir.const #cir.complex<#cir.fp<1.000000e+00> : !cir.double, #cir.fp<2.000000e+00> : !cir.double> : !cir.complex<!cir.double>
+// CHECK-AFTER:   %{{.+}} = cir.const #cir.complex<#cir.int<1> : !s32i, #cir.int<2> : !s32i> : !cir.complex<!s32i>
+// CHECK-AFTER: }
+
+// LLVM: define dso_local void @list_init()
+// LLVM:   store { double, double } { double 1.000000e+00, double 2.000000e+00 }, ptr %{{.+}}, align 8
+// LLVM: }
+
+void list_init_2(double r, double i) {
+  double _Complex c1 = {r, i};
+}
+
+//      CHECK-BEFORE: cir.func
+//      CHECK-BEFORE:   %[[#R:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.double>, !cir.double
+// CHECK-BEFORE-NEXT:   %[[#I:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.double>, !cir.double
+// CHECK-BEFORE-NEXT:   %[[#C:]] = cir.complex.create %[[#R]], %[[#I]] : !cir.double -> !cir.complex<!cir.double>
+// CHECK-BEFORE-NEXT:   cir.store{{.*}} %[[#C]], %{{.+}} : !cir.complex<!cir.double>, !cir.ptr<!cir.complex<!cir.double>>
+//      CHECK-BEFORE: }
+
+//      CHECK-AFTER: cir.func
+//      CHECK-AFTER:   %[[#R:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.double>, !cir.double
+// CHECK-AFTER-NEXT:   %[[#I:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.double>, !cir.double
+// CHECK-AFTER-NEXT:   %[[#C:]] = cir.complex.create %[[#R]], %[[#I]] : !cir.double -> !cir.complex<!cir.double>
+// CHECK-AFTER-NEXT:   cir.store{{.*}} %[[#C]], %{{.+}} : !cir.complex<!cir.double>, !cir.ptr<!cir.complex<!cir.double>>
+//      CHECK-AFTER: }
+
+//      LLVM: define dso_local void @list_init_2(double %{{.+}}, double %{{.+}})
+//      LLVM:   %[[#A:]] = insertvalue { double, double } undef, double %{{.+}}, 0
+// LLVM-NEXT:   %[[#B:]] = insertvalue { double, double } %[[#A]], double %{{.+}}, 1
+// LLVM-NEXT:   store { double, double } %[[#B]], ptr %{{.+}}, align 8
+//      LLVM: }
+
+void builtin_init(double r, double i) {
+  double _Complex c = __builtin_complex(r, i);
+}
+
+// CHECK-BEFORE: cir.func
+// CHECK-BEFORE:   %{{.+}} = cir.complex.create %{{.+}}, %{{.+}} : !cir.double -> !cir.complex<!cir.double>
+// CHECK-BEFORE: }
+
+// CHECK-AFTER: cir.func
+// CHECK-AFTER:   %{{.+}} = cir.complex.create %{{.+}}, %{{.+}} : !cir.double -> !cir.complex<!cir.double>
+// CHECK-AFTER: }
+
+//      LLVM: define dso_local void @builtin_init
+//      LLVM:   %[[#A:]] = insertvalue { double, double } undef, double %{{.+}}, 0
+// LLVM-NEXT:   %[[#B:]] = insertvalue { double, double } %[[#A]], double %{{.+}}, 1
+// LLVM-NEXT:   store { double, double } %[[#B]], ptr %{{.+}}, align 8
+//      LLVM: }
+
+void imag_literal() {
+  c = 3.0i;
+  ci = 3i;
+}
+
+// CHECK-BEFORE: cir.func
+// CHECK-BEFORE: %{{.+}} = cir.const #cir.complex<#cir.fp<0.000000e+00> : !cir.double, #cir.fp<3.000000e+00> : !cir.double> : !cir.complex<!cir.double>
+// CHECK-BEFORE: %{{.+}} = cir.const #cir.complex<#cir.int<0> : !s32i, #cir.int<3> : !s32i> : !cir.complex<!s32i>
+// CHECK-BEFORE: }
+
+// CHECK-AFTER: cir.func
+// CHECK-AFTER:   %{{.+}} = cir.const #cir.complex<#cir.fp<0.000000e+00> : !cir.double, #cir.fp<3.000000e+00> : !cir.double> : !cir.complex<!cir.double>
+// CHECK-AFTER:   %{{.+}} = cir.const #cir.complex<#cir.int<0> : !s32i, #cir.int<3> : !s32i> : !cir.complex<!s32i>
+// CHECK-AFTER: }
+
+// LLVM: define dso_local void @imag_literal()
+// LLVM:   store { double, double } { double 0.000000e+00, double 3.000000e+00 }, ptr @c, align 8
+// LLVM:   store { i32, i32 } { i32 0, i32 3 }, ptr @ci, align 4
+// LLVM: }
+
+void load_store() {
+  c = c2;
+  ci = ci2;
+}
+
+//      CHECK-BEFORE: cir.func
+// CHECK-BEFORE-NEXT:   %[[#C2_PTR:]] = cir.get_global @c2 : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-BEFORE-NEXT:   %[[#C2:]] = cir.load{{.*}} %[[#C2_PTR]] : !cir.ptr<!cir.complex<!cir.double>>, !cir.complex<!cir.double>
+// CHECK-BEFORE-NEXT:   %[[#C_PTR:]] = cir.get_global @c : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-BEFORE-NEXT:   cir.store{{.*}} %[[#C2]], %[[#C_PTR]] : !cir.complex<!cir.double>, !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-BEFORE-NEXT:   %[[#CI2_PTR:]] = cir.get_global @ci2 : !cir.ptr<!cir.complex<!s32i>>
+// CHECK-BEFORE-NEXT:   %[[#CI2:]] = cir.load{{.*}} %[[#CI2_PTR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CHECK-BEFORE-NEXT:   %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr<!cir.complex<!s32i>>
+// CHECK-BEFORE-NEXT:   cir.store{{.*}} %[[#CI2]], %[[#CI_PTR]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+//      CHECK-BEFORE: }
+
+//      CHECK-AFTER: cir.func
+// CHECK-AFTER-NEXT:   %[[#C2_PTR:]] = cir.get_global @c2 : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-AFTER-NEXT:   %[[#C2:]] = cir.load{{.*}} %[[#C2_PTR]] : !cir.ptr<!cir.complex<!cir.double>>, !cir.complex<!cir.double>
+// CHECK-AFTER-NEXT:   %[[#C_PTR:]] = cir.get_global @c : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-AFTER-NEXT:   cir.store{{.*}} %[[#C2]], %[[#C_PTR]] : !cir.complex<!cir.double>, !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-AFTER-NEXT:   %[[#CI2_PTR:]] = cir.get_global @ci2 : !cir.ptr<!cir.complex<!s32i>>
+// CHECK-AFTER-NEXT:   %[[#CI2:]] = cir.load{{.*}} %[[#CI2_PTR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CHECK-AFTER-NEXT:   %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr<!cir.complex<!s32i>>
+// CHECK-AFTER-NEXT:   cir.store{{.*}} %[[#CI2]], %[[#CI_PTR]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+//      CHECK-AFTER: }
+
+//      LLVM: define dso_local void @load_store()
+//      LLVM:   %[[#A:]] = load { double, double }, ptr @c2, align 8
+// LLVM-NEXT:   store { double, double } %[[#A]], ptr @c, align 8
+// LLVM-NEXT:   %[[#B:]] = load { i32, i32 }, ptr @ci2, align 4
+// LLVM-NEXT:   store { i32, i32 } %[[#B]], ptr @ci, align 4
+//      LLVM: }
+
+void load_store_volatile() {
+  vc = vc2;
+  vci = vci2;
+}
+
+//      CHECK-BEFORE: cir.func
+// CHECK-BEFORE-NEXT:   %[[#VC2_PTR:]] = cir.get_global @vc2 : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-BEFORE-NEXT:   %[[#VC2:]] = cir.load volatile{{.*}} %[[#VC2_PTR]] : !cir.ptr<!cir.complex<!cir.double>>, !cir.complex<!cir.double>
+// CHECK-BEFORE-NEXT:   %[[#VC_PTR:]] = cir.get_global @vc : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-BEFORE-NEXT:   cir.store volatile{{.*}} %[[#VC2]], %[[#VC_PTR]] : !cir.complex<!cir.double>, !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-BEFORE-NEXT:   %[[#VCI2_PTR:]] = cir.get_global @vci2 : !cir.ptr<!cir.complex<!s32i>>
+// CHECK-BEFORE-NEXT:   %[[#VCI2:]] = cir.load volatile{{.*}} %[[#VCI2_PTR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CHECK-BEFORE-NEXT:   %[[#VCI_PTR:]] = cir.get_global @vci : !cir.ptr<!cir.complex<!s32i>>
+// CHECK-BEFORE-NEXT:   cir.store volatile{{.*}} %[[#VCI2]], %[[#VCI_PTR]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+//      CHECK-BEFORE: }
+
+//      CHECK-AFTER: cir.func
+// CHECK-AFTER-NEXT:   %[[#VC2_PTR:]] = cir.get_global @vc2 : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-AFTER-NEXT:   %[[#VC2:]] = cir.load volatile{{.*}} %[[#VC2_PTR]] : !cir.ptr<!cir.complex<!cir.double>>, !cir.complex<!cir.double>
+// CHECK-AFTER-NEXT:   %[[#VC_PTR:]] = cir.get_global @vc : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-AFTER-NEXT:   cir.store volatile{{.*}} %[[#VC2]], %[[#VC_PTR]] : !cir.complex<!cir.double>, !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-AFTER-NEXT:   %[[#VCI2_PTR:]] = cir.get_global @vci2 : !cir.ptr<!cir.complex<!s32i>>
+// CHECK-AFTER-NEXT:   %[[#VCI2:]] = cir.load volatile{{.*}} %[[#VCI2_PTR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CHECK-AFTER-NEXT:   %[[#VCI_PTR:]] = cir.get_global @vci : !cir.ptr<!cir.complex<!s32i>>
+// CHECK-AFTER-NEXT:   cir.store volatile{{.*}} %[[#VCI2]], %[[#VCI_PTR]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+//      CHECK-AFTER: }
+
+//      LLVM: define dso_local void @load_store_volatile()
+//      LLVM:   %[[#A:]] = load volatile { double, double }, ptr @vc2, align 8
+// LLVM-NEXT:   store volatile { double, double } %[[#A]], ptr @vc, align 8
+// LLVM-NEXT:   %[[#B:]] = load volatile { i32, i32 }, ptr @vci2, align 4
+// LLVM-NEXT:   store volatile { i32, i32 } %[[#B]], ptr @vci, align 4
+//      LLVM: }
+
+void real() {
+  double r = __builtin_creal(c);
+}
+
+//      CHECK-BEFORE: cir.func
+//      CHECK-BEFORE:   %[[#A:]] = cir.get_global @c : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-BEFORE-NEXT:   %[[#B:]] = cir.load{{.*}} %[[#A]] : !cir.ptr<!cir.complex<!cir.double>>, !cir.complex<!cir.double>
+// CHECK-BEFORE-NEXT:   %{{.+}} = cir.complex.real %[[#B]] : !cir.complex<!cir.double> -> !cir.double
+//      CHECK-BEFORE: }
+
+//      CHECK-AFTER: cir.func
+//      CHECK-AFTER:   %[[#A:]] = cir.get_global @c : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-AFTER-NEXT:   %[[#B:]] = cir.load{{.*}} %[[#A]] : !cir.ptr<!cir.complex<!cir.double>>, !cir.complex<!cir.double>
+// CHECK-AFTER-NEXT:   %{{.+}} = cir.complex.real %[[#B]] : !cir.complex<!cir.double> -> !cir.double
+//      CHECK-AFTER: }
+
+//      LLVM: define dso_local void @real()
+//      LLVM:   %[[#A:]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-NEXT:   store double %[[#A]], ptr %{{.+}}, align 8
+//      LLVM: }
+
+void imag() {
+  double i = __builtin_cimag(c);
+}
+
+//      CHECK-BEFORE: cir.func
+//      CHECK-BEFORE:   %[[#A:]] = cir.get_global @c : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-BEFORE-NEXT:   %[[#B:]] = cir.load{{.*}} %[[#A]] : !cir.ptr<!cir.complex<!cir.double>>, !cir.complex<!cir.double>
+// CHECK-BEFORE-NEXT:   %{{.+}} = cir.complex.imag %[[#B]] : !cir.complex<!cir.double> -> !cir.double
+//      CHECK-BEFORE: }
+
+//      CHECK-AFTER: cir.func
+//      CHECK-AFTER:   %[[#A:]] = cir.get_global @c : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-AFTER-NEXT:   %[[#B:]] = cir.load{{.*}} %[[#A]] : !cir.ptr<!cir.complex<!cir.double>>, !cir.complex<!cir.double>
+// CHECK-AFTER-NEXT:   %{{.+}} = cir.complex.imag %[[#B]] : !cir.complex<!cir.double> -> !cir.double
+//      CHECK-AFTER: }
+
+//      LLVM: define dso_local void @imag()
+//      LLVM:   %[[#A:]] = extractvalue { double, double } %{{.+}}, 1
+// LLVM-NEXT:   store double %[[#A]], ptr %{{.+}}, align 8
+//      LLVM: }
+
+void real_ptr() {
+  double *r1 = &__real__ c;
+  int *r2 = &__real__ ci;
+}
+
+//      CHECK-BEFORE: cir.func
+//      CHECK-BEFORE:   %[[#C_PTR:]] = cir.get_global @c : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-BEFORE-NEXT:   %{{.+}} = cir.complex.real_ptr %[[#C_PTR]] : !cir.ptr<!cir.complex<!cir.double>> -> !cir.ptr<!cir.double>
+//      CHECK-BEFORE:   %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr<!cir.complex<!s32i>>
+// CHECK-BEFORE-NEXT:   %{{.+}} = cir.complex.real_ptr %[[#CI_PTR]] : !cir.ptr<!cir.complex<!s32i>> -> !cir.ptr<!s32i>
+//      CHECK-BEFORE: }
+
+//      CHECK-AFTER: cir.func
+//      CHECK-AFTER:   %[[#C_PTR:]] = cir.get_global @c : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-AFTER-NEXT:   %{{.+}} = cir.complex.real_ptr %[[#C_PTR]] : !cir.ptr<!cir.complex<!cir.double>> -> !cir.ptr<!cir.double>
+//      CHECK-AFTER:   %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr<!cir.complex<!s32i>>
+// CHECK-AFTER-NEXT:   %{{.+}} = cir.complex.real_ptr %[[#CI_PTR]] : !cir.ptr<!cir.complex<!s32i>> -> !cir.ptr<!s32i>
+//      CHECK-AFTER: }
+
+//      LLVM: define dso_local void @real_ptr()
+//      LLVM:   store ptr @c, ptr %{{.+}}, align 8
+// LLVM-NEXT:   store ptr @ci, ptr %{{.+}}, align 8
+//      LLVM: }
+
+void real_ptr_local() {
+  double _Complex c1 = {1.0, 2.0};
+  double *r3 = &__real__ c1;
+}
+
+// CHECK-BEFORE: cir.func
+// CHECK-BEFORE:   %[[#C:]] = cir.alloca !cir.complex<!cir.double>, !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-BEFORE:   %{{.+}} = cir.complex.real_ptr %[[#C]] : !cir.ptr<!cir.complex<!cir.double>> -> !cir.ptr<!cir.double>
+// CHECK-BEFORE: }
+
+// CHECK-AFTER: cir.func
+// CHECK-AFTER:   %[[#C:]] = cir.alloca !cir.complex<!cir.double>, !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-AFTER:   %{{.+}} = cir.complex.real_ptr %[[#C]] : !cir.ptr<!cir.complex<!cir.double>> -> !cir.ptr<!cir.double>
+// CHECK-AFTER: }
+
+//      LLVM: define dso_local void @real_ptr_local()
+//      LLVM:   store { double, double } { double 1.000000e+00, double 2.000000e+00 }, ptr %{{.+}}, align 8
+// LLVM-NEXT:   %{{.+}} = getelementptr inbounds { double, double }, ptr %{{.+}}, i32 0, i32 0
+//      LLVM: }
+
+void extract_real() {
+  double r1 = __real__ c;
+  int r2 = __real__ ci;
+}
+
+//      CHECK-BEFORE: cir.func
+//      CHECK-BEFORE:   %[[#C_PTR:]] = cir.get_global @c : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-BEFORE-NEXT:   %[[COMPLEX:.*]] = cir.load{{.*}} %[[#C_PTR]] : !cir.ptr<!cir.complex<!cir.double>>, !cir.complex<!cir.double>
+// CHECK-BEFORE-NEXT:   %[[#REAL:]] = cir.complex.real %[[COMPLEX]] : !cir.complex<!cir.double> -> !cir.double
+//      CHECK-BEFORE:   %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr<!cir.complex<!s32i>>
+// CHECK-BEFORE-NEXT:   %[[COMPLEX:.*]] = cir.load{{.*}} %[[#CI_PTR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CHECK-BEFORE-NEXT:   %[[#REAL:]] = cir.complex.real %[[COMPLEX]] : !cir.complex<!s32i> -> !s32i
+//      CHECK-BEFORE: }
+
+//      CHECK-AFTER: cir.func
+//      CHECK-AFTER:   %[[#C_PTR:]] = cir.get_global @c : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-AFTER-NEXT:   %[[COMPLEX:.*]] = cir.load{{.*}} %[[#C_PTR]] : !cir.ptr<!cir.complex<!cir.double>>, !cir.complex<!cir.double>
+// CHECK-AFTER-NEXT:   %[[#REAL:]] = cir.complex.real %[[COMPLEX]] : !cir.complex<!cir.double> -> !cir.double
+//      CHECK-AFTER:   %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr<!cir.complex<!s32i>>
+// CHECK-AFTER-NEXT:   %[[COMPLEX:.*]] = cir.load{{.*}} %[[#CI_PTR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CHECK-AFTER-NEXT:   %[[#REAL:]] = cir.complex.real %[[COMPLEX]] : !cir.complex<!s32i> -> !s32i
+//      CHECK-AFTER: }
+
+// LLVM: define dso_local void @extract_real()
+// LLVM:   %[[COMPLEX_D:.*]] = load { double, double }, ptr @c, align 8
+// LLVM:   %[[R1:.*]] = extractvalue { double, double } %[[COMPLEX_D]], 0
+// LLVM:   %[[COMPLEX_I:.*]] = load { i32, i32 }, ptr @ci, align 4
+// LLVM:   %[[R2:.*]] = extractvalue { i32, i32 } %[[COMPLEX_I]], 0
+// LLVM: }
+
+int extract_real_and_add(int _Complex a, int _Complex b) {
+  return __real__ a + __real__ b;
+}
+
+//      CHECK-BEFORE: cir.func
+//      CHECK-BEFORE:   %[[COMPLEX_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CHECK-BEFORE-NEXT:   %[[REAL_A:.*]] = cir.complex.real %[[COMPLEX_A]] : !cir.complex<!s32i> -> !s32i
+// CHECK-BEFORE-NEXT:   %[[COMPLEX_B:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CHECK-BEFORE-NEXT:   %[[REAL_B:.*]] = cir.complex.real %[[COMPLEX_B]] : !cir.complex<!s32i> -> !s32i
+// CHECK-BEFORE-NEXT:   %[[ADD:.*]] = cir.binop(add, %[[REAL_A]], %[[REAL_B]]) nsw : !s32i
+//      CHECK-BEFORE: }
+
+//      CHECK-AFTER: cir.func
+//      CHECK-AFTER:   %[[COMPLEX_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CHECK-AFTER-NEXT:   %[[REAL_A:.*]] = cir.complex.real %[[COMPLEX_A]] : !cir.complex<!s32i> -> !s32i
+// CHECK-AFTER-NEXT:   %[[COMPLEX_B:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CHECK-AFTER-NEXT:   %[[REAL_B:.*]] = cir.complex.real %[[COMPLEX_B]] : !cir.complex<!s32i> -> !s32i
+// CHECK-AFTER-NEXT:   %[[ADD:.*]] = cir.binop(add, %[[REAL_A]], %[[REAL_B]]) nsw : !s32i
+//      CHECK-AFTER: }
+
+// LLVM: define dso_local i32 @extract_real_and_add
+// LLVM:   %[[COMPLEX_A:.*]] = load { i32, i32 }, ptr {{.*}}, align 4
+// LLVM:   %[[REAL_A:.*]] = extractvalue { i32, i32 } %[[COMPLEX_A]], 0
+// LLVM:   %[[COMPLEX_B:.*]] = load { i32, i32 }, ptr {{.*}}, align 4
+// LLVM:   %[[REAL_B:.*]] = extractvalue { i32, i32 } %[[COMPLEX_B]], 0
+// LLVM:   %{{.+}} = add nsw i32 %[[REAL_A]], %[[REAL_B]]
+// LLVM: }
+
+void imag_ptr() {
+  double *i1 = &__imag__ c;
+  int *i2 = &__imag__ ci;
+}
+
+//      CHECK-BEFORE: cir.func
+//      CHECK-BEFORE:   %[[#C_PTR:]] = cir.get_global @c : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-BEFORE-NEXT:   %{{.+}} = cir.complex.imag_ptr %[[#C_PTR]] : !cir.ptr<!cir.complex<!cir.double>> -> !cir.ptr<!cir.double>
+//      CHECK-BEFORE:   %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr<!cir.complex<!s32i>>
+// CHECK-BEFORE-NEXT:   %{{.+}} = cir.complex.imag_ptr %[[#CI_PTR]] : !cir.ptr<!cir.complex<!s32i>> -> !cir.ptr<!s32i>
+//      CHECK-BEFORE: }
+
+//      CHECK-AFTER: cir.func
+//      CHECK-AFTER:   %[[#C_PTR:]] = cir.get_global @c : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-AFTER-NEXT:   %{{.+}} = cir.complex.imag_ptr %[[#C_PTR]] : !cir.ptr<!cir.complex<!cir.double>> -> !cir.ptr<!cir.double>
+//      CHECK-AFTER:   %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr<!cir.complex<!s32i>>
+// CHECK-AFTER-NEXT:   %{{.+}} = cir.complex.imag_ptr %[[#CI_PTR]] : !cir.ptr<!cir.complex<!s32i>> -> !cir.ptr<!s32i>
+//      CHECK-AFTER: }
+
+// Note: The GEP emitted by CIR may differ from the one emitted by classic LLVM codegen, due to constant folding.
+// LLVM: define dso_local void @imag_ptr()
+// LLVM:   store ptr getelementptr inbounds nuw (i8, ptr @c, i64 8), ptr %{{.+}}, align 8
+// LLVM:   store ptr getelementptr inbounds nuw (i8, ptr @ci, i64 4), ptr %{{.+}}, align 8
+// LLVM: }
+
+void extract_imag() {
+  double i1 = __imag__ c;
+  int i2 = __imag__ ci;
+}
+
+//      CHECK-BEFORE: cir.func
+//      CHECK-BEFORE:   %[[#C_PTR:]] = cir.get_global @c : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-BEFORE-NEXT:   %[[COMPLEX:.*]] = cir.load{{.*}} %[[#C_PTR]] : !cir.ptr<!cir.complex<!cir.double>>, !cir.complex<!cir.double>
+// CHECK-BEFORE-NEXT:   %[[#IMAG:]] = cir.complex.imag %[[COMPLEX]] : !cir.complex<!cir.double> -> !cir.double
+//      CHECK-BEFORE:   %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr<!cir.complex<!s32i>>
+// CHECK-BEFORE-NEXT:   %[[COMPLEX:.*]] = cir.load{{.*}} %[[#CI_PTR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CHECK-BEFORE-NEXT:   %[[#IMAG:]] = cir.complex.imag %[[COMPLEX]] : !cir.complex<!s32i> -> !s32i
+//      CHECK-BEFORE: }
+
+//      CHECK-AFTER: cir.func
+//      CHECK-AFTER:   %[[#C_PTR:]] = cir.get_global @c : !cir.ptr<!cir.complex<!cir.double>>
+// CHECK-AFTER-NEXT:   %[[COMPLEX:.*]] = cir.load{{.*}} %[[#C_PTR]] : !cir.ptr<!cir.complex<!cir.double>>, !cir.complex<!cir.double>
+// CHECK-AFTER-NEXT:   %[[#IMAG:]] = cir.complex.imag %[[COMPLEX]] : !cir.complex<!cir.double> -> !cir.double
+//      CHECK-AFTER:   %[[#CI_PTR:]] = cir.get_global @ci : !cir.ptr<!cir.complex<!s32i>>
+// CHECK-AFTER-NEXT:   %[[COMPLEX:.*]] = cir.load{{.*}} %[[#CI_PTR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CHECK-AFTER-NEXT:   %[[#IMAG:]] = cir.complex.imag %[[COMPLEX]] : !cir.complex<!s32i> -> !s32i
+//      CHECK-AFTER: }
+
+// LLVM: define dso_local void @extract_imag()
+// LLVM:   %[[COMPLEX_D:.*]] = load { double, double }, ptr @c, align 8
+// LLVM:   %[[I1:.*]] = extractvalue { double, double } %[[COMPLEX_D]], 1
+// LLVM:   %[[COMPLEX_I:.*]] = load { i32, i32 }, ptr @ci, align 4
+// LLVM:   %[[I2:.*]] = extractvalue { i32, i32 } %[[COMPLEX_I]], 1
+// LLVM: }
+
+int extract_imag_and_add(int _Complex a, int _Complex b) {
+  return __imag__ a + __imag__ b;
+}
+
+//      CHECK-BEFORE: cir.func
+//      CHECK-BEFORE:   %[[COMPLEX_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CHECK-BEFORE-NEXT:   %[[IMAG_A:.*]] = cir.complex.imag %[[COMPLEX_A]] : !cir.complex<!s32i> -> !s32i
+// CHECK-BEFORE-NEXT:   %[[COMPLEX_B:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CHECK-BEFORE-NEXT:   %[[IMAG_B:.*]] = cir.complex.imag %[[COMPLEX_B]] : !cir.complex<!s32i> -> !s32i
+// CHECK-BEFORE-NEXT:   %[[ADD:.*]] = cir.binop(add, %[[IMAG_A]], %[[IMAG_B]]) nsw : !s32i
+//      CHECK-BEFORE: }
+
+//      CHECK-AFTER: cir.func
+//      CHECK-AFTER:   %[[COMPLEX_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CHECK-AFTER-NEXT:   %[[IMAG_A:.*]] = cir.complex.imag %[[COMPLEX_A]] : !cir.complex<!s32i> -> !s32i
+// CHECK-AFTER-NEXT:   %[[COMPLEX_B:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CHECK-AFTER-NEXT:   %[[IMAG_B:.*]] = cir.complex.imag %[[COMPLEX_B]] : !cir.complex<!s32i> -> !s32i
+// CHECK-AFTER-NEXT:   %[[ADD:.*]] = cir.binop(add, %[[IMAG_A]], %[[IMAG_B]]) nsw : !s32i
+//      CHECK-AFTER: }
+
+// LLVM: define dso_local i32 @extract_imag_and_add
+// LLVM:   %[[COMPLEX_A:.*]] = load { i32, i32 }, ptr {{.*}}, align 4
+// LLVM:   %[[IMAG_A:.*]] = extractvalue { i32, i32 } %[[COMPLEX_A]], 1
+// LLVM:   %[[COMPLEX_B:.*]] = load { i32, i32 }, ptr {{.*}}, align 4
+// LLVM:   %[[IMAG_B:.*]] = extractvalue { i32, i32 } %[[COMPLEX_B]], 1
+// LLVM:   %{{.+}} = add nsw i32 %[[IMAG_A]], %[[IMAG_B]]
+// LLVM: }
+
+void complex_with_empty_init() { int _Complex c = {}; }
+
+// CHECK: {{.*}} = cir.const #cir.complex<#cir.int<0> : !s32i, #cir.int<0> : !s32i> : !cir.complex<!s32i>
+
+void complex_array_subscript() {
+  int _Complex arr[2];
+  int _Complex r = arr[1];
+}
+
+// CHECK: %[[ARR:.*]] = cir.alloca !cir.array<!cir.complex<!s32i> x 2>, !cir.ptr<!cir.array<!cir.complex<!s32i> x 2>>, ["arr"]
+// CHECK: %[[RESULT:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["r", init]
+// CHECK: %[[IDX:.*]] = cir.const #cir.int<1> : !s32i
+// CHECK: %[[RESULT_VAL:.*]] = cir.get_element %[[ARR]][%[[IDX]]] : (!cir.ptr<!cir.complex<!s32i>>, !s32i) -> !cir.ptr<!cir.complex<!s32i>>
+// CHECK: %[[TMP:.*]] = cir.load{{.*}} %[[RESULT_VAL]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CHECK: cir.store{{.*}} %[[TMP]], %[[RESULT]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+
+// LLVM: %[[ARR:.*]] = alloca [2 x { i32, i32 }], i64 1, align 16
+// LLVM: %[[RESULT:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[RESULT_VAL:.*]] = getelementptr [2 x { i32, i32 }], ptr %[[ARR]], i32 0, i64 1
+// LLVM: %[[TMP:.*]] = load { i32, i32 }, ptr %[[RESULT_VAL]], align 8
+// LLVM: store { i32, i32 } %[[TMP]], ptr %[[RESULT]], align 4
diff --git a/clang/test/CIR/Incubator/CodeGen/complex.cpp b/clang/test/CIR/Incubator/CodeGen/complex.cpp
new file mode 100644
index 0000000000000..aa36ec5700716
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/complex.cpp
@@ -0,0 +1,458 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+void complex_functional_cast() {
+  using IntComplex = int _Complex;
+  int _Complex a = IntComplex{};
+}
+
+// CIR: %[[INIT:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["a", init]
+// CIR: %[[COMPLEX:.*]] = cir.const #cir.zero : !cir.complex<!s32i>
+// CIR: cir.store{{.*}} %[[COMPLEX]], %[[INIT]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+
+// LLVM: %[[INIT:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: store { i32, i32 } zeroinitializer, ptr %[[INIT]], align 4
+
+void complex_deref_expr(int _Complex* a) {
+  int _Complex b = *a;
+}
+
+// CIR: %[[COMPLEX_A_PTR:.*]] = cir.alloca !cir.ptr<!cir.complex<!s32i>>, !cir.ptr<!cir.ptr<!cir.complex<!s32i>>>, ["a", init]
+// CIR: %[[COMPLEX_B:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["b", init]
+// CIR: %[[COMPLEX_A:.*]] = cir.load deref {{.*}} %[[COMPLEX_A_PTR]] : !cir.ptr<!cir.ptr<!cir.complex<!s32i>>>, !cir.ptr<!cir.complex<!s32i>>
+// CIR: %[[TMP:.*]] = cir.load{{.*}} %[[COMPLEX_A]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR: cir.store{{.*}} %[[TMP]], %[[COMPLEX_B]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+
+// LLVM: %[[COMPLEX_A_PTR:.*]] = alloca ptr, i64 1, align 8
+// LLVM: %[[COMPLEX_B:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[COMPLEX_A:.*]] = load ptr, ptr %[[COMPLEX_A_PTR]], align 8
+// LLVM: %[[TMP:.*]] = load { i32, i32 }, ptr %[[COMPLEX_A]], align 4
+// LLVM: store { i32, i32 } %[[TMP]], ptr %[[COMPLEX_B]], align 4
+
+void complex_cxx_scalar_value_init_expr() {
+  using IntComplex = int _Complex;
+  int _Complex a = IntComplex();
+}
+
+// CIR: %[[INIT:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["a", init]
+// CIR: %[[COMPLEX:.*]] = cir.const #cir.zero : !cir.complex<!s32i>
+// CIR: cir.store align(4) %[[COMPLEX]], %[[INIT]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+
+// LLVM: %[[INIT:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: store { i32, i32 } zeroinitializer, ptr %[[INIT]], align 4
+
+void complex_abstract_condition(bool cond, int _Complex a, int _Complex b) {
+  int _Complex c = cond ? a : b;
+}
+
+// CIR: %[[COND:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["cond", init]
+// CIR: %[[COMPLEX_A:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["a", init]
+// CIR: %[[COMPLEX_B:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["b", init]
+// CIR: %[[RESULT:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["c", init]
+// CIR: %[[TMP_COND:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR: %[[RESULT_VAL:.*]] = cir.ternary(%[[TMP_COND]], true {
+// CIR:   %[[TMP_A:.*]] = cir.load{{.*}} %[[COMPLEX_A]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR:   cir.yield %[[TMP_A]] : !cir.complex<!s32i>
+// CIR: }, false {
+// CIR:   %[[TMP_B:.*]] = cir.load{{.*}} %[[COMPLEX_B]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR:   cir.yield %[[TMP_B]] : !cir.complex<!s32i>
+// CIR: }) : (!cir.bool) -> !cir.complex<!s32i>
+// CIR: cir.store{{.*}} %[[RESULT_VAL]], %[[RESULT]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+
+// LLVM: %[[COND:.*]] = alloca i8, i64 1, align 1
+// LLVM: %[[COMPLEX_A:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[COMPLEX_B:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[RESULT:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[TMP_COND:.*]] = load i8, ptr %[[COND]], align 1
+// LLVM: %[[COND_VAL:.*]] = trunc i8 %[[TMP_COND]] to i1
+// LLVM: br i1 %[[COND_VAL]], label %[[TRUE_BB:.*]], label %[[FALSE_BB:.*]]
+// LLVM: [[TRUE_BB]]:
+// LLVM:  %[[TMP_A:.*]] = load { i32, i32 }, ptr %[[COMPLEX_A]], align 4
+// LLVM:  br label %[[END_BB:.*]]
+// LLVM: [[FALSE_BB]]:
+// LLVM:  %[[TMP_B:.*]] = load { i32, i32 }, ptr %[[COMPLEX_B]], align 4
+// LLVM:  br label %[[END_BB]]
+// LLVM: [[END_BB]]:
+// LLVM: %[[RESULT_VAL:.*]] = phi { i32, i32 } [ %[[TMP_B]], %[[FALSE_BB]] ], [ %[[TMP_A]], %[[TRUE_BB]] ]
+// LLVM: store { i32, i32 } %[[RESULT_VAL]], ptr %[[RESULT]], align 4
+
+int _Complex complex_real_operator_on_rvalue() {
+  int real = __real__ complex_real_operator_on_rvalue();
+  return {};
+}
+
+// CIR: %[[RET_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["__retval"]
+// CIR: %[[REAL_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["real", init]
+// CIR: %[[CALL:.*]] = cir.call @_Z31complex_real_operator_on_rvaluev() : () -> !cir.complex<!s32i>
+// CIR: %[[REAL:.*]] = cir.complex.real %[[CALL]] : !cir.complex<!s32i> -> !s32i
+// CIR: cir.store{{.*}} %[[REAL]], %[[REAL_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CIR: %[[RET_COMPLEX:.*]] = cir.const #cir.zero : !cir.complex<!s32i>
+// CIR: cir.store{{.*}} %[[RET_COMPLEX]], %[[RET_ADDR]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+// CIR: %[[TMP_RET:.*]] = cir.load{{.*}} %[[RET_ADDR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR: cir.return %[[TMP_RET]] : !cir.complex<!s32i>
+
+// LLVM: %[[RET_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[REAL_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[CALL:.*]] = call { i32, i32 } @_Z31complex_real_operator_on_rvaluev()
+// LLVM: %[[REAL:.*]] = extractvalue { i32, i32 } %[[CALL]], 0
+// LLVM: store i32 %[[REAL]], ptr %[[REAL_ADDR]], align 4
+// LLVM: store { i32, i32 } zeroinitializer, ptr %[[RET_ADDR]], align 4
+// LLVM: %[[TMP_RET:.*]] = load { i32, i32 }, ptr %[[RET_ADDR]], align 4
+// LLVM: ret { i32, i32 } %[[TMP_RET]]
+
+void complex_member_expr() {
+  struct Wrapper {
+    int _Complex c;
+  };
+
+  Wrapper w;
+  int r = __real__ w.c;
+}
+
+// CIR: %[[W_ADDR:.*]] = cir.alloca !rec_Wrapper, !cir.ptr<!rec_Wrapper>, ["w"]
+// CIR: %[[REAL_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["r", init]
+// CIR: %[[ELEM_PTR:.*]] = cir.get_member %[[W_ADDR]][0] {name = "c"} : !cir.ptr<!rec_Wrapper> -> !cir.ptr<!cir.complex<!s32i>>
+// CIR: %[[TMP_ELEM_PTR:.*]] = cir.load{{.*}} %[[ELEM_PTR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR: %[[REAL:.*]] = cir.complex.real %[[TMP_ELEM_PTR]] : !cir.complex<!s32i> -> !s32i
+// CIR: cir.store{{.*}} %[[REAL]], %[[REAL_ADDR]] : !s32i, !cir.ptr<!s32i>
+
+// LLVM: %[[W_ADDR:.*]] = alloca %struct.Wrapper, i64 1, align 4
+// LLVM: %[[REAL_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[ELEM_PTR:.*]] = getelementptr %struct.Wrapper, ptr %[[W_ADDR]], i32 0, i32 0
+// LLVM: %[[TMP_ELEM_PTR:.*]] = load { i32, i32 }, ptr %[[ELEM_PTR]], align 4
+// LLVM: %[[REAL:.*]] = extractvalue { i32, i32 } %[[TMP_ELEM_PTR]], 0
+// LLVM: store i32 %[[REAL]], ptr %[[REAL_ADDR]], align 4
+
+int _Complex complex_imag_operator_on_rvalue() {
+  int imag = __imag__ complex_imag_operator_on_rvalue();
+  return {};
+}
+
+// CIR: %[[RET_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["__retval"]
+// CIR: %[[IMAG_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["imag", init]
+// CIR: %[[CALL:.*]] = cir.call @_Z31complex_imag_operator_on_rvaluev() : () -> !cir.complex<!s32i>
+// CIR: %[[IMAG:.*]] = cir.complex.imag %[[CALL]] : !cir.complex<!s32i> -> !s32i
+// CIR: cir.store{{.*}} %[[IMAG]], %[[IMAG_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CIR: %[[RET_COMPLEX:.*]] = cir.const #cir.zero : !cir.complex<!s32i>
+// CIR: cir.store{{.*}} %[[RET_COMPLEX]], %[[RET_ADDR]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+// CIR: %[[TMP_RET:.*]] = cir.load{{.*}} %[[RET_ADDR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR: cir.return %[[TMP_RET]] : !cir.complex<!s32i>
+
+// LLVM: %[[RET_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[IMAG_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[CALL:.*]] = call { i32, i32 } @_Z31complex_imag_operator_on_rvaluev()
+// LLVM: %[[IMAG:.*]] = extractvalue { i32, i32 } %[[CALL]], 1
+// LLVM: store i32 %[[IMAG]], ptr %[[IMAG_ADDR]], align 4
+// LLVM: store { i32, i32 } zeroinitializer, ptr %[[RET_ADDR]], align 4
+// LLVM: %[[TMP_RET:.*]] = load { i32, i32 }, ptr %[[RET_ADDR]], align 4
+// LLVM: ret { i32, i32 } %[[TMP_RET]]
+
+struct Container {
+  static int _Complex c;
+};
+
+void complex_member_expr_with_var_deal() {
+  Container con;
+  int r = __real__ con.c;
+}
+
+// CIR: %[[REAL_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["r", init]
+// CIR: %[[ELEM_PTR:.*]] = cir.get_global @_ZN9Container1cE : !cir.ptr<!cir.complex<!s32i>>
+// CIR: %[[ELEM:.*]] = cir.load{{.*}} %[[ELEM_PTR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR: %[[REAL:.*]] = cir.complex.real %[[ELEM]] : !cir.complex<!s32i> -> !s32i
+// CIR: cir.store{{.*}} %[[REAL]], %[[REAL_ADDR]] : !s32i, !cir.ptr<!s32i>
+
+// LLVM: %[[REAL_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[ELEM:.*]] = load { i32, i32 }, ptr @_ZN9Container1cE, align 4
+// LLVM: %[[REAL:.*]] = extractvalue { i32, i32 } %[[ELEM]], 0
+// LLVM: store i32 %[[REAL]], ptr %[[REAL_ADDR]], align 4
+
+void complex_comma_operator(int _Complex a, int _Complex b) {
+  int _Complex c = (a, b);
+}
+
+// CIR: %[[COMPLEX_A:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["a", init]
+// CIR: %[[COMPLEX_B:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["b", init]
+// CIR: %[[RESULT:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["c", init]
+// CIR: %[[TMP_B:.*]] = cir.load{{.*}} %[[COMPLEX_B]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR: cir.store{{.*}} %[[TMP_B]], %[[RESULT]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+
+// LLVM: %[[COMPLEX_A:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[COMPLEX_B:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[RESULT:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: %[[TMP_B:.*]] = load { i32, i32 }, ptr %[[COMPLEX_B]], align 4
+// LLVM: store { i32, i32 } %[[TMP_B]], ptr %[[RESULT]], align 4
+
+void complex_cxx_default_init_expr() {
+  struct FPComplexWrapper {
+    float _Complex c{};
+  };
+
+  FPComplexWrapper w{};
+}
+
+// CIR: %[[W_ADDR:.*]] = cir.alloca !rec_FPComplexWrapper, !cir.ptr<!rec_FPComplexWrapper>, ["w", init]
+// CIR: %[[C_ADDR:.*]] = cir.get_member %[[W_ADDR]][0] {name = "c"} : !cir.ptr<!rec_FPComplexWrapper> -> !cir.ptr<!cir.complex<!cir.float>>
+// CIR: %[[CONST_COMPLEX:.*]] = cir.const #cir.zero : !cir.complex<!cir.float>
+// CIR: cir.store{{.*}} %[[CONST_COMPLEX]], %[[C_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// LLVM: %[[W_ADDR:.*]] = alloca %struct.FPComplexWrapper, i64 1, align 4
+// LLVM: %[[C_ADDR:.*]] = getelementptr %struct.FPComplexWrapper, ptr %[[W_ADDR]], i32 0, i32 0
+// LLVM: store { float, float } zeroinitializer, ptr %[[C_ADDR]], align 4
+
+// OGCG: %[[W_ADDR:.*]] = alloca %struct.FPComplexWrapper, align 4
+// OGCG: %[[C_ADDR:.*]] = getelementptr inbounds nuw %struct.FPComplexWrapper, ptr %[[W_ADDR]], i32 0, i32 0
+// OGCG: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 0
+// OGCG: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 1
+// OGCG: store float 0.000000e+00, ptr %[[C_REAL_PTR]], align 4
+// OGCG: store float 0.000000e+00, ptr %[[C_IMAG_PTR]], align 4
+
+void complex_init_atomic() {
+  _Atomic(float _Complex) a;
+  __c11_atomic_init(&a, {1.0f, 2.0f});
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR: %[[CONST_COMPLEX:.*]] = cir.const #cir.complex<#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00> : !cir.float> : !cir.complex<!cir.float>
+// CIR: cir.store{{.*}} %[[CONST_COMPLEX]], %[[A_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 8
+// LLVM: store { float, float } { float 1.000000e+00, float 2.000000e+00 }, ptr %[[A_ADDR]], align 8
+
+// OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 8
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG: store float 1.000000e+00, ptr %[[A_REAL_PTR]], align 8
+// OGCG: store float 2.000000e+00, ptr %[[A_IMAG_PTR]], align 4
+
+void complex_opaque_value_expr() {
+  float _Complex a;
+  float b = 1.0f ?: __real__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["b", init]
+// CIR: %[[CONST_1:.*]] = cir.const #cir.fp<1.000000e+00> : !cir.float
+// CIR: cir.store align(4) %[[CONST_1]], %[[B_ADDR]] : !cir.float, !cir.ptr<!cir.float>
+
+// LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca float, i64 1, align 4
+// LLVM: store float 1.000000e+00, ptr %[[B_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca float, align 4
+// OGCG: store float 1.000000e+00, ptr %[[B_ADDR]], align 4
+
+void atomic_complex_type() {
+  _Atomic(float _Complex) a;
+  float _Complex b = __c11_atomic_load(&a, __ATOMIC_RELAXED);
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b", init]
+// CIR: %[[ATOMIC_TMP_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["atomic-temp"]
+// CIR: %[[A_PTR:.*]] = cir.cast bitcast %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>> -> !cir.ptr<!u64i>
+// CIR: %[[ATOMIC_TMP_PTR:.*]] = cir.cast bitcast %[[ATOMIC_TMP_ADDR]] : !cir.ptr<!cir.complex<!cir.float>> -> !cir.ptr<!u64i>
+// CIR: %[[TMP_A_ATOMIC:.*]] = cir.load{{.*}} atomic(relaxed) %[[A_PTR]] : !cir.ptr<!u64i>, !u64i
+// CIR: cir.store{{.*}} %[[TMP_A_ATOMIC]], %[[ATOMIC_TMP_PTR]] : !u64i, !cir.ptr<!u64i>
+// CIR: %[[TMP_ATOMIC_PTR:.*]] = cir.cast bitcast %[[ATOMIC_TMP_PTR]] : !cir.ptr<!u64i> -> !cir.ptr<!cir.complex<!cir.float>>
+// CIR: %[[TMP_ATOMIC:.*]] = cir.load{{.*}} %[[TMP_ATOMIC_PTR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: cir.store{{.*}} %[[TMP_ATOMIC]], %[[B_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 8
+// LLVM: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[ATOMIC_TMP_ADDR:.*]] = alloca { float, float }, i64 1, align 8
+// LLVM: %[[TMP_A_ATOMIC:.*]] = load atomic i64, ptr %[[A_ADDR]] monotonic, align 8
+// LLVM: store i64 %[[TMP_A_ATOMIC]], ptr %[[ATOMIC_TMP_ADDR]], align 8
+// LLVM: %[[TMP_ATOMIC:.*]] = load { float, float }, ptr %[[ATOMIC_TMP_ADDR]], align 8
+// LLVM: store { float, float } %[[TMP_ATOMIC]], ptr %[[B_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 8
+// OGCG: %[[B_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[ATOMIC_TMP_ADDR:.*]] = alloca { float, float }, align 8
+// OGCG: %[[TMP_A_ATOMIC:.*]] = load atomic i64, ptr %[[A_ADDR]] monotonic, align 8
+// OGCG: store i64 %[[TMP_A_ATOMIC]], ptr %[[ATOMIC_TMP_ADDR]], align 8
+// OGCG: %[[ATOMIC_TMP_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[ATOMIC_TMP_ADDR]], i32 0, i32 0
+// OGCG: %[[ATOMIC_TMP_REAL:.*]] = load float, ptr %[[ATOMIC_TMP_REAL_PTR]], align 8
+// OGCG: %[[ATOMIC_TMP_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[ATOMIC_TMP_ADDR]], i32 0, i32 1
+// OGCG: %[[ATOMIC_TMP_IMAG:.*]] = load float, ptr %[[ATOMIC_TMP_IMAG_PTR]], align 4
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG: store float %[[ATOMIC_TMP_REAL]], ptr %[[B_REAL_PTR]], align 4
+// OGCG: store float %[[ATOMIC_TMP_IMAG]], ptr %[[B_IMAG_PTR]], align 4
+
+void complex_type_parameter(float _Complex a) {}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a", init]
+// CIR: cir.store %{{.*}}, %[[A_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// TODO(CIR): the difference between the CIR LLVM and OGCG is because the lack of calling convention lowering,
+// Test will be updated when that is implemented
+
+// LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: store { float, float } %{{.*}}, ptr %[[A_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: store <2 x float> %a.coerce, ptr %[[A_ADDR]], align 4
+
+void complex_type_argument() {
+  float _Complex a;
+  complex_type_parameter(a);
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR: %[[ARG_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["coerce"]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: cir.store{{.*}} %[[TMP_A]], %[[ARG_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+// CIR: %[[TMP_ARG:.*]] = cir.load{{.*}} %[[ARG_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: cir.call @_Z22complex_type_parameterCf(%[[TMP_ARG]]) : (!cir.complex<!cir.float>) -> ()
+
+// LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[ARG_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4
+// LLVM: store { float, float } %[[TMP_A]], ptr %[[ARG_ADDR]], align 4
+// LLVM: %[[TMP_ARG:.*]] = load { float, float }, ptr %[[ARG_ADDR]], align 4
+// LLVM: call void @_Z22complex_type_parameterCf({ float, float } %[[TMP_ARG]])
+
+// OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[ARG_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0
+// OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
+// OGCG: %[[ARG_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[ARG_ADDR]], i32 0, i32 0
+// OGCG: %[[ARG_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[ARG_ADDR]], i32 0, i32 1
+// OGCG: store float %[[A_REAL]], ptr %[[ARG_REAL_PTR]], align 4
+// OGCG: store float %[[A_IMAG]], ptr %[[ARG_IMAG_PTR]], align 4
+// OGCG: %[[TMP_ARG:.*]] = load <2 x float>, ptr %[[ARG_ADDR]], align 4
+// OGCG: call void @_Z22complex_type_parameterCf(<2 x float> noundef %[[TMP_ARG]])
+
+void function_with_complex_default_arg(
+    float _Complex a = __builtin_complex(1.0f, 2.2f)) {}
+
+// CIR: %[[ARG_0_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a", init]
+// CIR: cir.store %{{.*}}, %[[ARG_0_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// TODO(CIR): the difference between the CIR LLVM and OGCG output is because of the lack of calling convention lowering.
+
+// LLVM: %[[ARG_0_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: store { float, float } %{{.*}}, ptr %[[ARG_0_ADDR]], align 4
+
+// OGCG: %[[ARG_0_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: store <2 x float> %{{.*}}, ptr %[[ARG_0_ADDR]], align 4
+
+void calling_function_with_default_arg() {
+  function_with_complex_default_arg();
+}
+
+// CIR: %[[DEFAULT_ARG_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["coerce"]
+// CIR: %[[DEFAULT_ARG_VAL:.*]] = cir.const #cir.complex<#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.200000e+00> : !cir.float> : !cir.complex<!cir.float>
+// CIR: cir.store{{.*}} %[[DEFAULT_ARG_VAL]], %[[DEFAULT_ARG_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+// CIR: %[[TMP_DEFAULT_ARG:.*]] = cir.load{{.*}} %[[DEFAULT_ARG_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: cir.call @_Z33function_with_complex_default_argCf(%[[TMP_DEFAULT_ARG]]) : (!cir.complex<!cir.float>) -> ()
+
+// TODO(CIR): the difference between the CIR LLVM and OGCG output is because of the lack of calling convention lowering.
+
+// LLVM: %[[DEFAULT_ARG_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: store { float, float } { float 1.000000e+00, float 0x40019999A0000000 }, ptr %[[DEFAULT_ARG_ADDR]], align 4
+// LLVM: %[[TMP_DEFAULT_ARG:.*]] = load { float, float }, ptr %[[DEFAULT_ARG_ADDR]], align 4
+// LLVM: call void @_Z33function_with_complex_default_argCf({ float, float } %[[TMP_DEFAULT_ARG]])
+
+// OGCG: %[[DEFAULT_ARG_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[DEFAULT_ARG_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[DEFAULT_ARG_ADDR]], i32 0, i32 0
+// OGCG: %[[DEFAULT_ARG_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[DEFAULT_ARG_ADDR]], i32 0, i32 1
+// OGCG: store float 1.000000e+00, ptr %[[DEFAULT_ARG_REAL_PTR]], align 4
+// OGCG: store float 0x40019999A0000000, ptr %[[DEFAULT_ARG_IMAG_PTR]], align 4
+// OGCG: %[[TMP_DEFAULT_ARG:.*]] = load <2 x float>, ptr %[[DEFAULT_ARG_ADDR]], align 4
+// OGCG: call void @_Z33function_with_complex_default_argCf(<2 x float> {{.*}} %[[TMP_DEFAULT_ARG]])
+
+void real_on_scalar_glvalue() {
+  float a;
+  float b = __real__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.float>, !cir.float
+// CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.float -> !cir.float
+// CIR: cir.store{{.*}} %[[A_REAL]], %[[B_ADDR]] : !cir.float, !cir.ptr<!cir.float>
+
+// LLVM: %[[A_ADDR:.*]] = alloca float, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca float, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load float, ptr %[[A_ADDR]], align 4
+// LLVM: store float %[[TMP_A]], ptr %[[B_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca float, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca float, align 4
+// OGCG: %[[TMP_A:.*]] = load float, ptr %[[A_ADDR]], align 4
+// OGCG: store float %[[TMP_A]], ptr %[[B_ADDR]], align 4
+
+void imag_on_scalar_glvalue() {
+  float a;
+  float b = __imag__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.float>, !cir.float
+// CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.float -> !cir.float
+// CIR: cir.store{{.*}} %[[A_IMAG]], %[[B_ADDR]] : !cir.float, !cir.ptr<!cir.float>
+
+// LLVM: %[[A_ADDR:.*]] = alloca float, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca float, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load float, ptr %[[A_ADDR]], align 4
+// LLVM: store float 0.000000e+00, ptr %[[B_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca float, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca float, align 4
+// OGCG: store float 0.000000e+00, ptr %[[B_ADDR]], align 4
+
+void real_on_scalar_bool() {
+  bool a;
+  bool b = __real__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.bool -> !cir.bool
+// CIR: cir.store{{.*}} %[[A_REAL]], %[[B_ADDR]] : !cir.bool, !cir.ptr<!cir.bool>
+
+// LLVM: %[[A_ADDR:.*]] = alloca i8, i64 1, align 1
+// LLVM: %[[B_ADDR:.*]] = alloca i8, i64 1, align 1
+// LLVM: %[[TMP_A:.*]] = load i8, ptr %[[A_ADDR]], align 1
+// LLVM: %[[TMP_A_I1:.*]] = trunc i8 %[[TMP_A]] to i1
+// LLVM: %[[TMP_A_I8:.*]] = zext i1 %[[TMP_A_I1]] to i8
+// LLVM: store i8 %[[TMP_A_I8]], ptr %[[B_ADDR]], align 1
+
+// OGCG: %[[A_ADDR:.*]] = alloca i8, align 1
+// OGCG: %[[B_ADDR:.*]] = alloca i8, align 1
+// OGCG: %[[TMP_A:.*]] = load i8, ptr %[[A_ADDR]], align 1
+// OGCG: %[[TMP_A_I1:.*]] = trunc i8 %[[TMP_A]] to i1
+// OGCG: %[[TMP_A_I8:.*]] = zext i1 %[[TMP_A_I1]] to i8
+// OGCG: store i8 %[[TMP_A_I8]], ptr %[[B_ADDR]], align 1
+
+void imag_on_scalar_bool() {
+  bool a;
+  bool b = __imag__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.bool -> !cir.bool
+// CIR: cir.store{{.*}} %[[A_IMAG]], %[[B_ADDR]] : !cir.bool, !cir.ptr<!cir.bool>
+
+// LLVM: %[[A_ADDR:.*]] = alloca i8, i64 1, align 1
+// LLVM: %[[B_ADDR:.*]] = alloca i8, i64 1, align 1
+// LLVM: %[[TMP_A:.*]] = load i8, ptr %[[A_ADDR]], align 1
+// LLVM: %[[TMP_A_I1:.*]] = trunc i8 %[[TMP_A]] to i1
+// LLVM: store i8 0, ptr %[[B_ADDR]], align 1
+
+// OGCG: %[[A_ADDR:.*]] = alloca i8, align 1
+// OGCG: %[[B_ADDR:.*]] = alloca i8, align 1
+// OGCG: store i8 0, ptr %[[B_ADDR]], align 1
diff --git a/clang/test/CIR/Incubator/CodeGen/compound-literal-empty.c b/clang/test/CIR/Incubator/CodeGen/compound-literal-empty.c
new file mode 100644
index 0000000000000..b0007d96b4cb2
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/compound-literal-empty.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+short b() { return (short){}; }
+
+// CIR-LABEL: b
+// CIR: {{%.*}} = cir.alloca !s16i, !cir.ptr<!s16i>, [".compoundliteral"] {alignment = 2 : i64}
+
+// LLVM-LABEL: b
+// LLVM: [[RET_P:%.*]] = alloca i16, i64 1, align 2
+// LLVM: [[LITERAL:%.*]] = alloca i16, i64 1, align 2
+// LLVM: store i16 0, ptr [[LITERAL]], align 2
+// LLVM: [[T0:%.*]] = load i16, ptr [[LITERAL]], align 2
+// LLVM: store i16 [[T0]], ptr [[RET_P]], align 2
+// LLVM: [[T1:%.*]] = load i16, ptr [[RET_P]], align 2
+// LLVM: ret i16 [[T1]]
diff --git a/clang/test/CIR/Incubator/CodeGen/compound-literal.c b/clang/test/CIR/Incubator/CodeGen/compound-literal.c
new file mode 100644
index 0000000000000..11f202d890a6b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/compound-literal.c
@@ -0,0 +1,106 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -Wno-unused-value -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-call-conv-lowering -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+typedef struct {
+  int *arr;
+} S;
+
+S a = {
+  .arr = (int[]){}
+};
+
+// CIR: cir.global "private" internal @".compoundLiteral.0" = #cir.zero : !cir.array<!s32i x 0> {alignment = 4 : i64}
+// CIR: cir.global external @a = #cir.const_record<{#cir.global_view<@".compoundLiteral.0"> : !cir.ptr<!s32i>}> : !rec_S
+
+// LLVM: @.compoundLiteral.0 = internal global [0 x i32] zeroinitializer
+// LLVM: @a = global %struct.S { ptr @.compoundLiteral.0 }
+
+S b = {
+  .arr = (int[]){1}
+};
+
+// CIR: cir.global "private" internal @".compoundLiteral.1" = #cir.const_array<[#cir.int<1> : !s32i]> : !cir.array<!s32i x 1> {alignment = 4 : i64}
+// CIR: cir.global external @b = #cir.const_record<{#cir.global_view<@".compoundLiteral.1"> : !cir.ptr<!s32i>}> : !rec_S
+
+// LLVM: @.compoundLiteral.1 = internal global [1 x i32] [i32 1]
+// LLVM: @b = global %struct.S { ptr @.compoundLiteral.1 }
+
+int foo() {
+  return (struct {
+           int i;
+         }){1}
+      .i;
+}
+
+// CIR:  cir.func {{.*}} @foo() -> !s32i
+// CIR:    [[RET_MEM:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CIR:    [[COMPLITERAL_MEM:%.*]] = cir.alloca !rec_anon2E0, !cir.ptr<!rec_anon2E0>, [".compoundliteral"] {alignment = 4 : i64}
+// CIR:    [[FIELD:%.*]] = cir.get_member [[COMPLITERAL_MEM]][0] {name = "i"} : !cir.ptr<!rec_anon2E0> -> !cir.ptr<!s32i>
+// CIR:    [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR:    cir.store{{.*}} [[ONE]], [[FIELD]] : !s32i, !cir.ptr<!s32i>
+// CIR:    [[ONE:%.*]] = cir.const #cir.int<1> : !s32i
+// CIR:    cir.store{{.*}} [[ONE]], [[RET_MEM]] : !s32i, !cir.ptr<!s32i>
+// CIR:    [[RET:%.*]] = cir.load{{.*}} [[RET_MEM]] : !cir.ptr<!s32i>, !s32i
+// CIR:    cir.return [[RET]] : !s32i
+
+struct G { short x, y, z; };
+struct G g(int x, int y, int z) {
+  return (struct G) { x, y, z };
+}
+
+// CIR:  cir.func {{.*}} @g
+// CIR:    %[[RETVAL:.*]] = cir.alloca !rec_G, !cir.ptr<!rec_G>, ["__retval"] {alignment = 2 : i64}
+// CIR:    %[[X:.*]] = cir.get_member %[[RETVAL]][0] {name = "x"}
+// CIR:    cir.store{{.*}} {{.*}}, %[[X]] : !s16i
+// CIR:    %[[Y:.*]] = cir.get_member %[[RETVAL]][1] {name = "y"}
+// CIR:    cir.store{{.*}} {{.*}}, %[[Y]] : !s16i
+// CIR:    %[[Z:.*]] = cir.get_member %[[RETVAL]][2] {name = "z"}
+// CIR:    cir.store{{.*}} {{.*}}, %[[Z]] : !s16i
+// CIR:    %[[RES:.*]] = cir.load{{.*}} %[[RETVAL]]
+// CIR:    cir.return %[[RES]]
+
+// Nothing meaningful to test for LLVM codegen here.
+// FIXME: ABI note, LLVM lowering differs from traditional LLVM codegen here,
+// because the former does a memcopy + i48 load.
+
+typedef struct { unsigned long pgprot; } pgprot_t;
+void split_large_page(unsigned long addr, pgprot_t prot)
+{
+  (addr ? prot : ((pgprot_t) { 0x001 } )).pgprot;
+}
+
+// CIR-LABEL: @split_large_page
+// CIR:   %[[VAL_2:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["addr", init] {alignment = 8 : i64}
+// CIR:   %[[VAL_3:.*]] = cir.alloca !rec_pgprot_t, !cir.ptr<!rec_pgprot_t>, ["prot", init] {alignment = 8 : i64}
+// CIR:   cir.store{{.*}} {{.*}}, %[[VAL_2]] : !u64i, !cir.ptr<!u64i>
+// CIR:   cir.store{{.*}} {{.*}}, %[[VAL_3]] : !rec_pgprot_t, !cir.ptr<!rec_pgprot_t>
+// CIR:   {{.*}} = cir.scope {
+// CIR:     %[[VAL_4:.*]] = cir.alloca !rec_pgprot_t, !cir.ptr<!rec_pgprot_t>, ["ref.tmp0"] {alignment = 8 : i64}
+// CIR:     %[[VAL_5:.*]] = cir.load{{.*}} %[[VAL_2]] : !cir.ptr<!u64i>, !u64i
+// CIR:     %[[VAL_6:.*]] = cir.cast int_to_bool %[[VAL_5]] : !u64i -> !cir.bool
+// CIR:     cir.if %[[VAL_6]] {
+// CIR:       cir.copy %[[VAL_3]] to %[[VAL_4]] : !cir.ptr<!rec_pgprot_t>
+// CIR:     } else {
+// CIR:       %[[VAL_7:.*]] = cir.get_member %[[VAL_4]][0] {name = "pgprot"} : !cir.ptr<!rec_pgprot_t> -> !cir.ptr<!u64i>
+// CIR:       %[[VAL_8:.*]] = cir.const #cir.int<1> : !s32i
+// CIR:       %[[VAL_9:.*]] = cir.cast integral %[[VAL_8]] : !s32i -> !u64i
+// CIR:       cir.store{{.*}} %[[VAL_9]], %[[VAL_7]] : !u64i, !cir.ptr<!u64i>
+// CIR:     }
+// CIR:     %[[VAL_10:.*]] = cir.get_member %[[VAL_4]][0] {name = "pgprot"} : !cir.ptr<!rec_pgprot_t> -> !cir.ptr<!u64i>
+// CIR:     %[[VAL_11:.*]] = cir.load{{.*}} %[[VAL_10]] : !cir.ptr<!u64i>, !u64i
+// CIR:   cir.return
+// CIR: }
+
+// LLVM-LABEL: @split_large_page
+// LLVM:    br i1 {{.*}}, label %[[TRUE:[a-z0-9]+]], label %[[FALSE:[a-z0-9]+]]
+// LLVM:  [[FALSE]]:
+// LLVM:    %[[GEP:.*]] = getelementptr {{.*}}, ptr %[[ADDR:.*]], i32 0, i32 0
+// LLVM:    store i64 1, ptr %[[GEP]], align 8
+// LLVM:    br label %[[EXIT:[a-z0-9]+]]
+// LLVM:  [[TRUE]]:
+// LLVM:    call void @llvm.memcpy.p0.p0.i32(ptr %[[ADDR]], ptr {{.*}}, i32 8, i1 false)
+// LLVM:    br label %[[EXIT]]
+// LLVM:  [[EXIT]]:
+// LLVM:    ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/concept-specialization.cpp b/clang/test/CIR/Incubator/CodeGen/concept-specialization.cpp
new file mode 100644
index 0000000000000..cd2e61cff4c1b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/concept-specialization.cpp
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+template <typename T>
+concept Integral = __is_integral(T);
+
+template <typename T>
+concept Signed = Integral<T> && __is_signed(T);
+
+// Test ConceptSpecializationExpr as a boolean value
+bool test_concept_bool() {
+  // CHECK-LABEL: cir.func{{.*}} @{{.*}}test_concept_boolv
+  // CHECK: %{{.*}} = cir.const #true
+  return Integral<int>;
+}
+
+bool test_concept_false() {
+  // CHECK-LABEL: cir.func{{.*}} @{{.*}}test_concept_falsev
+  // CHECK: %{{.*}} = cir.const #false
+  return Integral<float>;
+}
+
+bool test_concept_compound() {
+  // CHECK-LABEL: cir.func{{.*}} @{{.*}}test_concept_compoundv
+  // CHECK: %{{.*}} = cir.const #true
+  return Signed<int>;
+}
+
+bool test_concept_unsigned() {
+  // CHECK-LABEL: cir.func{{.*}} @{{.*}}test_concept_unsignedv
+  // CHECK: %{{.*}} = cir.const #false
+  return Signed<unsigned>;
+}
+
+// Test in conditional
+int test_concept_in_if() {
+  // CHECK-LABEL: cir.func{{.*}} @{{.*}}test_concept_in_ifv
+  if (Integral<int>) {
+    // CHECK: %{{.*}} = cir.const #true
+    // CHECK: cir.if %{{.*}} {
+    return 1;
+  }
+  return 0;
+}
+
+// Test constexpr variable with concept
+constexpr bool is_int_integral = Integral<int>;
+
+int use_constexpr() {
+  // CHECK-LABEL: cir.func{{.*}} @{{.*}}use_constexprv
+  if (is_int_integral) {
+    // This should be optimized to a constant true
+    return 42;
+  }
+  return 0;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/cond.cpp b/clang/test/CIR/Incubator/CodeGen/cond.cpp
new file mode 100644
index 0000000000000..660292b7e5fc8
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/cond.cpp
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t-og.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t-og.ll %s
+
+struct __less {
+  inline constexpr bool operator()(const unsigned long& __x, const unsigned long& __y) const {return __x < __y;}
+};
+
+const unsigned long&
+min(const unsigned long& __a, const unsigned long& __b) {
+  return __less()(__b, __a) ? __b : __a;
+}
+
+// CHECK: cir.func {{.*}} @_Z3minRKmS0_(%arg0: !cir.ptr<!u64i>
+// CHECK:   %0 = cir.alloca !cir.ptr<!u64i>, !cir.ptr<!cir.ptr<!u64i>>, ["__a", init, const] {alignment = 8 : i64}
+// CHECK:   %1 = cir.alloca !cir.ptr<!u64i>, !cir.ptr<!cir.ptr<!u64i>>, ["__b", init, const] {alignment = 8 : i64}
+// CHECK:   %2 = cir.alloca !cir.ptr<!u64i>, !cir.ptr<!cir.ptr<!u64i>>, ["__retval"] {alignment = 8 : i64}
+// CHECK:   cir.store{{.*}} %arg0, %0 : !cir.ptr<!u64i>, !cir.ptr<!cir.ptr<!u64i>>
+// CHECK:   cir.store{{.*}} %arg1, %1 : !cir.ptr<!u64i>, !cir.ptr<!cir.ptr<!u64i>>
+// CHECK:   cir.scope {
+// CHECK:     %4 = cir.alloca !rec___less, !cir.ptr<!rec___less>, ["ref.tmp0"] {alignment = 1 : i64}
+// CHECK:     %5 = cir.load %1 : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+// CHECK:     %6 = cir.load %0 : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+// CHECK:     %7 = cir.call @_ZNK6__lessclERKmS1_(%4, %5, %6) : (!cir.ptr<!rec___less>, !cir.ptr<!u64i>, !cir.ptr<!u64i>) -> !cir.bool
+// CHECK:     %8 = cir.ternary(%7, true {
+// CHECK:       %9 = cir.load %1 : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+// CHECK:       cir.yield %9 : !cir.ptr<!u64i>
+// CHECK:     }, false {
+// CHECK:       %9 = cir.load %0 : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+// CHECK:       cir.yield %9 : !cir.ptr<!u64i>
+// CHECK:     }) : (!cir.bool) -> !cir.ptr<!u64i>
+// CHECK:     cir.store{{.*}} %8, %2 : !cir.ptr<!u64i>, !cir.ptr<!cir.ptr<!u64i>>
+
+// LLVM-LABEL: define {{.*}} @_Z3minRKmS0_
+// LLVM-NOT:     call {{.*}} @_ZN6__lessC1Ev
+// LLVM:         call {{.*}} @_ZNK6__lessclERKmS1_
+// LLVM:         ret ptr
+
+// OGCG-LABEL: define {{.*}} @_Z3minRKmS0_
+// OGCG-NOT:     call {{.*}} @_ZN6__lessC1Ev
+// OGCG:         call {{.*}} @_ZNK6__lessclERKmS1_
+// OGCG:         ret ptr
diff --git a/clang/test/CIR/Incubator/CodeGen/conditional-cleanup.cpp b/clang/test/CIR/Incubator/CodeGen/conditional-cleanup.cpp
new file mode 100644
index 0000000000000..5235dc89159f9
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/conditional-cleanup.cpp
@@ -0,0 +1,230 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -mconstructor-aliases -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -emit-cir %s -o %t.eh.cir
+// RUN: FileCheck --check-prefix=CIR_EH --input-file=%t.eh.cir %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -emit-cir-flat -fno-clangir-call-conv-lowering %s -o %t.eh.flat.cir
+// RUN: FileCheck --check-prefix=CIR_FLAT_EH --input-file=%t.eh.flat.cir %s
+
+typedef __typeof(sizeof(0)) size_t;
+
+// Declare the reserved global placement new.
+void *operator new(size_t, void*);
+
+namespace test7 {
+  struct A { A(); ~A(); };
+  struct B {
+    static void *operator new(size_t size) throw();
+    B(const A&, B*);
+    ~B();
+  };
+
+  B *test() {
+    return new B(A(), new B(A(), 0));
+  }
+}
+
+// CIR-DAG: ![[A:.*]] = !cir.record<struct "test7::A" padded {!u8i}
+// CIR-DAG: ![[B:.*]] = !cir.record<struct "test7::B" padded {!u8i}
+
+// CIR_EH-DAG: ![[A:.*]] = !cir.record<struct "test7::A" padded {!u8i}
+// CIR_EH-DAG: ![[B:.*]] = !cir.record<struct "test7::B" padded {!u8i}
+
+// CIR-LABEL: _ZN5test74testEv
+// CIR:   %[[RET_VAL:.*]] = cir.alloca !cir.ptr<![[B]]>, !cir.ptr<!cir.ptr<![[B]]>>, ["__retval"] {alignment = 8 : i64}
+// CIR:   cir.scope {
+// CIR:     %[[TMP_A0:.*]] = cir.alloca ![[A]], !cir.ptr<![[A]]>, ["ref.tmp0"] {alignment = 1 : i64}
+// CIR:     %[[CLEANUP_COND_OUTER:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["cleanup.cond"] {alignment = 1 : i64}
+// CIR:     %[[TMP_A1:.*]] = cir.alloca ![[A]], !cir.ptr<![[A]]>, ["ref.tmp1"] {alignment = 1 : i64}
+// CIR:     %[[CLEANUP_COND_INNER:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["cleanup.cond"] {alignment = 1 : i64}
+// CIR:     %[[FALSE0:.*]] = cir.const #false
+// CIR:     %[[TRUE0:.*]] = cir.const #true
+// CIR:     %[[FALSE1:.*]] = cir.const #false
+// CIR:     %[[TRUE1:.*]] = cir.const #true
+
+// CIR:     %[[NULL_CHECK0:.*]] = cir.cmp(ne
+// CIR:     %[[PTR_B0:.*]] = cir.cast bitcast
+// CIR:     cir.store align(1) %[[FALSE1]], %[[CLEANUP_COND_OUTER]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR:     cir.store align(1) %[[FALSE0]], %[[CLEANUP_COND_INNER]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR:     cir.if %[[NULL_CHECK0]] {
+
+// Ctor call: @test7::A::A()
+// CIR:       cir.call @_ZN5test71AC1Ev(%[[TMP_A0]]) : (!cir.ptr<![[A]]>) -> ()
+// CIR:       cir.store{{.*}} %[[TRUE1]], %[[CLEANUP_COND_OUTER]] : !cir.bool, !cir.ptr<!cir.bool>
+
+// CIR:       %[[NULL_CHECK1:.*]] = cir.cmp(ne
+// CIR:       %[[PTR_B1:.*]] = cir.cast bitcast
+// CIR:       cir.if %[[NULL_CHECK1]] {
+
+// Ctor call: @test7::A::A()
+// CIR:         cir.call @_ZN5test71AC1Ev(%[[TMP_A1]]) : (!cir.ptr<![[A]]>) -> ()
+// CIR:         cir.store{{.*}} %[[TRUE0]], %[[CLEANUP_COND_INNER]] : !cir.bool, !cir.ptr<!cir.bool>
+// Ctor call: @test7::B::B()
+// CIR:         cir.call @_ZN5test71BC1ERKNS_1AEPS0_(%[[PTR_B1]], %[[TMP_A1]], {{.*}}) : (!cir.ptr<![[B]]>, !cir.ptr<![[A]]>, !cir.ptr<![[B]]>) -> ()
+// CIR:       }
+
+// Ctor call: @test7::B::B()
+// CIR:       cir.call @_ZN5test71BC1ERKNS_1AEPS0_(%[[PTR_B0]], %[[TMP_A0]], %[[PTR_B1]]) : (!cir.ptr<![[B]]>, !cir.ptr<![[A]]>, !cir.ptr<![[B]]>) -> ()
+// CIR:     }
+// CIR:     cir.store{{.*}} %[[PTR_B0]], %[[RET_VAL]] : !cir.ptr<![[B]]>, !cir.ptr<!cir.ptr<![[B]]>>
+// CIR:     %[[DO_CLEANUP_INNER:.*]] = cir.load{{.*}} %[[CLEANUP_COND_INNER]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR:     cir.if %[[DO_CLEANUP_INNER]] {
+// Dtor call: @test7::A::~A()
+// CIR:       cir.call @_ZN5test71AD1Ev(%[[TMP_A1]]) : (!cir.ptr<![[A]]>) -> ()
+// CIR:     }
+// CIR:     %[[DO_CLEANUP_OUTER:.*]] = cir.load{{.*}} %[[CLEANUP_COND_OUTER]] : !cir.ptr<!cir.bool>, !cir.bool
+// Dtor call: @test7::A::~A()
+// CIR:     cir.if %[[DO_CLEANUP_OUTER]] {
+// CIR:       cir.call @_ZN5test71AD1Ev(%[[TMP_A0]]) : (!cir.ptr<![[A]]>) -> ()
+// CIR:     }
+// CIR:   }
+// CIR:   cir.return
+// CIR: }
+
+// CIR_EH-DAG: #[[$ATTR_0:.+]] = #cir.bool<false> : !cir.bool
+// CIR_EH-DAG: #[[$ATTR_1:.+]] = #cir<extra({nothrow = #cir.nothrow})>
+// CIR_EH-DAG: #[[$ATTR_3:.+]] = #cir.bool<true> : !cir.bool
+
+// CIR_EH-LABEL: @_ZN5test74testEv
+// CIR_EH:           %[[VAL_0:.*]] = cir.alloca !cir.ptr<!rec_test73A3AB>, !cir.ptr<!cir.ptr<!rec_test73A3AB>>, ["__retval"] {alignment = 8 : i64}
+// CIR_EH:           cir.scope {
+// CIR_EH:             %[[VAL_1:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["cleanup.cond"] {alignment = 1 : i64}
+// CIR_EH:             %[[VAL_2:.*]] = cir.alloca !rec_test73A3AA, !cir.ptr<!rec_test73A3AA>, ["ref.tmp0"] {alignment = 1 : i64}
+// CIR_EH:             %[[VAL_3:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["cleanup.cond"] {alignment = 1 : i64}
+// CIR_EH:             %[[VAL_4:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["cleanup.cond"] {alignment = 1 : i64}
+// CIR_EH:             %[[VAL_5:.*]] = cir.alloca !rec_test73A3AA, !cir.ptr<!rec_test73A3AA>, ["ref.tmp1"] {alignment = 1 : i64}
+// CIR_EH:             %[[VAL_6:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["cleanup.cond"] {alignment = 1 : i64}
+// CIR_EH:             %[[VAL_7:.*]] = cir.const #[[$ATTR_0]]
+// CIR_EH:             %[[VAL_8:.*]] = cir.const #[[$ATTR_3]]
+// CIR_EH:             %[[VAL_9:.*]] = cir.const #[[$ATTR_0]]
+// CIR_EH:             %[[VAL_10:.*]] = cir.const #[[$ATTR_3]]
+// CIR_EH:             %[[VAL_11:.*]] = cir.const #[[$ATTR_0]]
+// CIR_EH:             %[[VAL_12:.*]] = cir.const #[[$ATTR_3]]
+// CIR_EH:             %[[VAL_13:.*]] = cir.const #[[$ATTR_0]]
+// CIR_EH:             %[[VAL_14:.*]] = cir.const #[[$ATTR_3]]
+// CIR_EH:             %[[VAL_15:.*]] = cir.const #{{.*}}<1> : !u64i
+// CIR_EH:             %[[VAL_16:.*]] = cir.call @_ZN5test71BnwEm(%[[VAL_15]]) : (!u64i) -> !cir.ptr<!void>
+// CIR_EH:             %[[VAL_17:.*]] = cir.const #{{.*}}<null> : !cir.ptr<!void>
+// CIR_EH:             %[[VAL_18:.*]] = cir.cmp(ne, %[[VAL_16]], %[[VAL_17]]) : !cir.ptr<!void>, !cir.bool
+// CIR_EH:             %[[VAL_19:.*]] = cir.cast bitcast %[[VAL_16]] : !cir.ptr<!void> -> !cir.ptr<!rec_test73A3AB>
+// CIR_EH:             cir.store align(1) %[[VAL_13]], %[[VAL_1]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR_EH:             cir.store align(1) %[[VAL_11]], %[[VAL_3]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR_EH:             cir.store align(1) %[[VAL_9]], %[[VAL_4]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR_EH:             cir.store align(1) %[[VAL_7]], %[[VAL_6]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR_EH:             cir.if %[[VAL_18]] {
+// CIR_EH:               cir.store{{.*}} %[[VAL_14]], %[[VAL_1]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR_EH:               cir.try synthetic cleanup {
+// CIR_EH:                 cir.call exception @_ZN5test71AC1Ev(%[[VAL_2]]) : (!cir.ptr<!rec_test73A3AA>) -> () cleanup {
+// CIR_EH:                   %[[VAL_20:.*]] = cir.load{{.*}} %[[VAL_1]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR_EH:                   cir.if %[[VAL_20]] {
+// CIR_EH:                     cir.call @_ZdlPvm(%[[VAL_16]], %[[VAL_15]]) : (!cir.ptr<!void>, !u64i) -> ()
+// CIR_EH:                   }
+// CIR_EH:                   cir.yield
+// CIR_EH:                 }
+// CIR_EH:                 cir.yield
+// CIR_EH:               } catch [#{{.*}} {
+// CIR_EH:                 cir.resume
+// CIR_EH:               }]
+// CIR_EH:               cir.store{{.*}} %[[VAL_12]], %[[VAL_3]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR_EH:               %[[VAL_21:.*]] = cir.const #{{.*}}<1> : !u64i
+// CIR_EH:               %[[VAL_22:.*]] = cir.call @_ZN5test71BnwEm(%[[VAL_21]]) : (!u64i) -> !cir.ptr<!void>
+// CIR_EH:               %[[VAL_23:.*]] = cir.const #{{.*}}<null> : !cir.ptr<!void>
+// CIR_EH:               %[[VAL_24:.*]] = cir.cmp(ne, %[[VAL_22]], %[[VAL_23]]) : !cir.ptr<!void>, !cir.bool
+// CIR_EH:               %[[VAL_25:.*]] = cir.cast bitcast %[[VAL_22]] : !cir.ptr<!void> -> !cir.ptr<!rec_test73A3AB>
+// CIR_EH:               cir.if %[[VAL_24]] {
+// CIR_EH:                 cir.store{{.*}} %[[VAL_10]], %[[VAL_4]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR_EH:                 cir.try synthetic cleanup {
+// CIR_EH:                   cir.call exception @_ZN5test71AC1Ev(%[[VAL_5]]) : (!cir.ptr<!rec_test73A3AA>) -> () cleanup {
+// CIR_EH:                     %[[VAL_26:.*]] = cir.load{{.*}} %[[VAL_4]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR_EH:                     cir.if %[[VAL_26]] {
+// CIR_EH:                       cir.call @_ZdlPvm(%[[VAL_22]], %[[VAL_21]]) : (!cir.ptr<!void>, !u64i) -> ()
+// CIR_EH:                     }
+// CIR_EH:                     %[[VAL_27:.*]] = cir.load{{.*}} %[[VAL_3]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR_EH:                     cir.if %[[VAL_27]] {
+// CIR_EH:                       cir.call @_ZN5test71AD1Ev(%[[VAL_2]]) : (!cir.ptr<!rec_test73A3AA>) -> ()
+// CIR_EH:                     }
+// CIR_EH:                     %[[VAL_28:.*]] = cir.load{{.*}} %[[VAL_1]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR_EH:                     cir.if %[[VAL_28]] {
+// CIR_EH:                       cir.call @_ZdlPvm(%[[VAL_16]], %[[VAL_15]]) : (!cir.ptr<!void>, !u64i) -> ()
+// CIR_EH:                     }
+// CIR_EH:                     cir.yield
+// CIR_EH:                   }
+// CIR_EH:                   cir.yield
+// CIR_EH:                 } catch [#{{.*}} {
+// CIR_EH:                   cir.resume
+// CIR_EH:                 }]
+// CIR_EH:                 cir.store{{.*}} %[[VAL_8]], %[[VAL_6]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR_EH:                 %[[VAL_29:.*]] = cir.const #{{.*}}<null> : !cir.ptr<!rec_test73A3AB>
+// CIR_EH:                 cir.try synthetic cleanup {
+// CIR_EH:                   cir.call exception @_ZN5test71BC1ERKNS_1AEPS0_(%[[VAL_25]], %[[VAL_5]], %[[VAL_29]]) : (!cir.ptr<!rec_test73A3AB>, !cir.ptr<!rec_test73A3AA>, !cir.ptr<!rec_test73A3AB>) -> () cleanup {
+// CIR_EH:                     %[[VAL_30:.*]] = cir.load{{.*}} %[[VAL_6]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR_EH:                     cir.if %[[VAL_30]] {
+// CIR_EH:                       cir.call @_ZN5test71AD1Ev(%[[VAL_5]]) : (!cir.ptr<!rec_test73A3AA>) -> ()
+// CIR_EH:                     }
+// CIR_EH:                     %[[VAL_31:.*]] = cir.load{{.*}} %[[VAL_4]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR_EH:                     cir.if %[[VAL_31]] {
+// CIR_EH:                       cir.call @_ZdlPvm(%[[VAL_22]], %[[VAL_21]]) : (!cir.ptr<!void>, !u64i) -> ()
+// CIR_EH:                     }
+// CIR_EH:                     %[[VAL_32:.*]] = cir.load{{.*}} %[[VAL_3]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR_EH:                     cir.if %[[VAL_32]] {
+// CIR_EH:                       cir.call @_ZN5test71AD1Ev(%[[VAL_2]]) : (!cir.ptr<!rec_test73A3AA>) -> ()
+// CIR_EH:                     }
+// CIR_EH:                     %[[VAL_33:.*]] = cir.load{{.*}} %[[VAL_1]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR_EH:                     cir.if %[[VAL_33]] {
+// CIR_EH:                       cir.call @_ZdlPvm(%[[VAL_16]], %[[VAL_15]]) : (!cir.ptr<!void>, !u64i) -> ()
+// CIR_EH:                     }
+// CIR_EH:                     cir.yield
+// CIR_EH:                   }
+// CIR_EH:                   cir.yield
+// CIR_EH:                 } catch [#{{.*}} {
+// CIR_EH:                   cir.resume
+// CIR_EH:                 }]
+// CIR_EH:                 %[[VAL_34:.*]] = cir.const #[[$ATTR_0]]
+// CIR_EH:                 cir.store{{.*}} %[[VAL_34]], %[[VAL_4]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR_EH:               }
+// CIR_EH:               cir.try synthetic cleanup {
+// CIR_EH:                 cir.call exception @_ZN5test71BC1ERKNS_1AEPS0_(%[[VAL_19]], %[[VAL_2]], %[[VAL_25]]) : (!cir.ptr<!rec_test73A3AB>, !cir.ptr<!rec_test73A3AA>, !cir.ptr<!rec_test73A3AB>) -> () cleanup {
+// CIR_EH:                   %[[VAL_35:.*]] = cir.load{{.*}} %[[VAL_6]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR_EH:                   cir.if %[[VAL_35]] {
+// CIR_EH:                     cir.call @_ZN5test71AD1Ev(%[[VAL_5]]) : (!cir.ptr<!rec_test73A3AA>) -> ()
+// CIR_EH:                   }
+// CIR_EH:                   %[[VAL_36:.*]] = cir.load{{.*}} %[[VAL_4]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR_EH:                   cir.if %[[VAL_36]] {
+// CIR_EH:                     cir.call @_ZdlPvm(%[[VAL_22]], %[[VAL_21]]) : (!cir.ptr<!void>, !u64i) -> ()
+// CIR_EH:                   }
+// CIR_EH:                   %[[VAL_37:.*]] = cir.load{{.*}} %[[VAL_3]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR_EH:                   cir.if %[[VAL_37]] {
+// CIR_EH:                     cir.call @_ZN5test71AD1Ev(%[[VAL_2]]) : (!cir.ptr<!rec_test73A3AA>) -> ()
+// CIR_EH:                   }
+// CIR_EH:                   %[[VAL_38:.*]] = cir.load{{.*}} %[[VAL_1]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR_EH:                   cir.if %[[VAL_38]] {
+// CIR_EH:                     cir.call @_ZdlPvm(%[[VAL_16]], %[[VAL_15]]) : (!cir.ptr<!void>, !u64i) -> ()
+// CIR_EH:                   }
+// CIR_EH:                   cir.yield
+// CIR_EH:                 }
+// CIR_EH:                 cir.yield
+// CIR_EH:               } catch [#{{.*}} {
+// CIR_EH:                 cir.resume
+// CIR_EH:               }]
+// CIR_EH:               %[[VAL_39:.*]] = cir.const #[[$ATTR_0]]
+// CIR_EH:               cir.store{{.*}} %[[VAL_39]], %[[VAL_1]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR_EH:             }
+// CIR_EH:             cir.store{{.*}} %[[VAL_19]], %[[VAL_0]] : !cir.ptr<!rec_test73A3AB>, !cir.ptr<!cir.ptr<!rec_test73A3AB>>
+// CIR_EH:             %[[VAL_40:.*]] = cir.load{{.*}} %[[VAL_6]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR_EH:             cir.if %[[VAL_40]] {
+// CIR_EH:               cir.call @_ZN5test71AD1Ev(%[[VAL_5]]) : (!cir.ptr<!rec_test73A3AA>) -> ()
+// CIR_EH:             }
+// CIR_EH:             %[[VAL_41:.*]] = cir.load{{.*}} %[[VAL_3]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR_EH:             cir.if %[[VAL_41]] {
+// CIR_EH:               cir.call @_ZN5test71AD1Ev(%[[VAL_2]]) : (!cir.ptr<!rec_test73A3AA>) -> ()
+// CIR_EH:             }
+// CIR_EH:             %[[VAL_43:.*]] = cir.load{{.*}} %[[VAL_1]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR_EH:             cir.if %[[VAL_43]] {
+// CIR_EH:               cir.call @_ZdlPvm(%[[VAL_16]], %[[VAL_15]]) : (!cir.ptr<!void>, !u64i) -> ()
+// CIR_EH:             }
+// CIR_EH:           }
+// CIR_EH:           %[[VAL_44:.*]] = cir.load{{.*}} %[[VAL_0]] : !cir.ptr<!cir.ptr<!rec_test73A3AB>>, !cir.ptr<!rec_test73A3AB>
+// CIR_EH:           cir.return %[[VAL_44]] : !cir.ptr<!rec_test73A3AB>
+// CIR_EH:         }
+
+// Nothing special, just test it passes!
+// CIR_FLAT_EH-LABEL: @_ZN5test74testEv
diff --git a/clang/test/CIR/Incubator/CodeGen/const-alloca.cpp b/clang/test/CIR/Incubator/CodeGen/const-alloca.cpp
new file mode 100644
index 0000000000000..6df862956bcc3
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/const-alloca.cpp
@@ -0,0 +1,172 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir --check-prefix=CIR %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll --check-prefix=LLVM %s
+
+int produce_int();
+void blackbox(const int &);
+void consume(int);
+
+void local_const_int() {
+  const int x = produce_int();
+}
+
+// CIR-LABEL: @_Z15local_const_intv
+// CIR:   %{{.+}} = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const]
+// CIR: }
+
+void param_const_int(const int x) {}
+
+// CIR-LABEL: @_Z15param_const_inti
+// CIR:  %{{.+}} = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const]
+// CIR: }
+
+void local_constexpr_int() {
+  constexpr int x = 42;
+  blackbox(x);
+}
+
+// CIR-LABEL: @_Z19local_constexpr_intv
+// CIR:   %{{.+}} = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const]
+// CIR: }
+
+void local_reference() {
+  int x = 0;
+  int &r = x;
+}
+
+// CIR-LABEL: @_Z15local_referencev
+// CIR:   %{{.+}} = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["r", init, const]
+// CIR: }
+
+struct Foo {
+  int a;
+  int b;
+};
+
+Foo produce_foo();
+
+void local_const_struct() {
+  const Foo x = produce_foo();
+}
+
+// CIR-LABEL: @_Z18local_const_structv
+// CIR:   %{{.+}} = cir.alloca !rec_Foo, !cir.ptr<!rec_Foo>, ["x", init, const]
+// CIR: }
+
+[[clang::optnone]]
+int local_const_load_store() {
+  const int x = produce_int();
+  int y = x;
+  return y;
+}
+
+// CIR-LABEL: @_Z22local_const_load_storev
+// CIR: %{{.+}} = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const] {alignment = 4 : i64}
+// CIR: }
+
+// LLVM-LABEL: @_Z22local_const_load_storev
+//      LLVM: %[[#INIT:]] = call i32 @_Z11produce_intv()
+// LLVM-NEXT: store i32 %[[#INIT]], ptr %[[#SLOT:]], align 4, !tbaa !{{.*}}, !invariant.group !{{.+}}
+// LLVM-NEXT: %{{.+}} = load i32, ptr %[[#SLOT]], align 4, !tbaa !{{.*}}, !invariant.group !{{.+}}
+// LLVM: }
+
+int local_const_optimize() {
+  const int x = produce_int();
+  blackbox(x);
+  blackbox(x);
+  return x;
+}
+
+// LLVM-LABEL: @_Z20local_const_optimizev()
+// LLVM-NEXT:    %[[#slot:]] = alloca i32, align 4
+// LLVM-NEXT:    %[[#init:]] = tail call i32 @_Z11produce_intv()
+// LLVM-NEXT:    store i32 %[[#init]], ptr %[[#slot]], align 4, !tbaa !{{.*}}, !invariant.group !{{.+}}
+// LLVM-NEXT:    call void @_Z8blackboxRKi(ptr nonnull %[[#slot]])
+// LLVM-NEXT:    call void @_Z8blackboxRKi(ptr nonnull %[[#slot]])
+// LLVM-NEXT:    ret i32 %[[#init]]
+// LLVM-NEXT:  }
+
+int local_scoped_const() {
+  {
+    const int x = produce_int();
+    blackbox(x);
+    return x;
+  }
+}
+
+// CIR-LABEL: @_Z18local_scoped_constv()
+//      CIR:    cir.scope {
+// CIR-NEXT:      %[[#x_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const]
+// CIR-NEXT:      %[[#init:]] = cir.call @_Z11produce_intv() : () -> !s32i
+// CIR-NEXT:      cir.store{{.*}} %[[#init]], %[[#x_slot]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT:      cir.call @_Z8blackboxRKi(%[[#x_slot]]) : (!cir.ptr<!s32i>) -> ()
+// CIR-NEXT:      %[[#x_reload:]] = cir.load{{.*}} %[[#x_slot]] : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:      cir.store{{.*}} %[[#x_reload]], %[[#ret_slot:]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT:      %[[#ret:]] = cir.load{{.*}} %[[#ret_slot]] : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:      cir.return %[[#ret]] : !s32i
+// CIR-NEXT:    }
+//      CIR:  }
+
+// LLVM-LABEL: @_Z18local_scoped_constv()
+// LLVM-NEXT:    %[[#x_slot:]] = alloca i32, align 4
+// LLVM-NEXT:    %[[#init:]] = tail call i32 @_Z11produce_intv()
+// LLVM-NEXT:    store i32 %[[#init]], ptr %[[#x_slot]], align 4, !tbaa !{{.+}}, !invariant.group !{{.+}}
+// LLVM-NEXT:    call void @_Z8blackboxRKi(ptr nonnull %[[#x_slot]])
+// LLVM-NEXT:    ret i32 %[[#init]]
+// LLVM-NEXT:  }
+
+void local_const_in_loop() {
+  for (int i = 0; i < 10; ++i) {
+    const int x = produce_int();
+    blackbox(x);
+    consume(x);
+  }
+}
+
+// CIR-LABEL: @_Z19local_const_in_loopv
+//      CIR:    cir.scope {
+//      CIR:      cir.for : cond {
+//      CIR:      } body {
+// CIR-NEXT:        cir.scope {
+// CIR-NEXT:          %[[#x_slot:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init, const]
+// CIR-NEXT:          %[[#init:]] = cir.call @_Z11produce_intv() : () -> !s32i
+// CIR-NEXT:          cir.store{{.*}} %[[#init]], %[[#x_slot]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT:          cir.call @_Z8blackboxRKi(%[[#x_slot]]) : (!cir.ptr<!s32i>) -> ()
+// CIR-NEXT:          %[[#x_reload:]] = cir.load{{.*}} %[[#x_slot]] : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:          cir.call @_Z7consumei(%[[#x_reload]]) : (!s32i) -> ()
+// CIR-NEXT:        }
+// CIR-NEXT:        cir.yield
+// CIR-NEXT:      } step {
+//      CIR:      }
+// CIR-NEXT:    }
+// CIR-NEXT:    cir.return
+// CIR-NEXT:  }
+
+// LLVM-LABEL: @_Z19local_const_in_loopv()
+//      LLVM:    %[[#x_ptr:]] = call ptr @llvm.launder.invariant.group.p0(ptr nonnull %1)
+// LLVM-NEXT:    %[[#init:]] = call i32 @_Z11produce_intv()
+// LLVM-NEXT:    store i32 %[[#init]], ptr %[[#x_ptr]], align 4, !tbaa !{{.+}}, !invariant.group !{{.+}}
+// LLVM-NEXT:    call void @_Z8blackboxRKi(ptr nonnull %[[#x_ptr]])
+// LLVM-NEXT:    call void @_Z7consumei(i32 %[[#init]])
+//      LLVM:  }
+
+void local_const_in_while_condition() {
+  while (const int x = produce_int()) {
+    blackbox(x);
+  }
+}
+
+// LLVM-LABEL: @_Z30local_const_in_while_conditionv()
+//      LLVM:    %[[#x_slot:]] = alloca i32, align 4
+// LLVM-NEXT:    %[[#init:]] = tail call i32 @_Z11produce_intv()
+// LLVM-NEXT:    store i32 %[[#init]], ptr %[[#x_slot]], align 4
+// LLVM-NEXT:    %[[loop_cond:.+]] = icmp eq i32 %[[#init]], 0
+// LLVM-NEXT:    br i1 %[[loop_cond]], label %{{.+}}, label %[[loop_body:.+]]
+//      LLVM:  [[loop_body]]:
+// LLVM-NEXT:    call void @_Z8blackboxRKi(ptr nonnull %[[#x_slot]])
+// LLVM-NEXT:    %[[#next:]] = call i32 @_Z11produce_intv()
+// LLVM-NEXT:    store i32 %[[#next]], ptr %[[#x_slot]], align 4
+// LLVM-NEXT:    %[[cond:.+]] = icmp eq i32 %[[#next]], 0
+// LLVM-NEXT:    br i1 %[[cond]], label %{{.+}}, label %[[loop_body]]
+//      LLVM:  }
diff --git a/clang/test/CIR/Incubator/CodeGen/const-array.c b/clang/test/CIR/Incubator/CodeGen/const-array.c
new file mode 100644
index 0000000000000..30ce6da1175ca
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/const-array.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s
+
+void bar() {
+  const int arr[1] = {1};
+}
+
+// CHECK: cir.global "private" constant internal dso_local @bar.arr = #cir.const_array<[#cir.int<1> : !s32i]> : !cir.array<!s32i x 1> {alignment = 4 : i64}
+// CHECK: cir.func {{.*}} @bar()
+// CHECK:   {{.*}} = cir.get_global @bar.arr : !cir.ptr<!cir.array<!s32i x 1>>
+
+void foo() {
+  int a[10] = {1};
+}
+
+// CHECK-LABEL: @foo()
+// CHECK: %[[ADDR:.*]] = cir.alloca !cir.array<!s32i x 10>, !cir.ptr<!cir.array<!s32i x 10>>, ["a", init]
+// CHECK: %[[SRC:.*]] = cir.get_global @__const.foo.a : !cir.ptr<!cir.array<!s32i x 10>>
+// CHECK: cir.copy %[[SRC]] to %[[ADDR]] : !cir.ptr<!cir.array<!s32i x 10>>
diff --git a/clang/test/CIR/Incubator/CodeGen/const-baseclass.cpp b/clang/test/CIR/Incubator/CodeGen/const-baseclass.cpp
new file mode 100644
index 0000000000000..8c955db0ed0cb
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/const-baseclass.cpp
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t-og.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t-og.ll %s
+
+struct Empty { };
+struct A {
+};
+
+struct B : A, Empty {
+  B() : A(), Empty() { }
+};
+
+void f() {
+  B b1;
+}
+
+// Trivial base class constructor calls are lowered away.
+// CHECK-LABEL: @_ZN1BC2Ev
+// CHECK: %[[A:.*]] = cir.base_class_addr {{.*}} [0] -> !cir.ptr<!rec_A>
+// CHECK: %[[BASE:.*]] = cir.base_class_addr {{.*}} [0] -> !cir.ptr<!rec_Empty>
+// CHECK: cir.return
+
+// LLVM-LABEL: define {{.*}} @_ZN1BC2Ev
+// LLVM-NOT:     call {{.*}} @_ZN1AC2Ev
+// LLVM-NOT:     call {{.*}} @_ZN5EmptyC2Ev
+// LLVM:         ret void
+
+// OGCG-LABEL: define {{.*}} @_ZN1BC2Ev
+// OGCG-NOT:     call {{.*}} @_ZN1AC2Ev
+// OGCG-NOT:     call {{.*}} @_ZN5EmptyC2Ev
+// OGCG:         ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/const-bitfields.c b/clang/test/CIR/Incubator/CodeGen/const-bitfields.c
new file mode 100644
index 0000000000000..f0576b85d69f4
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/const-bitfields.c
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o - 2>&1 | FileCheck %s
+
+struct T {
+  int X : 5;
+  int Y : 6;
+  int Z : 9;
+  int W;  
+};
+
+struct Inner {
+  unsigned a :  1;
+  unsigned b :  1;
+  unsigned c :  1;
+  unsigned d : 30;
+};
+
+// CHECK-DAG: !rec_anon_struct = !cir.record<struct  {!u8i, !u8i, !u8i, !u8i, !s32i}>
+// CHECK-DAG: !rec_T = !cir.record<struct "T" {!u32i, !s32i} #cir.record.decl.ast>
+// CHECK-DAG: !rec_anon_struct1 = !cir.record<struct  {!u8i, !cir.array<!u8i x 3>, !u8i, !u8i, !u8i, !u8i}>
+// CHECK-DAG: #bfi_Z = #cir.bitfield_info<name = "Z", storage_type = !u32i, size = 9, offset = 11, is_signed = true>
+
+struct T GV = { 1, 5, 26, 42 };
+// CHECK: cir.global external @GV = #cir.const_record<{#cir.int<161> : !u8i, #cir.int<208> : !u8i, #cir.int<0> : !u8i,  #cir.zero : !u8i, #cir.int<42> : !s32i}> : !rec_anon_struct
+
+// check padding is used (const array of zeros)
+struct Inner var = { 1, 0, 1, 21};
+// CHECK: cir.global external @var = #cir.const_record<{#cir.int<5> : !u8i, #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 3>, #cir.int<21> : !u8i, #cir.int<0> : !u8i, #cir.int<0> : !u8i, #cir.int<0> : !u8i}> : !rec_anon_struct1
+
+
+// CHECK: cir.func {{.*@getZ()}}
+// CHECK:   %1 = cir.get_global @GV : !cir.ptr<!rec_anon_struct>
+// CHECK:   %2 = cir.cast bitcast %1 : !cir.ptr<!rec_anon_struct> -> !cir.ptr<!rec_T>
+// CHECK:   %3 = cir.get_member %2[0] {name = "Z"} : !cir.ptr<!rec_T> -> !cir.ptr<!u32i>
+// CHECK:   %4 = cir.get_bitfield align(4) (#bfi_Z, %3 : !cir.ptr<!u32i>) -> !s32i
+int getZ() {
+  return GV.Z;
+}
+
+// check the type used is the type of T struct for plain field
+// CHECK:  cir.func {{.*@getW()}}
+// CHECK:    %1 = cir.get_global @GV : !cir.ptr<!rec_anon_struct>
+// CHECK:    %2 = cir.cast bitcast %1 : !cir.ptr<!rec_anon_struct> -> !cir.ptr<!rec_T>
+// CHECK:    %3 = cir.get_member %2[1] {name = "W"} : !cir.ptr<!rec_T> -> !cir.ptr<!s32i>
+int getW() {
+  return GV.W;
+}
+
diff --git a/clang/test/CIR/Incubator/CodeGen/const-complex.cpp b/clang/test/CIR/Incubator/CodeGen/const-complex.cpp
new file mode 100644
index 0000000000000..76b4f8a0284d1
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/const-complex.cpp
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CHECK
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+
+int _Complex gci;
+
+float _Complex gcf;
+
+int _Complex gci2 = { 1, 2 };
+
+float _Complex gcf2 = { 1.0f, 2.0f };
+
+// CHECK: cir.global external {{.*}} = #cir.zero : !cir.complex<!s32i>
+// CHECK: cir.global external {{.*}} = #cir.zero : !cir.complex<!cir.float>
+// CHECK: cir.global external {{.*}} = #cir.complex<#cir.int<1> : !s32i, #cir.int<2> : !s32i> : !cir.complex<!s32i>
+// CHECK: cir.global external {{.*}} = #cir.complex<#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00> : !cir.float> : !cir.complex<!cir.float>
+
+// LLVM: {{.*}} = global { i32, i32 } zeroinitializer, align 4
+// LLVM: {{.*}} = global { float, float } zeroinitializer, align 4
+// LLVM: {{.*}} = global { i32, i32 } { i32 1, i32 2 }, align 4
+// LLVM: {{.*}} = global { float, float } { float 1.000000e+00, float 2.000000e+00 }, align 4
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/constant-expr.cpp b/clang/test/CIR/Incubator/CodeGen/constant-expr.cpp
new file mode 100644
index 0000000000000..63025dcc47a04
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/constant-expr.cpp
@@ -0,0 +1,42 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+struct StructWithConstEval {
+  consteval int _Complex consteval_ret_complex() { return {1, 2}; }
+  consteval int consteval_ret_int() { return 1; }
+  consteval void consteval_ret_void() {}
+};
+
+void calling_consteval_methods() {
+  StructWithConstEval a;
+  int b = a.consteval_ret_int();
+  int _Complex c = a.consteval_ret_complex();
+  a.consteval_ret_void();
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !rec_StructWithConstEval, !cir.ptr<!rec_StructWithConstEval>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init]
+// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["c", init]
+// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s32i
+// CIR: cir.store {{.*}} %[[CONST_1]], %[[B_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CIR: %[[CONST_COMPLEX:.*]] = cir.const #cir.complex<#cir.int<1> : !s32i, #cir.int<2> : !s32i> : !cir.complex<!s32i>
+// CIR: cir.store {{.*}} %[[CONST_COMPLEX]], %[[C_ADDR]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca %struct.StructWithConstEval, i64 1, align 1
+// LLVM: %[[B_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[C_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM: store i32 1, ptr %[[B_ADDR]], align 4
+// LLVM: store { i32, i32 } { i32 1, i32 2 }, ptr %[[C_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca %struct.StructWithConstEval, align 1
+// OGCG: %[[B_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[C_ADDR:.*]] = alloca { i32, i32 }, align 4
+// OGCG: store i32 1, ptr %[[B_ADDR]], align 4
+// OGCG: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[C_ADDR]], i32 0, i32 0
+// OGCG: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[C_ADDR]], i32 0, i32 1
+// OGCG: store i32 1, ptr %[[C_REAL_PTR]], align 4
+// OGCG: store i32 2, ptr %[[C_IMAG_PTR]], align 4
diff --git a/clang/test/CIR/Incubator/CodeGen/constptr.c b/clang/test/CIR/Incubator/CodeGen/constptr.c
new file mode 100644
index 0000000000000..e19f7574566b7
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/constptr.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
+
+int *p = (int*)0x1234;
+
+
+// CIR:  cir.global external @p = #cir.ptr<4660 : i64> : !cir.ptr<!s32i>
+// LLVM: @p = global ptr inttoptr (i64 4660 to ptr)
diff --git a/clang/test/CIR/Incubator/CodeGen/copy-constructor.cpp b/clang/test/CIR/Incubator/CodeGen/copy-constructor.cpp
new file mode 100644
index 0000000000000..25c1834305f70
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/copy-constructor.cpp
@@ -0,0 +1,93 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+struct HasScalarArrayMember {
+  int arr[2][2];
+  HasScalarArrayMember(const HasScalarArrayMember &);
+};
+
+// CIR-LABEL: cir.func {{.*}} @_ZN20HasScalarArrayMemberC2ERKS_(
+// CIR-NEXT:    %[[#THIS:]] = cir.alloca !cir.ptr<!rec_HasScalarArrayMember>
+// CIR-NEXT:    %[[#OTHER:]] = cir.alloca !cir.ptr<!rec_HasScalarArrayMember>
+// CIR-NEXT:    cir.store %arg0, %[[#THIS]]
+// CIR-NEXT:    cir.store %arg1, %[[#OTHER]]
+// CIR-NEXT:    %[[#THIS_LOAD:]] = cir.load{{.*}} %[[#THIS]]
+// CIR-NEXT:    %[[#THIS_ARR:]] = cir.get_member %[[#THIS_LOAD]][0] {name = "arr"}
+// CIR-NEXT:    %[[#OTHER_LOAD:]] = cir.load{{.*}} %[[#OTHER]]
+// CIR-NEXT:    %[[#OTHER_ARR:]] = cir.get_member %[[#OTHER_LOAD]][0] {name = "arr"}
+// CIR-NEXT:    cir.copy %[[#OTHER_ARR]] to %[[#THIS_ARR]] : !cir.ptr<!cir.array<!cir.array<!s32i x 2> x 2>>
+// CIR-NEXT:    cir.return
+
+// LLVM-LABEL: define {{.*}} @_ZN20HasScalarArrayMemberC2ERKS_(
+// LLVM-SAME:      ptr %[[#ARG0:]], ptr %[[#ARG1:]])
+// LLVM-NEXT:    %[[#THIS:]] = alloca ptr
+// LLVM-NEXT:    %[[#OTHER:]] = alloca ptr
+// LLVM-NEXT:    store ptr %[[#ARG0]], ptr %[[#THIS]]
+// LLVM-NEXT:    store ptr %[[#ARG1]], ptr %[[#OTHER]]
+// LLVM-NEXT:    %[[#THIS_LOAD:]] = load ptr, ptr %[[#THIS]]
+// LLVM-NEXT:    %[[#THIS_ARR:]] = getelementptr %struct.HasScalarArrayMember, ptr %[[#THIS_LOAD]], i32 0, i32 0
+// LLVM-NEXT:    %[[#OTHER_LOAD:]] = load ptr, ptr %[[#OTHER]]
+// LLVM-NEXT:    %[[#OTHER_ARR:]] = getelementptr %struct.HasScalarArrayMember, ptr %[[#OTHER_LOAD]], i32 0, i32 0
+// LLVM-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr %[[#THIS_ARR]], ptr %[[#OTHER_ARR]], i32 16, i1 false)
+// LLVM-NEXT:    ret void
+HasScalarArrayMember::HasScalarArrayMember(const HasScalarArrayMember &) = default;
+
+struct Trivial { int *i; };
+struct ManyMembers {
+  int i;
+  int j;
+  Trivial k;
+  int l[1];
+  int m[2];
+  Trivial n;
+  int &o;
+  int *p;
+};
+
+// CIR-LABEL: cir.func {{.*}} @_ZN11ManyMembersC2ERKS_(
+// CIR:         %[[#THIS_LOAD:]] = cir.load{{.*}} %[[#]]
+// CIR-NEXT:    %[[#THIS_I:]] = cir.get_member %[[#THIS_LOAD]][0] {name = "i"}
+// CIR-NEXT:    %[[#OTHER_LOAD:]] = cir.load{{.*}} %[[#OTHER:]]
+// CIR-NEXT:    %[[#OTHER_I:]] = cir.get_member %[[#OTHER_LOAD]][0] {name = "i"}
+// CIR-NEXT:    %[[#MEMCPY_SIZE:]] = cir.const #cir.int<8>
+// CIR-NEXT:    %[[#THIS_I_CAST:]] = cir.cast bitcast %[[#THIS_I]] : !cir.ptr<!s32i> -> !cir.ptr<!void>
+// CIR-NEXT:    %[[#OTHER_I_CAST:]] = cir.cast bitcast %[[#OTHER_I]] : !cir.ptr<!s32i> -> !cir.ptr<!void>
+// CIR-NEXT:    cir.libc.memcpy %[[#MEMCPY_SIZE]] bytes from %[[#OTHER_I_CAST]] to %[[#THIS_I_CAST]]
+// CIR-NEXT:    %[[#THIS_K:]] = cir.get_member %[[#THIS_LOAD]][2] {name = "k"}
+// CIR-NEXT:    %[[#OTHER_LOAD:]] = cir.load{{.*}} %[[#OTHER]]
+// CIR-NEXT:    %[[#OTHER_K:]] = cir.get_member %[[#OTHER_LOAD]][2] {name = "k"}
+// CIR-NEXT:    cir.copy %[[#OTHER_K]] to %[[#THIS_K]] : !cir.ptr<!rec_Trivial>
+// CIR-NEXT:    %[[#THIS_L:]] = cir.get_member %[[#THIS_LOAD]][3] {name = "l"}
+// CIR-NEXT:    %[[#OTHER_LOAD:]] = cir.load{{.*}} %[[#OTHER]]
+// CIR-NEXT:    %[[#OTHER_L:]] = cir.get_member %[[#OTHER_LOAD]][3] {name = "l"}
+// CIR-NEXT:    %[[#MEMCPY_SIZE:]] = cir.const #cir.int<12>
+// CIR-NEXT:    %[[#THIS_L_CAST:]] = cir.cast bitcast %[[#THIS_L]] : !cir.ptr<!cir.array<!s32i x 1>> -> !cir.ptr<!void>
+// CIR-NEXT:    %[[#OTHER_L_CAST:]] = cir.cast bitcast %[[#OTHER_L]] : !cir.ptr<!cir.array<!s32i x 1>> -> !cir.ptr<!void>
+// CIR-NEXT:    cir.libc.memcpy %[[#MEMCPY_SIZE]] bytes from %[[#OTHER_L_CAST]] to %[[#THIS_L_CAST]]
+// CIR-NEXT:    %[[#THIS_N:]] = cir.get_member %[[#THIS_LOAD]][5] {name = "n"}
+// CIR-NEXT:    %[[#OTHER_LOAD:]] = cir.load{{.*}} %[[#OTHER]]
+// CIR-NEXT:    %[[#OTHER_N:]] = cir.get_member %[[#OTHER_LOAD]][5] {name = "n"}
+// CIR-NEXT:    cir.copy %[[#OTHER_N]] to %[[#THIS_N]] : !cir.ptr<!rec_Trivial>
+// CIR-NEXT:    %[[#THIS_O:]] = cir.get_member %[[#THIS_LOAD]][6] {name = "o"}
+// CIR-NEXT:    %[[#OTHER_LOAD:]] = cir.load{{.*}} %[[#OTHER]]
+// CIR-NEXT:    %[[#OTHER_O:]] = cir.get_member %[[#OTHER_LOAD]][6] {name = "o"}
+// CIR-NEXT:    %[[#MEMCPY_SIZE:]] = cir.const #cir.int<16>
+// CIR-NEXT:    %[[#THIS_O_CAST:]] = cir.cast bitcast %[[#THIS_O]] : !cir.ptr<!cir.ptr<!s32i>> -> !cir.ptr<!void>
+// CIR-NEXT:    %[[#OTHER_O_CAST:]] = cir.cast bitcast %[[#OTHER_O]] : !cir.ptr<!cir.ptr<!s32i>> -> !cir.ptr<!void>
+// CIR-NEXT:    cir.libc.memcpy %[[#MEMCPY_SIZE]] bytes from %[[#OTHER_O_CAST]] to %[[#THIS_O_CAST]]
+// CIR-NEXT:    cir.return
+// CIR-NEXT:  }
+
+// CIR-LABEL: cir.func {{.*}} @_Z9forceCopyR11ManyMembers(
+// CIR:         cir.copy
+void forceCopy(ManyMembers &m) {
+  ManyMembers copy(m);
+}
+
+// CIR-LABEL: cir.func {{.*}} @_Z6doCopyR11ManyMembers(
+// CIR:         cir.copy
+ManyMembers doCopy(ManyMembers &src) {
+  return src;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/coro-task.cpp b/clang/test/CIR/Incubator/CodeGen/coro-task.cpp
new file mode 100644
index 0000000000000..ec428a3404503
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/coro-task.cpp
@@ -0,0 +1,431 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+namespace std {
+
+template<typename T> struct remove_reference       { typedef T type; };
+template<typename T> struct remove_reference<T &>  { typedef T type; };
+template<typename T> struct remove_reference<T &&> { typedef T type; };
+
+template<typename T>
+typename remove_reference<T>::type &&move(T &&t) noexcept;
+
+template <class Ret, typename... T>
+struct coroutine_traits { using promise_type = typename Ret::promise_type; };
+
+template <class Promise = void>
+struct coroutine_handle {
+  static coroutine_handle from_address(void *) noexcept;
+};
+template <>
+struct coroutine_handle<void> {
+  template <class PromiseType>
+  coroutine_handle(coroutine_handle<PromiseType>) noexcept;
+  static coroutine_handle from_address(void *);
+};
+
+struct suspend_always {
+  bool await_ready() noexcept { return false; }
+  void await_suspend(coroutine_handle<>) noexcept {}
+  void await_resume() noexcept {}
+};
+
+struct suspend_never {
+  bool await_ready() noexcept { return true; }
+  void await_suspend(coroutine_handle<>) noexcept {}
+  void await_resume() noexcept {}
+};
+
+struct string {
+  int size() const;
+  string();
+  string(char const *s);
+};
+
+template<typename T>
+struct optional {
+  optional();
+  optional(const T&);
+  T &operator*() &;
+  T &&operator*() &&;
+  T &value() &;
+  T &&value() &&;
+};
+} // namespace std
+
+namespace folly {
+namespace coro {
+
+using std::suspend_always;
+using std::suspend_never;
+using std::coroutine_handle;
+
+using SemiFuture = int;
+
+template<class T>
+struct Task {
+    struct promise_type {
+        Task<T> get_return_object() noexcept;
+        suspend_always initial_suspend() noexcept;
+        suspend_always final_suspend() noexcept;
+        void return_value(T);
+        void unhandled_exception();
+        auto yield_value(Task<T>) noexcept { return final_suspend(); }
+    };
+    bool await_ready() noexcept { return false; }
+    void await_suspend(coroutine_handle<>) noexcept {}
+    T await_resume();
+};
+
+template<>
+struct Task<void> {
+    struct promise_type {
+        Task<void> get_return_object() noexcept;
+        suspend_always initial_suspend() noexcept;
+        suspend_always final_suspend() noexcept;
+        void return_void() noexcept;
+        void unhandled_exception() noexcept;
+        auto yield_value(Task<void>) noexcept { return final_suspend(); }
+    };
+    bool await_ready() noexcept { return false; }
+    void await_suspend(coroutine_handle<>) noexcept {}
+    void await_resume() noexcept {}
+    SemiFuture semi();
+};
+
+// FIXME: add CIRGen support here.
+// struct blocking_wait_fn {
+//   template <typename T>
+//   T operator()(Task<T>&& awaitable) const {
+//     return T();
+//   }
+// };
+
+// inline constexpr blocking_wait_fn blocking_wait{};
+// static constexpr blocking_wait_fn const& blockingWait = blocking_wait;
+
+template <typename T>
+T blockingWait(Task<T>&& awaitable) {
+  return T();
+}
+
+template <typename T>
+Task<T> collectAllRange(Task<T>* awaitable);
+
+template <typename... SemiAwaitables>
+Task<void> collectAll(SemiAwaitables&&... awaitables);
+
+struct co_invoke_fn {
+  template <typename F, typename... A>
+  Task<void> operator()(F&& f, A&&... a) const {
+    return Task<void>();
+  }
+};
+
+co_invoke_fn co_invoke;
+
+}} // namespace folly::coro
+
+// CHECK-DAG: ![[IntTask:.*]] = !cir.record<struct "folly::coro::Task<int>" padded {!u8i}>
+// CHECK-DAG: ![[VoidTask:.*]] = !cir.record<struct "folly::coro::Task<void>" padded {!u8i}>
+// CHECK-DAG: ![[VoidPromisse:.*]] = !cir.record<struct "folly::coro::Task<void>::promise_type" padded {!u8i}>
+// CHECK-DAG: ![[CoroHandleVoid:.*]] = !cir.record<struct "std::coroutine_handle<void>" padded {!u8i}>
+// CHECK-DAG: ![[CoroHandlePromise:rec_.*]]  = !cir.record<struct "std::coroutine_handle<folly::coro::Task<void>::promise_type>" padded {!u8i}>
+// CHECK-DAG: ![[StdString:.*]] = !cir.record<struct "std::string" padded {!u8i}>
+// CHECK-DAG: ![[SuspendAlways:.*]] = !cir.record<struct "std::suspend_always" padded {!u8i}>
+
+// CHECK: module {{.*}} {
+// CHECK-NEXT: cir.global external @_ZN5folly4coro9co_invokeE = #cir.zero : !rec_folly3A3Acoro3A3Aco_invoke_fn
+
+// CHECK: cir.func builtin private @__builtin_coro_id(!u32i, !cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>) -> !u32i
+// CHECK: cir.func builtin private @__builtin_coro_alloc(!u32i) -> !cir.bool
+// CHECK: cir.func builtin private @__builtin_coro_size() -> !u64i
+// CHECK: cir.func builtin private @__builtin_coro_begin(!u32i, !cir.ptr<!void>) -> !cir.ptr<!void>
+
+using VoidTask = folly::coro::Task<void>;
+
+VoidTask silly_task() {
+  co_await std::suspend_always();
+}
+
+// CHECK: cir.func coroutine {{.*}} @_Z10silly_taskv() -> ![[VoidTask]]
+
+// Allocate promise.
+
+// CHECK: %[[#VoidTaskAddr:]] = cir.alloca ![[VoidTask]], {{.*}}, ["__retval"]
+// CHECK: %[[#SavedFrameAddr:]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["__coro_frame_addr"] {alignment = 8 : i64}
+// CHECK: %[[#VoidPromisseAddr:]] = cir.alloca ![[VoidPromisse]], {{.*}}, ["__promise"]
+
+// Get coroutine id with __builtin_coro_id.
+
+// CHECK: %[[#NullPtr:]] = cir.const #cir.ptr<null> : !cir.ptr<!void>
+// CHECK: %[[#Align:]] = cir.const #cir.int<16> : !u32i
+// CHECK: %[[#CoroId:]] = cir.call @__builtin_coro_id(%[[#Align]], %[[#NullPtr]], %[[#NullPtr]], %[[#NullPtr]])
+
+// Perform allocation calling operator 'new' depending on __builtin_coro_alloc and
+// call __builtin_coro_begin for the final coroutine frame address.
+
+// CHECK: %[[#ShouldAlloc:]] = cir.call @__builtin_coro_alloc(%[[#CoroId]]) : (!u32i) -> !cir.bool
+// CHECK: cir.store{{.*}} %[[#NullPtr]], %[[#SavedFrameAddr]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// CHECK: cir.if %[[#ShouldAlloc]] {
+// CHECK:   %[[#CoroSize:]] = cir.call @__builtin_coro_size() : () -> !u64i
+// CHECK:   %[[#AllocAddr:]] = cir.call @_Znwm(%[[#CoroSize]]) : (!u64i) -> !cir.ptr<!void>
+// CHECK:   cir.store{{.*}} %[[#AllocAddr]], %[[#SavedFrameAddr]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// CHECK: }
+// CHECK: %[[#Load0:]] = cir.load{{.*}} %[[#SavedFrameAddr]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CHECK: %[[#CoroFrameAddr:]] = cir.call @__builtin_coro_begin(%[[#CoroId]], %[[#Load0]])
+
+// Call promise.get_return_object() to retrieve the task object.
+
+// CHECK: %[[#RetObj:]] = cir.call @_ZN5folly4coro4TaskIvE12promise_type17get_return_objectEv(%[[#VoidPromisseAddr]]) : {{.*}} -> ![[VoidTask]]
+// CHECK: cir.store{{.*}} %[[#RetObj]], %[[#VoidTaskAddr]] : ![[VoidTask]]
+
+// Start a new scope for the actual codegen for co_await, create temporary allocas for
+// holding coroutine handle and the suspend_always struct.
+
+// CHECK: cir.scope {
+// CHECK:   %[[#SuspendAlwaysAddr:]] = cir.alloca ![[SuspendAlways]], {{.*}} ["ref.tmp0"] {alignment = 1 : i64}
+// CHECK:   %[[#CoroHandleVoidAddr:]] = cir.alloca ![[CoroHandleVoid]], {{.*}} ["agg.tmp0"] {alignment = 1 : i64}
+// CHECK:   %[[#CoroHandlePromiseAddr:]] = cir.alloca ![[CoroHandlePromise]], {{.*}} ["agg.tmp1"] {alignment = 1 : i64}
+
+// Effectively execute `co_await promise_type::initial_suspend()` by calling initial_suspend() and getting
+// the suspend_always struct to use for cir.await. Note that we return by-value since we defer ABI lowering
+// to later passes, same is done elsewhere.
+
+// CHECK:   %[[#Tmp0:]] = cir.call @_ZN5folly4coro4TaskIvE12promise_type15initial_suspendEv(%[[#VoidPromisseAddr]])
+// CHECK:   cir.store{{.*}} %[[#Tmp0]], %[[#SuspendAlwaysAddr]]
+
+//
+// Here we start mapping co_await to cir.await.
+//
+
+// First region `ready` has a special cir.yield code to veto suspension.
+
+// CHECK:   cir.await(init, ready : {
+// CHECK:     %[[#ReadyVeto:]] = cir.scope {
+// CHECK:       %[[#TmpCallRes:]] = cir.call @_ZNSt14suspend_always11await_readyEv(%[[#SuspendAlwaysAddr]])
+// CHECK:       cir.yield %[[#TmpCallRes]] : !cir.bool
+// CHECK:     }
+// CHECK:     cir.condition(%[[#ReadyVeto]])
+
+// Second region `suspend` contains the actual suspend logic.
+//
+// - Start by getting the coroutine handle using from_address().
+// - Implicitly convert the coroutine handle from the task-specific promise
+//   specialization to a void one.
+// - Call suspend_always::await_suspend() passing the handle.
+//
+// FIXME: add veto support for non-void await_suspends.
+
+// CHECK:   }, suspend : {
+// CHECK:     %[[#FromAddrRes:]] = cir.call @_ZNSt16coroutine_handleIN5folly4coro4TaskIvE12promise_typeEE12from_addressEPv(%[[#CoroFrameAddr]])
+// CHECK:     cir.store{{.*}} %[[#FromAddrRes]], %[[#CoroHandlePromiseAddr]] : ![[CoroHandlePromise]]
+// CHECK:     %[[#CoroHandlePromiseReload:]] = cir.load{{.*}} %[[#CoroHandlePromiseAddr]]
+// CHECK:     cir.call @_ZNSt16coroutine_handleIvEC1IN5folly4coro4TaskIvE12promise_typeEEES_IT_E(%[[#CoroHandleVoidAddr]], %[[#CoroHandlePromiseReload]])
+// CHECK:     %[[#CoroHandleVoidReload:]] = cir.load{{.*}} %[[#CoroHandleVoidAddr]] : !cir.ptr<![[CoroHandleVoid]]>, ![[CoroHandleVoid]]
+// CHECK:     cir.call @_ZNSt14suspend_always13await_suspendESt16coroutine_handleIvE(%[[#SuspendAlwaysAddr]], %[[#CoroHandleVoidReload]])
+// CHECK:     cir.yield
+
+// Third region `resume` handles coroutine resuming logic.
+
+// CHECK:   }, resume : {
+// CHECK:     cir.call @_ZNSt14suspend_always12await_resumeEv(%[[#SuspendAlwaysAddr]])
+// CHECK:     cir.yield
+// CHECK:   },)
+// CHECK: }
+
+// Since we already tested cir.await guts above, the remaining checks for:
+// - The actual user written co_await
+// - The promise call
+// - The final suspend co_await
+// - Return
+
+// The actual user written co_await
+// CHECK: cir.scope {
+// CHECK:   cir.await(user, ready : {
+// CHECK:   }, suspend : {
+// CHECK:   }, resume : {
+// CHECK:   },)
+// CHECK: }
+
+// The promise call
+// CHECK: cir.call @_ZN5folly4coro4TaskIvE12promise_type11return_voidEv(%[[#VoidPromisseAddr]])
+
+// The final suspend co_await
+// CHECK: cir.scope {
+// CHECK:   cir.await(final, ready : {
+// CHECK:   }, suspend : {
+// CHECK:   }, resume : {
+// CHECK:   },)
+// CHECK: }
+
+// Call builtin coro end and return
+
+// CHECK-NEXT: %[[#CoroEndArg0:]] = cir.const #cir.ptr<null> : !cir.ptr<!void>
+// CHECK-NEXT: %[[#CoroEndArg1:]] = cir.const #false
+// CHECK-NEXT: = cir.call @__builtin_coro_end(%[[#CoroEndArg0]], %[[#CoroEndArg1]])
+
+// CHECK: %[[#Tmp1:]] = cir.load{{.*}} %[[#VoidTaskAddr]]
+// CHECK-NEXT: cir.return %[[#Tmp1]]
+// CHECK-NEXT: }
+
+folly::coro::Task<int> byRef(const std::string& s) {
+  co_return s.size();
+}
+
+// FIXME: this could be less redundant than two allocas + reloads
+// CHECK: cir.func coroutine {{.*}} @_Z5byRefRKSt6string(%arg0: !cir.ptr<![[StdString]]> {{.*}} ![[IntTask]]
+// CHECK: %[[#AllocaParam:]] = cir.alloca !cir.ptr<![[StdString]]>, {{.*}} ["s", init, const]
+// CHECK: %[[#AllocaFnUse:]] = cir.alloca !cir.ptr<![[StdString]]>, {{.*}} ["s", init, const]
+
+folly::coro::Task<void> silly_coro() {
+  std::optional<folly::coro::Task<int>> task;
+  {
+    std::string s = "yolo";
+    task = byRef(s);
+  }
+  folly::coro::blockingWait(std::move(task.value()));
+  co_return;
+}
+
+// Make sure we properly handle OnFallthrough coro body sub stmt and
+// check there are not multiple co_returns emitted.
+
+// CHECK: cir.func coroutine {{.*}} @_Z10silly_corov() {{.*}} ![[VoidTask]]
+// CHECK: cir.await(init, ready : {
+// CHECK: cir.call @_ZN5folly4coro4TaskIvE12promise_type11return_voidEv
+// CHECK-NOT: cir.call @_ZN5folly4coro4TaskIvE12promise_type11return_voidEv
+// CHECK: cir.await(final, ready : {
+
+folly::coro::Task<int> go(int const& val);
+folly::coro::Task<int> go1() {
+  auto task = go(1);
+  co_return co_await task;
+}
+
+// CHECK: cir.func coroutine {{.*}} @_Z3go1v() {{.*}} ![[IntTask]]
+// CHECK: %[[#IntTaskAddr:]] = cir.alloca ![[IntTask]], !cir.ptr<![[IntTask]]>, ["task", init]
+
+// CHECK:   cir.await(init, ready : {
+// CHECK:   }, suspend : {
+// CHECK:   }, resume : {
+// CHECK:   },)
+// CHECK: }
+
+// The call to go(1) has its own scope due to full-expression rules.
+// CHECK: cir.scope {
+// CHECK:   %[[#OneAddr:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["ref.tmp1", init] {alignment = 4 : i64}
+// CHECK:   %[[#One:]] = cir.const #cir.int<1> : !s32i
+// CHECK:   cir.store{{.*}} %[[#One]], %[[#OneAddr]] : !s32i, !cir.ptr<!s32i>
+// CHECK:   %[[#IntTaskTmp:]] = cir.call @_Z2goRKi(%[[#OneAddr]]) : (!cir.ptr<!s32i>) -> ![[IntTask]]
+// CHECK:   cir.store{{.*}} %[[#IntTaskTmp]], %[[#IntTaskAddr]] : ![[IntTask]], !cir.ptr<![[IntTask]]>
+// CHECK: }
+
+// CHECK: %[[#CoReturnValAddr:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__coawait_resume_rval"] {alignment = 1 : i64}
+// CHECK: cir.await(user, ready : {
+// CHECK: }, suspend : {
+// CHECK: }, resume : {
+// CHECK:   %[[#ResumeVal:]] = cir.call @_ZN5folly4coro4TaskIiE12await_resumeEv(%3)
+// CHECK:   cir.store{{.*}} %[[#ResumeVal]], %[[#CoReturnValAddr]] : !s32i, !cir.ptr<!s32i>
+// CHECK: },)
+// CHECK: %[[#V:]] = cir.load{{.*}} %[[#CoReturnValAddr]] : !cir.ptr<!s32i>, !s32i
+// CHECK: cir.call @_ZN5folly4coro4TaskIiE12promise_type12return_valueEi({{.*}}, %[[#V]])
+
+folly::coro::Task<int> go1_lambda() {
+  auto task = []() -> folly::coro::Task<int> {
+    co_return 1;
+  }();
+  co_return co_await task;
+}
+
+// CHECK: cir.func coroutine {{.*}} @_ZZ10go1_lambdavENK3$_0clEv{{.*}} ![[IntTask]]
+// CHECK: cir.func coroutine {{.*}} @_Z10go1_lambdav() {{.*}} ![[IntTask]]
+
+folly::coro::Task<int> go4() {
+  auto* fn = +[](int const& i) -> folly::coro::Task<int> { co_return i; };
+  auto task = fn(3);
+  co_return co_await std::move(task);
+}
+
+// CHECK: cir.func coroutine {{.*}} @_Z3go4v() {{.*}} ![[IntTask]]
+
+// CHECK:   cir.await(init, ready : {
+// CHECK:   }, suspend : {
+// CHECK:   }, resume : {
+// CHECK:   },)
+// CHECK: }
+
+// CHECK: %12 = cir.scope {
+// CHECK:   %17 = cir.alloca !rec_anon2E2, !cir.ptr<!rec_anon2E2>, ["ref.tmp1"] {alignment = 1 : i64}
+
+// Get the lambda invoker ptr via `lambda operator folly::coro::Task<int> (*)(int const&)()`
+// CHECK:   %18 = cir.call @_ZZ3go4vENK3$_0cvPFN5folly4coro4TaskIiEERKiEEv(%17) : (!cir.ptr<!rec_anon2E2>) -> !cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> ![[IntTask]]>>
+// CHECK:   %19 = cir.unary(plus, %18) : !cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> ![[IntTask]]>>, !cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> ![[IntTask]]>>
+// CHECK:   cir.yield %19 : !cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> ![[IntTask]]>>
+// CHECK: }
+// CHECK: cir.store{{.*}} %12, %3 : !cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> ![[IntTask]]>>, !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> ![[IntTask]]>>>
+// CHECK: cir.scope {
+// CHECK:   %17 = cir.alloca !s32i, !cir.ptr<!s32i>, ["ref.tmp2", init] {alignment = 4 : i64}
+// CHECK:   %18 = cir.load{{.*}} %3 : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> ![[IntTask]]>>>, !cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> ![[IntTask]]>>
+// CHECK:   %19 = cir.const #cir.int<3> : !s32i
+// CHECK:   cir.store{{.*}} %19, %17 : !s32i, !cir.ptr<!s32i>
+
+// Call invoker, which calls operator() indirectly.
+// CHECK:   %20 = cir.call %18(%17) : (!cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> ![[IntTask]]>>, !cir.ptr<!s32i>) -> ![[IntTask]]
+// CHECK:   cir.store{{.*}} %20, %4 : ![[IntTask]], !cir.ptr<![[IntTask]]>
+// CHECK: }
+
+// CHECK:   cir.await(user, ready : {
+// CHECK:   }, suspend : {
+// CHECK:   }, resume : {
+// CHECK:   },)
+// CHECK: }
+
+folly::coro::Task<void> yield();
+folly::coro::Task<void> yield1() {
+  auto t = yield();
+  co_yield t;
+}
+
+// CHECK: cir.func coroutine {{.*}} @_Z6yield1v() -> !rec_folly3A3Acoro3A3ATask3Cvoid3E
+
+// CHECK: cir.await(init, ready : {
+// CHECK: }, suspend : {
+// CHECK: }, resume : {
+// CHECK: },)
+
+//      CHECK: cir.scope {
+// CHECK-NEXT:   %[[#SUSPEND_PTR:]] = cir.alloca !rec_std3A3Asuspend_always, !cir.ptr<!rec_std3A3Asuspend_always>
+// CHECK-NEXT:   %[[#AWAITER_PTR:]] = cir.alloca !rec_folly3A3Acoro3A3ATask3Cvoid3E, !cir.ptr<!rec_folly3A3Acoro3A3ATask3Cvoid3E>
+// CHECK-NEXT:   %[[#CORO_PTR:]] = cir.alloca !rec_std3A3Acoroutine_handle3Cvoid3E, !cir.ptr<!rec_std3A3Acoroutine_handle3Cvoid3E>
+// CHECK-NEXT:   %[[#CORO2_PTR:]] = cir.alloca !rec_std3A3Acoroutine_handle3Cfolly3A3Acoro3A3ATask3Cvoid3E3A3Apromise_type3E, !cir.ptr<!rec_std3A3Acoroutine_handle3Cfolly3A3Acoro3A3ATask3Cvoid3E3A3Apromise_type3E>
+// CHECK-NEXT:   cir.copy {{.*}} to %[[#AWAITER_PTR]] : !cir.ptr<!rec_folly3A3Acoro3A3ATask3Cvoid3E>
+// CHECK-NEXT:   %[[#AWAITER:]] = cir.load{{.*}} %[[#AWAITER_PTR]] : !cir.ptr<!rec_folly3A3Acoro3A3ATask3Cvoid3E>, !rec_folly3A3Acoro3A3ATask3Cvoid3E
+// CHECK-NEXT:   %[[#SUSPEND:]] = cir.call @_ZN5folly4coro4TaskIvE12promise_type11yield_valueES2_(%{{.+}}, %[[#AWAITER]]) : (!cir.ptr<!rec_folly3A3Acoro3A3ATask3Cvoid3E3A3Apromise_type>, !rec_folly3A3Acoro3A3ATask3Cvoid3E) -> !rec_std3A3Asuspend_always
+// CHECK-NEXT:   cir.store{{.*}} %[[#SUSPEND]], %[[#SUSPEND_PTR]] : !rec_std3A3Asuspend_always, !cir.ptr<!rec_std3A3Asuspend_always>
+// CHECK-NEXT:   cir.await(yield, ready : {
+// CHECK-NEXT:     %[[#READY:]] = cir.scope {
+// CHECK-NEXT:       %[[#A:]] = cir.call @_ZNSt14suspend_always11await_readyEv(%[[#SUSPEND_PTR]]) : (!cir.ptr<!rec_std3A3Asuspend_always>) -> !cir.bool
+// CHECK-NEXT:       cir.yield %[[#A]] : !cir.bool
+// CHECK-NEXT:     } : !cir.bool
+// CHECK-NEXT:     cir.condition(%[[#READY]])
+// CHECK-NEXT:   }, suspend : {
+// CHECK-NEXT:     %[[#CORO2:]] = cir.call @_ZNSt16coroutine_handleIN5folly4coro4TaskIvE12promise_typeEE12from_addressEPv(%9) : (!cir.ptr<!void>) -> !rec_std3A3Acoroutine_handle3Cfolly3A3Acoro3A3ATask3Cvoid3E3A3Apromise_type3E
+// CHECK-NEXT:     cir.store{{.*}} %[[#CORO2]], %[[#CORO2_PTR]] : !rec_std3A3Acoroutine_handle3Cfolly3A3Acoro3A3ATask3Cvoid3E3A3Apromise_type3E, !cir.ptr<!rec_std3A3Acoroutine_handle3Cfolly3A3Acoro3A3ATask3Cvoid3E3A3Apromise_type3E>
+// CHECK-NEXT:     %[[#B:]] = cir.load{{.*}} %[[#CORO2_PTR]] : !cir.ptr<!rec_std3A3Acoroutine_handle3Cfolly3A3Acoro3A3ATask3Cvoid3E3A3Apromise_type3E>, !rec_std3A3Acoroutine_handle3Cfolly3A3Acoro3A3ATask3Cvoid3E3A3Apromise_type3E
+// CHECK-NEXT:     cir.call @_ZNSt16coroutine_handleIvEC1IN5folly4coro4TaskIvE12promise_typeEEES_IT_E(%[[#CORO_PTR]], %[[#B]]) : (!cir.ptr<!rec_std3A3Acoroutine_handle3Cvoid3E>, !rec_std3A3Acoroutine_handle3Cfolly3A3Acoro3A3ATask3Cvoid3E3A3Apromise_type3E) -> ()
+// CHECK-NEXT:     %[[#C:]] = cir.load{{.*}} %[[#CORO_PTR]] : !cir.ptr<!rec_std3A3Acoroutine_handle3Cvoid3E>, !rec_std3A3Acoroutine_handle3Cvoid3E
+// CHECK-NEXT:     cir.call @_ZNSt14suspend_always13await_suspendESt16coroutine_handleIvE(%[[#SUSPEND_PTR]], %[[#C]]) : (!cir.ptr<!rec_std3A3Asuspend_always>, !rec_std3A3Acoroutine_handle3Cvoid3E) -> ()
+// CHECK-NEXT:     cir.yield
+// CHECK-NEXT:   }, resume : {
+// CHECK-NEXT:     cir.call @_ZNSt14suspend_always12await_resumeEv(%[[#SUSPEND_PTR]]) : (!cir.ptr<!rec_std3A3Asuspend_always>) -> ()
+// CHECK-NEXT:     cir.yield
+// CHECK-NEXT:   },)
+// CHECK-NEXT: }
+
+// CHECK: cir.await(final, ready : {
+// CHECK: }, suspend : {
+// CHECK: }, resume : {
+// CHECK: },)
+
+// CHECK: }
diff --git a/clang/test/CIR/Incubator/CodeGen/count-of.c b/clang/test/CIR/Incubator/CodeGen/count-of.c
new file mode 100644
index 0000000000000..4e2a7e7da5fea
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/count-of.c
@@ -0,0 +1,52 @@
+// RUN: %clang_cc1 -std=c2y -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -std=c2y -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -std=c2y -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+unsigned long vla_with_array_element_type_with_const_size() {
+  long size;
+  return _Countof(int[5][size]);
+}
+
+// CIR: %[[RET_ADDR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["__retval"]
+// CIR: %[[SIZE_ADDR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["size"]
+// CIR: %[[CONST_5:.*]] = cir.const #cir.int<5> : !u64i
+// CIR: cir.store{{.*}} %[[CONST_5]], %[[RET_ADDR]] : !u64i, !cir.ptr<!u64i>
+// CIR: %[[RET_VAL:.*]] = cir.load{{.*}} %[[RET_ADDR]] : !cir.ptr<!u64i>, !u64i
+// CIR: cir.return %[[RET_VAL]] : !u64i
+
+// LLVM: %[[RET_ADDR:.*]] = alloca i64, i64 1, align 8
+// LLVM: %[[SIZE_ADDR:.*]] = alloca i64, i64 1, align 8
+// LLVM: store i64 5, ptr %[[RET_ADDR]], align 8
+// LLVM: %[[RET_VAL:.*]] = load i64, ptr %[[RET_ADDR]], align 8
+// LLVM: ret i64 %[[RET_VAL]]
+
+// OGCG: %[[SIZE_ADDR:.*]] = alloca i64, align 8
+// OGCG: ret i64 5
+
+unsigned long vla_with_array_element_type_non_const_size() {
+  long size;
+  return _Countof(int[size][size]);
+}
+
+// CIR: %[[RET_ADDR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["__retval"]
+// CIR: %[[SIZE_ADDR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["size"]
+// CIR: %[[TMP_SIZE:.*]] = cir.load {{.*}} %[[SIZE_ADDR]] : !cir.ptr<!s64i>, !s64i
+// CIR: %[[TMP_SIZE_U64:.*]] = cir.cast integral %[[TMP_SIZE]] : !s64i -> !u64i
+// CIR: cir.store{{.*}} %[[TMP_SIZE_U64]], %[[RET_ADDR]] : !u64i, !cir.ptr<!u64i>
+// CIR: %[[TMP_RET:.*]] = cir.load{{.*}} %[[RET_ADDR]] : !cir.ptr<!u64i>, !u64i
+// CIR: cir.return %[[TMP_RET]] : !u64i
+
+// LLVM: %[[RET_ADDR:.*]] = alloca i64, i64 1, align 8
+// LLVM: %[[SIZE_ADDR:.*]] = alloca i64, i64 1, align 8
+// LLVM: %[[TMP_SIZE:.*]] = load i64, ptr %[[SIZE_ADDR]], align 8
+// LLVM: store i64 %[[TMP_SIZE]], ptr %[[RET_ADDR]], align 8
+// LLVM: %[[TMP_RET:.*]] = load i64, ptr %[[RET_ADDR]], align 8
+// LLVM: ret i64 %[[TMP_RET]]
+
+// OGCG: %[[SIZE_ADDR:.*]] = alloca i64, align 8
+// OGCG: %[[TMP_SIZE:.*]] = load i64, ptr %[[SIZE_ADDR]], align 8
+// OGCG: %[[TMP_SIZE_2:.*]] = load i64, ptr %[[SIZE_ADDR]], align 8
+// OGCG: ret i64 %[[TMP_SIZE]]
diff --git a/clang/test/CIR/Incubator/CodeGen/ctor-alias.cpp b/clang/test/CIR/Incubator/CodeGen/ctor-alias.cpp
new file mode 100644
index 0000000000000..d6bb98a9b7212
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/ctor-alias.cpp
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -emit-cir %s -o - | FileCheck %s
+
+struct DummyString {
+  DummyString(const char *s) {}
+};
+
+void t() {
+  DummyString s4 = "yolo";
+}
+
+//      CHECK: cir.func {{.*}} @_ZN11DummyStringC2EPKc
+// CHECK-NEXT:     %0 = cir.alloca !cir.ptr<!rec_DummyString>, !cir.ptr<!cir.ptr<!rec_DummyString>>, ["this", init] {alignment = 8 : i64}
+// CHECK-NEXT:     %1 = cir.alloca !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>, ["s", init] {alignment = 8 : i64}
+// CHECK-NEXT:     cir.store %arg0, %0 : !cir.ptr<!rec_DummyString>, !cir.ptr<!cir.ptr<!rec_DummyString>>
+// CHECK-NEXT:     cir.store %arg1, %1 : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+// CHECK-NEXT:     %2 = cir.load %0 : !cir.ptr<!cir.ptr<!rec_DummyString>>, !cir.ptr<!rec_DummyString>
+// CHECK-NEXT:     cir.return
+
+// CHECK-NOT: cir.func @_ZN11DummyStringC1EPKc
+
+//      CHECK:   cir.func {{.*}} @_Z1tv
+// CHECK-NEXT:     %0 = cir.alloca !rec_DummyString, !cir.ptr<!rec_DummyString>, ["s4", init] {alignment = 1 : i64}
+// CHECK-NEXT:     %1 = cir.get_global @".str" : !cir.ptr<!cir.array<!s8i x 5>>
+// CHECK-NEXT:     %2 = cir.cast array_to_ptrdecay %1 : !cir.ptr<!cir.array<!s8i x 5>> -> !cir.ptr<!s8i>
+// CHECK-NEXT:     cir.call @_ZN11DummyStringC2EPKc(%0, %2) : (!cir.ptr<!rec_DummyString>, !cir.ptr<!s8i>) -> ()
+// CHECK-NEXT:     cir.return
+
+struct B {
+  B();
+};
+B::B() {
+}
+
+// CHECK: cir.func {{.*}} @_ZN1BC2Ev(%arg0: !cir.ptr<!rec_B>
+// CHECK:   %0 = cir.alloca !cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!rec_B>>, ["this", init] {alignment = 8 : i64}
+// CHECK:   cir.store %arg0, %0 : !cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!rec_B>>
+// CHECK:   %1 = cir.load %0 : !cir.ptr<!cir.ptr<!rec_B>>, !cir.ptr<!rec_B>
+// CHECK:   cir.return
+// CHECK: }
+// CHECK: cir.func private dso_local @_ZN1BC1Ev(!cir.ptr<!rec_B>) special_member<#cir.cxx_ctor<!rec_B, default>> alias(@_ZN1BC2Ev)
diff --git a/clang/test/CIR/Incubator/CodeGen/ctor-member-lvalue-to-rvalue.cpp b/clang/test/CIR/Incubator/CodeGen/ctor-member-lvalue-to-rvalue.cpp
new file mode 100644
index 0000000000000..cefe22f7b037d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/ctor-member-lvalue-to-rvalue.cpp
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -std=c++17 -mconstructor-aliases -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s
+
+// TODO: support -mno-constructor-aliases
+
+struct String {
+  long size;
+  String(const String &s) : size{s.size} {}
+// CHECK: cir.func {{.*}} @_ZN6StringC2ERKS_
+// CHECK:     %0 = cir.alloca !cir.ptr<!rec_String>, !cir.ptr<!cir.ptr<!rec_String>>, ["this", init] {alignment = 8 : i64}
+// CHECK:     %1 = cir.alloca !cir.ptr<!rec_String>, !cir.ptr<!cir.ptr<!rec_String>>, ["s", init, const] {alignment = 8 : i64}
+// CHECK:     cir.store{{.*}} %arg0, %0
+// CHECK:     cir.store{{.*}} %arg1, %1
+// CHECK:     %2 = cir.load{{.*}} %0
+// CHECK:     %3 = cir.get_member %2[0] {name = "size"}
+// CHECK:     %4 = cir.load{{.*}} %1
+// CHECK:     %5 = cir.get_member %4[0] {name = "size"}
+// CHECK:     %6 = cir.load{{.*}} %5 : !cir.ptr<!s64i>, !s64i
+// CHECK:     cir.store{{.*}} %6, %3 : !s64i, !cir.ptr<!s64i>
+// CHECK:     cir.return
+// CHECK:   }
+
+  String() {}
+};
+
+void foo() {
+  String s;
+  String s1{s};
+}
+// CHECK: cir.func {{.*}} @_Z3foov() {{.*}} {
+// CHECK:  %0 = cir.alloca !rec_String, !cir.ptr<!rec_String>, ["s", init] {alignment = 8 : i64}
+// CHECK:  %1 = cir.alloca !rec_String, !cir.ptr<!rec_String>, ["s1", init] {alignment = 8 : i64}
+// CHECK:  cir.call @_ZN6StringC2Ev(%0) : (!cir.ptr<!rec_String>) -> ()
+// CHECK:  cir.copy %0 to %1 : !cir.ptr<!rec_String>
+// CHECK:  cir.return
+// }
diff --git a/clang/test/CIR/Incubator/CodeGen/ctor.cpp b/clang/test/CIR/Incubator/CodeGen/ctor.cpp
new file mode 100644
index 0000000000000..53e454d085ec7
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/ctor.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+struct Struk {
+  int a;
+  Struk() {}
+  void test() {}
+};
+
+void baz() {
+  Struk s;
+}
+
+// CHECK: !rec_Struk = !cir.record<struct "Struk" {!s32i}>
+
+// CHECK:   cir.func {{.*}} @_ZN5StrukC2Ev(%arg0: !cir.ptr<!rec_Struk>
+// CHECK-NEXT:     %0 = cir.alloca !cir.ptr<!rec_Struk>, !cir.ptr<!cir.ptr<!rec_Struk>>, ["this", init] {alignment = 8 : i64}
+// CHECK-NEXT:     cir.store %arg0, %0 : !cir.ptr<!rec_Struk>, !cir.ptr<!cir.ptr<!rec_Struk>>
+// CHECK-NEXT:     %1 = cir.load %0 : !cir.ptr<!cir.ptr<!rec_Struk>>, !cir.ptr<!rec_Struk>
+// CHECK-NEXT:     cir.return
+
+// CHECK:   cir.func {{.*}} @_ZN5StrukC1Ev(%arg0: !cir.ptr<!rec_Struk>
+// CHECK-NEXT:     %0 = cir.alloca !cir.ptr<!rec_Struk>, !cir.ptr<!cir.ptr<!rec_Struk>>, ["this", init] {alignment = 8 : i64}
+// CHECK-NEXT:     cir.store %arg0, %0 : !cir.ptr<!rec_Struk>, !cir.ptr<!cir.ptr<!rec_Struk>>
+// CHECK-NEXT:     %1 = cir.load %0 : !cir.ptr<!cir.ptr<!rec_Struk>>, !cir.ptr<!rec_Struk>
+// CHECK-NEXT:     cir.call @_ZN5StrukC2Ev(%1) : (!cir.ptr<!rec_Struk>) -> ()
+// CHECK-NEXT:     cir.return
+
+// CHECK:   cir.func {{.*}} @_Z3bazv()
+// CHECK-NEXT:     %0 = cir.alloca !rec_Struk, !cir.ptr<!rec_Struk>, ["s", init] {alignment = 4 : i64}
+// CHECK-NEXT:     cir.call @_ZN5StrukC1Ev(%0) : (!cir.ptr<!rec_Struk>) -> ()
+// CHECK-NEXT:     cir.return
diff --git a/clang/test/CIR/Incubator/CodeGen/cxx-default-arg.cpp b/clang/test/CIR/Incubator/CodeGen/cxx-default-arg.cpp
new file mode 100644
index 0000000000000..167b8fffd7875
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/cxx-default-arg.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// CHECK: cir.func {{.*}} @_ZN12MyIntPointerC1EPi
+
+struct MyIntPointer {
+  MyIntPointer(int *p = nullptr);
+};
+
+void foo() {
+  MyIntPointer p;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/cxx-traits.cpp b/clang/test/CIR/Incubator/CodeGen/cxx-traits.cpp
new file mode 100644
index 0000000000000..e3f7d78f07d90
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/cxx-traits.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+void expression_trait_expr() {
+  bool a = __is_lvalue_expr(0);
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["a", init]
+// CIR: %[[CONST_FALSE:.*]] = cir.const #false
+// CIR: cir.store {{.*}} %[[CONST_FALSE]], %[[A_ADDR]] : !cir.bool, !cir.ptr<!cir.bool>
+
+// LLVM: %[[A_ADDR:.*]] = alloca i8, i64 1, align 1
+// LLVM: store i8 0, ptr %[[A_ADDR]], align 1
+
+// OGCG: %[[A_ADDR:.*]] = alloca i8, align 1
+// OGCG: store i8 0, ptr %[[A_ADDR]], align 1
diff --git a/clang/test/CIR/Incubator/CodeGen/cxx1z-inline-variables.cpp b/clang/test/CIR/Incubator/CodeGen/cxx1z-inline-variables.cpp
new file mode 100644
index 0000000000000..c53e4977ccdb2
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/cxx1z-inline-variables.cpp
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck -check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// For compatibility with C++11 and C++14, an out-of-line declaration of a
+// static constexpr local variable promotes the variable to weak_odr.
+struct compat {
+  static constexpr int a = 1;
+  static constexpr int b = 2;
+  static constexpr int c = 3;
+  static inline constexpr int d = 4;
+  static const int e = 5;
+  static const int f = 6;
+  static const int g = 7;
+};
+const int &compat_use_before_redecl = compat::b;
+const int compat::a;
+const int compat::b;
+const int compat::c;
+const int compat::d;
+const int compat::e;
+constexpr int compat::f;
+constexpr inline int compat::g;
+const int &compat_use_after_redecl1 = compat::c;
+const int &compat_use_after_redecl2 = compat::d;
+const int &compat_use_after_redecl3 = compat::g;
+
+// CIR: cir.global constant weak_odr comdat @_ZN6compat1bE = #cir.int<2> : !s32i {alignment = 4 : i64}
+// CIR: cir.global constant weak_odr comdat @_ZN6compat1aE = #cir.int<1> : !s32i {alignment = 4 : i64}
+// CIR: cir.global constant weak_odr comdat @_ZN6compat1cE = #cir.int<3> : !s32i {alignment = 4 : i64}
+// CIR: cir.global constant external @_ZN6compat1eE = #cir.int<5> : !s32i {alignment = 4 : i64}
+// CIR: cir.global constant weak_odr comdat @_ZN6compat1fE = #cir.int<6> : !s32i {alignment = 4 : i64}
+// CIR: cir.global constant linkonce_odr comdat @_ZN6compat1dE = #cir.int<4> : !s32i {alignment = 4 : i64}
+// CIR: cir.global constant linkonce_odr comdat @_ZN6compat1gE = #cir.int<7> : !s32i {alignment = 4 : i64}
+
+// LLVM: $_ZN6compat1bE = comdat any
+// LLVM: $_ZN6compat1aE = comdat any
+// LLVM: $_ZN6compat1cE = comdat any
+// LLVM: $_ZN6compat1fE = comdat any
+// LLVM: $_ZN6compat1dE = comdat any
+// LLVM: $_ZN6compat1gE = comdat any
+
+// LLVM: @_ZN6compat1bE = weak_odr constant i32 2, comdat, align 4
+// LLVM: @_ZN6compat1aE = weak_odr constant i32 1, comdat, align 4
+// LLVM: @_ZN6compat1cE = weak_odr constant i32 3, comdat, align 4
+// LLVM: @_ZN6compat1eE = constant i32 5, align 4
+// LLVM: @_ZN6compat1fE = weak_odr constant i32 6, comdat, align 4
+// LLVM: @_ZN6compat1dE = linkonce_odr constant i32 4, comdat, align 4
+// LLVM: @_ZN6compat1gE = linkonce_odr constant i32 7, comdat, align 4
diff --git a/clang/test/CIR/Incubator/CodeGen/default-address-space.c b/clang/test/CIR/Incubator/CodeGen/default-address-space.c
new file mode 100644
index 0000000000000..8d07e6281a5d6
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/default-address-space.c
@@ -0,0 +1,93 @@
+// RUN: %clang_cc1 -triple amdgcn---amdgiz -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple amdgcn---amdgiz -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple amdgcn---amdgiz -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+// LLVM-DAG: @foo = addrspace(1) global i32 0
+// LLVM-DAG: @ban = addrspace(1) global [10 x i32] zeroinitializer
+// LLVM-DAG: @A = addrspace(1) global ptr null
+// LLVM-DAG: @B = addrspace(1) global ptr null
+// OGCG-DAG: @foo ={{.*}} addrspace(1) global i32 0
+// OGCG-DAG: @ban ={{.*}} addrspace(1) global [10 x i32] zeroinitializer
+// OGCG-DAG: @A ={{.*}} addrspace(1) global ptr null
+// OGCG-DAG: @B ={{.*}} addrspace(1) global ptr null
+int foo;
+int ban[10];
+int *A;
+int *B;
+
+// CIR-LABEL: cir.func {{.*}} @test1
+// LLVM-LABEL: define{{.*}} i32 @test1()
+// LLVM: alloca i32,{{.*}} addrspace(5)
+// LLVM: load i32, ptr addrspace(1) @foo
+// OGCG-LABEL: define{{.*}} i32 @test1()
+// OGCG: load i32, ptr addrspacecast{{[^@]+}} @foo
+int test1(void) { return foo; }
+
+// CIR-LABEL: cir.func {{.*}} @test2
+// CIR: %[[I_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i, lang_address_space(offload_private)>, ["i", init]
+// CIR: cir.cast address_space %[[I_ALLOCA]] : !cir.ptr<!s32i, lang_address_space(offload_private)> -> !cir.ptr<!s32i>
+// LLVM-LABEL: define{{.*}} i32 @test2(i32 %0)
+// LLVM: alloca i32,{{.*}} addrspace(5)
+// LLVM: addrspacecast ptr addrspace(5)
+// LLVM: getelementptr
+// LLVM: load i32, ptr
+// LLVM: ret i32
+// OGCG-LABEL: define{{.*}} i32 @test2(i32 noundef %i)
+// OGCG: %[[addr:.*]] = getelementptr
+// OGCG: load i32, ptr %[[addr]]
+// OGCG-NEXT: ret i32
+int test2(int i) { return ban[i]; }
+
+// This is the key test - static alloca with address space cast.
+// The alloca is in addrspace(5) and must be cast to generic addrspace(0).
+// CIR-LABEL: cir.func {{.*}} @test4
+// CIR: %[[A_ALLOCA:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>, lang_address_space(offload_private)>, ["a", init]
+// CIR: cir.cast address_space %[[A_ALLOCA]] : !cir.ptr<!cir.ptr<!s32i>, lang_address_space(offload_private)> -> !cir.ptr<!cir.ptr<!s32i>>
+// LLVM-LABEL: define{{.*}} void @test4(ptr %0)
+// LLVM: %[[alloca:.*]] = alloca ptr,{{.*}} addrspace(5)
+// LLVM: %[[a_addr:.*]] = addrspacecast ptr addrspace(5) %[[alloca]] to ptr
+// LLVM: store ptr %0, ptr %[[a_addr]]
+// LLVM: %[[r0:.*]] = load ptr, ptr %[[a_addr]]
+// LLVM: %[[arrayidx:.*]] = getelementptr i32, ptr %[[r0]]
+// LLVM: store i32 0, ptr %[[arrayidx]]
+// OGCG-LABEL: define{{.*}} void @test4(ptr noundef %a)
+// OGCG: %[[alloca:.*]] = alloca ptr, align 8, addrspace(5)
+// OGCG: %[[a_addr:.*]] ={{.*}} addrspacecast{{.*}} %[[alloca]] to ptr
+// OGCG: store ptr %a, ptr %[[a_addr]]
+// OGCG: %[[r0:.*]] = load ptr, ptr %[[a_addr]]
+// OGCG: %[[arrayidx:.*]] = getelementptr{{.*}} i32, ptr %[[r0]]
+// OGCG: store i32 0, ptr %[[arrayidx]]
+void test4(int *a) {
+  a[0] = 0;
+}
+
+// Test that the return value alloca also gets an address space cast.
+// The __retval alloca is in addrspace(5) and stores/loads should go
+// through the casted flat pointer.
+// CIR-LABEL: cir.func {{.*}} @test_retval
+// CIR: %[[RETVAL_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i, lang_address_space(offload_private)>, ["__retval"]
+// CIR: %[[RETVAL_CAST:.*]] = cir.cast address_space %[[RETVAL_ALLOCA]] : !cir.ptr<!s32i, lang_address_space(offload_private)> -> !cir.ptr<!s32i>
+// CIR: cir.store {{.*}}, %[[RETVAL_CAST]] : !s32i, !cir.ptr<!s32i>
+// CIR: %[[RET:.*]] = cir.load {{.*}} %[[RETVAL_CAST]] : !cir.ptr<!s32i>, !s32i
+// CIR: cir.return %[[RET]] : !s32i
+// LLVM-LABEL: define{{.*}} i32 @test_retval(i32 %{{.*}})
+// LLVM-DAG: alloca i32,{{.*}} addrspace(5)
+// LLVM-DAG: %[[RETVAL_ALLOCA:.*]] = alloca i32,{{.*}} addrspace(5)
+// LLVM-DAG: %[[RETVAL_CAST:.*]] = addrspacecast ptr addrspace(5) %[[RETVAL_ALLOCA]] to ptr
+// LLVM: store i32 {{.*}}, ptr %[[RETVAL_CAST]]
+// LLVM: %[[RET:.*]] = load i32, ptr %[[RETVAL_CAST]]
+// LLVM: ret i32 %[[RET]]
+// Note: OGCG optimizes away the store/load through retval for simple returns.
+// It stores and loads directly from the parameter, so we only check that
+// the retval addrspacecast is generated.
+// OGCG-LABEL: define{{.*}} i32 @test_retval(i32 noundef %{{.*}})
+// OGCG: %[[RETVAL:.*]] = alloca i32, align 4, addrspace(5)
+// OGCG: %[[RETVAL_CAST:.*]] = addrspacecast ptr addrspace(5) %[[RETVAL]] to ptr
+// OGCG: ret i32
+int test_retval(int x) {
+  return x;
+}
+
diff --git a/clang/test/CIR/Incubator/CodeGen/default-methods.cpp b/clang/test/CIR/Incubator/CodeGen/default-methods.cpp
new file mode 100644
index 0000000000000..8418694f4512a
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/default-methods.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir --check-prefix=CIR %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll --check-prefix=LLVM %s
+
+// We should emit and call both implicit operator= functions.
+struct S {
+  struct T {
+    int x;
+  } t;
+};
+
+// CIR-LABEL: cir.func {{.*}} linkonce_odr @_ZN1S1TaSERKS0_
+// CIR-LABEL: cir.func {{.*}} linkonce_odr @_ZN1SaSERKS_
+// CIR:         cir.call @_ZN1S1TaSERKS0_(
+// CIR-LABEL: cir.func {{.*}} @_Z1fR1SS0_(
+// CIR:         cir.call @_ZN1SaSERKS_(
+
+// LLVM-LABEL: define linkonce_odr ptr @_ZN1S1TaSERKS0_(
+// LLVM-LABEL: define linkonce_odr ptr @_ZN1SaSERKS_(
+// LLVM:         call ptr @_ZN1S1TaSERKS0_(
+// LLVM-LABEL: define dso_local void @_Z1fR1SS0_(
+// LLVM:         call ptr @_ZN1SaSERKS_(
+void f(S &s1, S &s2) { s1 = s2; }
diff --git a/clang/test/CIR/Incubator/CodeGen/defaultarg.cpp b/clang/test/CIR/Incubator/CodeGen/defaultarg.cpp
new file mode 100644
index 0000000000000..fcb03db891bb2
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/defaultarg.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -std=c++17 %s -o - | FileCheck %s
+
+void bar(const int &i = 42);
+
+void foo() {
+  bar();
+}
+
+// CHECK: [[TMP0:%.*]] = cir.alloca !s32i
+// CHECK: [[TMP1:%.*]] = cir.const #cir.int<42>
+// CHECK: cir.store{{.*}} [[TMP1]], [[TMP0]]
+// CHECK: cir.call @_Z3barRKi([[TMP0]])
diff --git a/clang/test/CIR/Incubator/CodeGen/defined-pure-virtual-func.cpp b/clang/test/CIR/Incubator/CodeGen/defined-pure-virtual-func.cpp
new file mode 100644
index 0000000000000..5a57845c2e9ac
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/defined-pure-virtual-func.cpp
@@ -0,0 +1,58 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Pure virtual functions are allowed to be defined, but the vtable should still
+// point to __cxa_pure_virtual instead of the definition. For destructors, the
+// base object destructor (which is not included in the vtable) should be
+// defined as usual. The complete object destructors and deleting destructors
+// should contain a trap, and the vtable entries for them should point to
+// __cxa_pure_virtual.
+class C {
+  C();
+  virtual ~C() = 0;
+  virtual void pure() = 0;
+};
+
+C::C() = default;
+C::~C() = default;
+void C::pure() {}
+
+// CHECK: @_ZTV1C = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1C> : !cir.ptr<!u8i>
+// complete object destructor (D1)
+// CHECK-SAME: #cir.global_view<@__cxa_pure_virtual> : !cir.ptr<!u8i>,
+// deleting destructor (D0)
+// CHECK-SAME: #cir.global_view<@__cxa_pure_virtual> : !cir.ptr<!u8i>,
+// C::pure
+// CHECK-SAME: #cir.global_view<@__cxa_pure_virtual> : !cir.ptr<!u8i>]>
+
+// The base object destructor should be emitted as normal.
+// CHECK-LABEL: cir.func {{.*}} @_ZN1CD2Ev(%arg0: !cir.ptr<!rec_C> loc({{[^)]+}})) {{.*}} {
+// CHECK-NEXT:    %0 = cir.alloca !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>>, ["this", init] {alignment = 8 : i64}
+// CHECK-NEXT:    cir.store %arg0, %0 : !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>>
+// CHECK-NEXT:    %1 = cir.load %0 : !cir.ptr<!cir.ptr<!rec_C>>, !cir.ptr<!rec_C>
+// CHECK-NEXT:    cir.return
+// CHECK-NEXT:  }
+
+// The complete object destructor should trap.
+// CHECK-LABEL: cir.func {{.*}} @_ZN1CD1Ev(%arg0: !cir.ptr<!rec_C> loc({{[^)]+}})) {{.*}} {
+// CHECK-NEXT:    %0 = cir.alloca !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>>, ["this", init] {alignment = 8 : i64}
+// CHECK-NEXT:    cir.store %arg0, %0 : !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>>
+// CHECK-NEXT:    %1 = cir.load %0 : !cir.ptr<!cir.ptr<!rec_C>>, !cir.ptr<!rec_C>
+// CHECK-NEXT:    cir.trap
+// CHECK-NEXT:  }
+
+// The deleting destructor should trap.
+// CHECK-LABEL: cir.func {{.*}} @_ZN1CD0Ev(%arg0: !cir.ptr<!rec_C> loc({{[^)]+}})) {{.*}} {
+// CHECK-NEXT:    %0 = cir.alloca !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>>, ["this", init] {alignment = 8 : i64}
+// CHECK-NEXT:    cir.store %arg0, %0 : !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>>
+// CHECK-NEXT:    %1 = cir.load %0 : !cir.ptr<!cir.ptr<!rec_C>>, !cir.ptr<!rec_C>
+// CHECK-NEXT:    cir.trap
+// CHECK-NEXT:  }
+
+// C::pure should be emitted as normal.
+// CHECK-LABEL: cir.func {{.*}} @_ZN1C4pureEv(%arg0: !cir.ptr<!rec_C> loc({{[^)]+}})) {{.*}} {
+// CHECK-NEXT:    %0 = cir.alloca !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>>, ["this", init] {alignment = 8 : i64}
+// CHECK-NEXT:    cir.store %arg0, %0 : !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>>
+// CHECK-NEXT:    %1 = cir.load %0 : !cir.ptr<!cir.ptr<!rec_C>>, !cir.ptr<!rec_C>
+// CHECK-NEXT:    cir.return
+// CHECK-NEXT:  }
diff --git a/clang/test/CIR/Incubator/CodeGen/delegating-ctor.cpp b/clang/test/CIR/Incubator/CodeGen/delegating-ctor.cpp
new file mode 100644
index 0000000000000..e473b2166cd28
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/delegating-ctor.cpp
@@ -0,0 +1,102 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -fexceptions -fcxx-exceptions %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fexceptions -fcxx-exceptions -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fexceptions -fcxx-exceptions -emit-llvm %s -o %t-og.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t-og.ll %s
+
+struct Delegating {
+  Delegating();
+  Delegating(int);
+};
+
+// Check that the constructor being delegated to is called with the correct
+// arguments.
+Delegating::Delegating() : Delegating(0) {}
+
+// CHECK-LABEL: cir.func {{.*}} @_ZN10DelegatingC2Ev(%arg0: !cir.ptr<!rec_Delegating> {{.*}}) {{.*}} {
+// CHECK-NEXT:    %0 = cir.alloca !cir.ptr<!rec_Delegating>, !cir.ptr<!cir.ptr<!rec_Delegating>>, ["this", init] {alignment = 8 : i64}
+// CHECK-NEXT:    cir.store{{.*}} %arg0, %0 : !cir.ptr<!rec_Delegating>, !cir.ptr<!cir.ptr<!rec_Delegating>>
+// CHECK-NEXT:    %1 = cir.load %0 : !cir.ptr<!cir.ptr<!rec_Delegating>>, !cir.ptr<!rec_Delegating>
+// CHECK-NEXT:    %2 = cir.const #cir.int<0> : !s32i
+// CHECK-NEXT:    cir.call @_ZN10DelegatingC2Ei(%1, %2) : (!cir.ptr<!rec_Delegating>, !s32i) -> ()
+// CHECK-NEXT:    cir.return
+// CHECK-NEXT:  }
+
+struct DelegatingWithZeroing {
+  int i;
+  DelegatingWithZeroing() = default;
+  DelegatingWithZeroing(int);
+};
+
+// Check that the delegating constructor performs zero-initialization here.
+// The trivial default constructor call is now lowered away as expected.
+DelegatingWithZeroing::DelegatingWithZeroing(int) : DelegatingWithZeroing() {}
+
+// CHECK-LABEL: cir.func {{.*}} @_ZN21DelegatingWithZeroingC2Ei(%arg0: !cir.ptr<!rec_DelegatingWithZeroing> {{.*}}, %arg1: !s32i {{.*}}) {{.*}} {
+// CHECK-NEXT:    %0 = cir.alloca !cir.ptr<!rec_DelegatingWithZeroing>, !cir.ptr<!cir.ptr<!rec_DelegatingWithZeroing>>, ["this", init] {alignment = 8 : i64}
+// CHECK-NEXT:    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["", init] {alignment = 4 : i64}
+// CHECK-NEXT:    cir.store{{.*}} %arg0, %0 : !cir.ptr<!rec_DelegatingWithZeroing>, !cir.ptr<!cir.ptr<!rec_DelegatingWithZeroing>>
+// CHECK-NEXT:    cir.store{{.*}} %arg1, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:    %2 = cir.load %0 : !cir.ptr<!cir.ptr<!rec_DelegatingWithZeroing>>, !cir.ptr<!rec_DelegatingWithZeroing>
+// CHECK-NEXT:    %3 = cir.const #cir.zero : !rec_DelegatingWithZeroing
+// CHECK-NEXT:    cir.store{{.*}} %3, %2 : !rec_DelegatingWithZeroing, !cir.ptr<!rec_DelegatingWithZeroing>
+// CHECK-NEXT:    cir.return
+// CHECK-NEXT:  }
+
+// OG uses memset instead of calling the trivial default constructor.
+// CIR uses store zeroinitializer instead. Both achieve the same result.
+// LLVM-LABEL: define {{.*}} @_ZN21DelegatingWithZeroingC2Ei
+// LLVM-NOT:     call {{.*}} @_ZN21DelegatingWithZeroingC2Ev
+// LLVM:         store %struct.DelegatingWithZeroing zeroinitializer
+// LLVM:         ret void
+
+// OGCG-LABEL: define {{.*}} @_ZN21DelegatingWithZeroingC2Ei
+// OGCG-NOT:     call {{.*}} @_ZN21DelegatingWithZeroingC2Ev
+// OGCG:         call void @llvm.memset
+// OGCG:         ret void
+
+void canThrow();
+struct HasNonTrivialDestructor {
+  HasNonTrivialDestructor();
+  HasNonTrivialDestructor(int);
+  ~HasNonTrivialDestructor();
+};
+
+// Check that we call the destructor whenever a cleanup is needed.
+// FIXME: enable and check this when exceptions are fully supported.
+#if 0
+HasNonTrivialDestructor::HasNonTrivialDestructor(int)
+    : HasNonTrivialDestructor() {
+  canThrow();
+}
+#endif
+
+// From clang/test/CodeGenCXX/cxx0x-delegating-ctors.cpp, check that virtual
+// inheritance and delegating constructors interact correctly.
+// FIXME: enable and check this when virtual inheritance is fully supported.
+#if 0
+namespace PR14588 {
+void other();
+
+class Base {
+public:
+  Base() { squawk(); }
+  virtual ~Base() {}
+
+  virtual void squawk() { other(); }
+};
+
+class Foo : public virtual Base {
+public:
+  Foo();
+  Foo(const void *inVoid);
+  virtual ~Foo() {}
+
+  virtual void squawk() { other(); }
+};
+
+Foo::Foo() : Foo(nullptr) { other(); }
+Foo::Foo(const void *inVoid) { squawk(); }
+} // namespace PR14588
+#endif
diff --git a/clang/test/CIR/Incubator/CodeGen/delete-array.cpp b/clang/test/CIR/Incubator/CodeGen/delete-array.cpp
new file mode 100644
index 0000000000000..b76d0551f378a
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/delete-array.cpp
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+void test_delete_array(int *ptr) {
+  delete[] ptr;
+}
+
+// CHECK: cir.delete.array
diff --git a/clang/test/CIR/Incubator/CodeGen/delete.cpp b/clang/test/CIR/Incubator/CodeGen/delete.cpp
new file mode 100644
index 0000000000000..36d81e836fd99
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/delete.cpp
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+typedef __typeof(sizeof(int)) size_t;
+
+namespace test1 {
+  struct A { void operator delete(void*,size_t); int x; };
+  void a(A *x) {
+    delete x;
+  }
+  // CHECK: cir.func {{.*}} @_ZN5test11aEPNS_1AE
+
+  // CHECK: %[[CONST:.*]] = cir.const #cir.int<4> : !u64i
+  // CHECK: cir.call @_ZN5test11AdlEPvm({{.*}}, %[[CONST]])
+}
+
+namespace test2 {
+  struct A {
+    ~A() {}
+  };
+  struct B {
+    A *a;
+    ~B();
+  };
+    // CHECK-LABEL: cir.func{{.*}} @_ZN5test21BD2Ev
+    // CHECK:         cir.call @_ZN5test21AD2Ev
+    // CHECK:         cir.call @_ZdlPvm
+    // CHECK:         cir.return
+    B::~B() { delete a; }
+}
+
+namespace test3 {
+  struct X {
+    virtual ~X();
+  };
+
+// Calling delete with a virtual destructor.
+// CHECK-LABEL:   cir.func {{.*}} @_ZN5test37destroyEPNS_1XE
+// CHECK:           %[[ARG_VAR:.*]] = cir.alloca !cir.ptr<!rec_test33A3AX>
+// CHECK:           %[[ARG:.*]] = cir.load{{.*}} %[[ARG_VAR]] : !cir.ptr<!cir.ptr<!rec_test33A3AX>>, !cir.ptr<!rec_test33A3AX>
+// CHECK:           %[[VPTR_PTR:.*]] = cir.vtable.get_vptr %[[ARG]] : !cir.ptr<!rec_test33A3AX> -> !cir.ptr<!cir.vptr>
+// CHECK:           %[[VPTR:.*]] = cir.load{{.*}} %[[VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr
+// CHECK:           %[[DTOR_PTR:.*]] = cir.vtable.get_virtual_fn_addr %[[VPTR]][1] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_test33A3AX>)>>>
+// CHECK:           %[[DTOR_FUN:.*]] = cir.load{{.*}} %[[DTOR_PTR]]
+// CHECK:           cir.call %[[DTOR_FUN]](%[[ARG]])
+// CHECK:           cir.return
+  void destroy(X *x) {
+    delete x;
+  }
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/derived-cast.cpp b/clang/test/CIR/Incubator/CodeGen/derived-cast.cpp
new file mode 100644
index 0000000000000..61f67e538c9a7
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/derived-cast.cpp
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+class A {
+    int a;
+};
+
+class B {
+    int b;
+public:
+    A *getAsA();
+};
+
+class X : public A, public B {
+    int x;
+};
+
+A *B::getAsA() {
+  return static_cast<X*>(this);
+}
+
+// CIR-LABEL: @_ZN1B6getAsAEv
+// CIR: %[[VAL_1:.*]] = cir.alloca !cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!rec_B>>, ["this", init] {alignment = 8 : i64}
+// CIR: %[[VAL_2:.*]] = cir.alloca !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>, ["__retval"] {alignment = 8 : i64}
+// CIR: %[[VAL_3:.*]] = cir.load %[[VAL_1]] : !cir.ptr<!cir.ptr<!rec_B>>, !cir.ptr<!rec_B>
+// CIR: %[[VAL_4:.*]] = cir.derived_class_addr %[[VAL_3]] : !cir.ptr<!rec_B> nonnull [4] -> !cir.ptr<!rec_X>
+// CIR: %[[VAL_5:.*]] = cir.base_class_addr %[[VAL_4]] : !cir.ptr<!rec_X> nonnull [0] -> !cir.ptr<!rec_A>
+// CIR: cir.store{{.*}} %[[VAL_5]], %[[VAL_2]] : !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>
+// CIR: %[[VAL_6:.*]] = cir.load{{.*}} %[[VAL_2]] : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A>
+// CIR: cir.return %[[VAL_6]] : !cir.ptr<!rec_A>
+
+// LLVM-LABEL: @_ZN1B6getAsAEv
+// LLVM:  %[[VAL_1:.*]] = alloca ptr, i64 1, align 8
+// LLVM:  store ptr %[[VAL_2:.*]], ptr %[[VAL_0:.*]], align 8
+// LLVM:  %[[VAL_3:.*]] = load ptr, ptr %[[VAL_0]], align 8
+// LLVM:  %[[VAL_4:.*]] = getelementptr i8, ptr %[[VAL_3]], i32 -4
+// LLVM-NOT: select i1
+// LLVM:  ret ptr
diff --git a/clang/test/CIR/Incubator/CodeGen/derived-to-base.cpp b/clang/test/CIR/Incubator/CodeGen/derived-to-base.cpp
new file mode 100644
index 0000000000000..d7b43e5b14d55
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/derived-to-base.cpp
@@ -0,0 +1,199 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+typedef enum {
+  RequestFailed = -2004,
+} enumy;
+
+typedef struct {
+  const void* samples;
+  int cound;
+} buffy;
+
+class C1 {
+ public:
+  virtual ~C1();
+  C1(int i);
+
+  struct IE {
+    bool supported = false;
+    unsigned version = 0;
+  };
+
+  struct IEs {
+    IE chain;
+  };
+
+  static IEs availableIEs;
+  class Layer {
+   public:
+    Layer(int d);
+    virtual ~Layer() {}
+  };
+
+  virtual enumy SetStuff(enumy e, buffy b);
+  virtual enumy Initialize() = 0;
+};
+
+class C2 : public C1 {
+ public:
+  C2(
+    void* p,
+    int i
+  );
+
+  ~C2() override;
+
+  class Layer : public C1::Layer {
+   public:
+    Layer(int d, const C2* C1);
+    virtual ~Layer();
+
+   protected:
+    const C2* m_C1;
+  };
+
+  virtual enumy SetStuff(enumy e, buffy b) override;
+  virtual enumy Initialize() override;
+};
+
+class C3 : public C2 {
+  struct Layer : public C2::Layer {
+   public:
+    Layer(int d, const C2* C1);
+    void Initialize();
+  };
+
+  virtual enumy Initialize() override;
+};
+
+void C3::Layer::Initialize() {
+  if (m_C1 == nullptr) {
+    return;
+  }
+  if (m_C1->availableIEs.chain.supported) {
+  }
+}
+
+// CHECK-DAG: !rec_C23A3ALayer = !cir.record<class "C2::Layer"
+// CHECK-DAG: !rec_C33A3ALayer = !cir.record<struct "C3::Layer"
+// CHECK-DAG: !rec_A = !cir.record<class "A"
+// CHECK-DAG: !rec_A2Ebase = !cir.record<class "A.base"
+// CHECK-DAG: !rec_B = !cir.record<class "B" {!rec_A2Ebase
+
+// CHECK: cir.func {{.*}} @_ZN2C35Layer10InitializeEv
+
+// CHECK:  cir.scope {
+// CHECK:    %2 = cir.base_class_addr %1 : !cir.ptr<!rec_C33A3ALayer> nonnull [0] -> !cir.ptr<!rec_C23A3ALayer>
+// CHECK:    %3 = cir.get_member %2[1] {name = "m_C1"} : !cir.ptr<!rec_C23A3ALayer> -> !cir.ptr<!cir.ptr<!rec_C2>>
+// CHECK:    %4 = cir.load{{.*}} %3 : !cir.ptr<!cir.ptr<!rec_C2>>, !cir.ptr<!rec_C2>
+// CHECK:    %5 = cir.const #cir.ptr<null> : !cir.ptr<!rec_C2>
+// CHECK:    %6 = cir.cmp(eq, %4, %5) : !cir.ptr<!rec_C2>, !cir.bool
+
+enumy C3::Initialize() {
+  return C2::Initialize();
+}
+
+// CHECK: cir.func {{.*}} @_ZN2C310InitializeEv(%arg0: !cir.ptr<!rec_C3>
+// CHECK:     %0 = cir.alloca !cir.ptr<!rec_C3>, !cir.ptr<!cir.ptr<!rec_C3>>, ["this", init] {alignment = 8 : i64}
+
+// CHECK:     cir.store %arg0, %0 : !cir.ptr<!rec_C3>, !cir.ptr<!cir.ptr<!rec_C3>>
+// CHECK:     %2 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!rec_C3>>, !cir.ptr<!rec_C3>
+// CHECK:     %3 = cir.base_class_addr %2 : !cir.ptr<!rec_C3> nonnull [0] -> !cir.ptr<!rec_C2>
+// CHECK:     %4 = cir.call @_ZN2C210InitializeEv(%3) : (!cir.ptr<!rec_C2>) -> !s32i
+
+void vcall(C1 &c1) {
+  buffy b;
+  enumy e;
+  c1.SetStuff(e, b);
+}
+
+// CHECK: cir.func {{.*}} @_Z5vcallR2C1(%arg0: !cir.ptr<!rec_C1>
+// CHECK:   %0 = cir.alloca !cir.ptr<!rec_C1>, !cir.ptr<!cir.ptr<!rec_C1>>, ["c1", init, const] {alignment = 8 : i64}
+// CHECK:   %1 = cir.alloca !rec_buffy, !cir.ptr<!rec_buffy>, ["b"] {alignment = 8 : i64}
+// CHECK:   %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["e"] {alignment = 4 : i64}
+// CHECK:   %3 = cir.alloca !rec_buffy, !cir.ptr<!rec_buffy>, ["agg.tmp0"] {alignment = 8 : i64}
+// CHECK:   cir.store %arg0, %0 : !cir.ptr<!rec_C1>, !cir.ptr<!cir.ptr<!rec_C1>>
+// CHECK:   %4 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!rec_C1>>, !cir.ptr<!rec_C1>
+// CHECK:   %5 = cir.load{{.*}} %2 : !cir.ptr<!s32i>, !s32i
+// CHECK:   cir.copy %1 to %3 : !cir.ptr<!rec_buffy>
+// CHECK:   %6 = cir.load{{.*}} %3 : !cir.ptr<!rec_buffy>, !rec_buffy
+// CHECK:   %7 = cir.vtable.get_vptr %4 : !cir.ptr<!rec_C1> -> !cir.ptr<!cir.vptr>
+// CHECK:   %8 = cir.load{{.*}} %7 : !cir.ptr<!cir.vptr>, !cir.vptr
+// CHECK:   %9 = cir.vtable.get_virtual_fn_addr %8[2] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_C1>, !s32i, !rec_buffy) -> !s32i>>>
+// CHECK:   %10 = cir.load align(8) %9 : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_C1>, !s32i, !rec_buffy) -> !s32i>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_C1>, !s32i, !rec_buffy) -> !s32i>>
+// CHECK:   %11 = cir.call %10(%4, %5, %6) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_C1>, !s32i, !rec_buffy) -> !s32i>>, !cir.ptr<!rec_C1>, !s32i, !rec_buffy) -> !s32i
+// CHECK:   cir.return
+// CHECK: }
+
+class A {
+public:
+  int a;
+  virtual void foo() {a++;}
+};
+
+class B : public A {
+public:
+  int b;
+  void foo ()  { static_cast<A>(*this).foo();}
+};
+
+// CHECK: cir.func {{.*}} @_ZN1B3fooEv(%arg0: !cir.ptr<!rec_B>
+// CHECK:   %0 = cir.alloca !cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!rec_B>>, ["this", init] {alignment = 8 : i64}
+// CHECK:   cir.store %arg0, %0 : !cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!rec_B>>
+// CHECK:   %1 = cir.load{{.*}} deref %0 : !cir.ptr<!cir.ptr<!rec_B>>, !cir.ptr<!rec_B>
+// CHECK:   cir.scope {
+// CHECK:     %2 = cir.alloca !rec_A, !cir.ptr<!rec_A>, ["ref.tmp0"] {alignment = 8 : i64}
+// CHECK:     %3 = cir.base_class_addr %1 : !cir.ptr<!rec_B> nonnull [0] -> !cir.ptr<!rec_A>
+
+// Call @A::A(A const&)
+// CHECK:     cir.copy %3 to %2 : !cir.ptr<!rec_A>
+
+// Call @A::foo()
+// CHECK:     cir.call @_ZN1A3fooEv(%2) : (!cir.ptr<!rec_A>) -> ()
+// CHECK:   }
+// CHECK:   cir.return
+// CHECK: }
+
+void t() {
+  B b;
+  b.foo();
+}
+
+struct C : public A {
+  int& ref;
+  C(int& x) : ref(x) {}
+};
+
+// CHECK: cir.func {{.*}} @_Z8test_refv()
+// CHECK: cir.get_member %2[1] {name = "ref"}
+int test_ref() {
+  int x = 42;
+  C c(x);
+  return c.ref;
+}
+
+// Multiple base classes, to test non-zero offsets
+struct Base1 { int a; };
+struct Base2 { int b; };
+struct Derived : Base1, Base2 { int c; };
+void test_multi_base() {
+  Derived d;
+
+  Base2& bref = d; // no null check needed
+  // CHECK: %6 = cir.base_class_addr %0 : !cir.ptr<!rec_Derived> nonnull [4] -> !cir.ptr<!rec_Base2>
+
+  Base2* bptr = &d; // has null pointer check
+  // CHECK: %7 = cir.base_class_addr %0 : !cir.ptr<!rec_Derived> [4] -> !cir.ptr<!rec_Base2>
+
+  int a = d.a;
+  // CHECK: %8 = cir.base_class_addr %0 : !cir.ptr<!rec_Derived> nonnull [0] -> !cir.ptr<!rec_Base1>
+  // CHECK: %9 = cir.get_member %8[0] {name = "a"} : !cir.ptr<!rec_Base1> -> !cir.ptr<!s32i>
+
+  int b = d.b;
+  // CHECK: %11 = cir.base_class_addr %0 : !cir.ptr<!rec_Derived> nonnull [4] -> !cir.ptr<!rec_Base2>
+  // CHECK: %12 = cir.get_member %11[0] {name = "b"} : !cir.ptr<!rec_Base2> -> !cir.ptr<!s32i>
+
+  int c = d.c;
+  // CHECK: %14 = cir.get_member %0[2] {name = "c"} : !cir.ptr<!rec_Derived> -> !cir.ptr<!s32i>
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/dlti.c b/clang/test/CIR/Incubator/CodeGen/dlti.c
new file mode 100644
index 0000000000000..b878196119a08
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/dlti.c
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+void foo() {}
+
+//      CHECK: module @"{{.*}}dlti.c" attributes {
+//  CHECK-DAG: cir.sob = #cir.signed_overflow_behavior<undefined>,
+//  CHECK-DAG: cir.type_size_info =
+//  CHECK-DAG:   #cir.type_size_info<
+//  CHECK-DAG:     char = 8,
+//  CHECK-DAG:     int = {{16|32}},
+//  CHECK-DAG:     size_t = {{32|64}}
+//  CHECK-DAG: >
+//  CHECK-DAG: dlti.dl_spec =
+//  CHECK-DAG:   #dlti.dl_spec<
+//  CHECK-DAG:     i16 = dense<16> : vector<2xi64>,
+//  CHECK-DAG:     i32 = dense<32> : vector<2xi64>,
+//  CHECK-DAG:     i8 = dense<8> : vector<2xi64>,
+//  CHECK-DAG:     i1 = dense<8> : vector<2xi64>,
+//  CHECK-DAG:     !llvm.ptr = dense<64> : vector<4xi64>,
+//  CHECK-DAG:     f80 = dense<128> : vector<2xi64>,
+//  CHECK-DAG:     i128 = dense<128> : vector<2xi64>,
+//  CHECK-DAG:     !llvm.ptr<272> = dense<64> : vector<4xi64>,
+//  CHECK-DAG:     i64 = dense<64> : vector<2xi64>,
+//  CHECK-DAG:     !llvm.ptr<270> = dense<32> : vector<4xi64>,
+//  CHECK-DAG:     !llvm.ptr<271> = dense<32> : vector<4xi64>,
+//  CHECK-DAG:     f128 = dense<128> : vector<2xi64>,
+//  CHECK-DAG:     f16 = dense<16> : vector<2xi64>,
+//  CHECK-DAG:     f64 = dense<64> : vector<2xi64>,
+//  CHECK-DAG:     "dlti.stack_alignment" = 128 : i64
+//  CHECK-DAG:     "dlti.endianness" = "little"
+//  >,
diff --git a/clang/test/CIR/Incubator/CodeGen/dtor-alias.cpp b/clang/test/CIR/Incubator/CodeGen/dtor-alias.cpp
new file mode 100644
index 0000000000000..302a3c02b624e
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/dtor-alias.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu \
+// RUN:   -mconstructor-aliases -fclangir -emit-cir %s \
+// RUN:   -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir
+
+namespace {
+struct A {
+  ~A() {}
+};
+
+struct B : public A {};
+}
+
+B x;
+
+// CHECK: cir.call @_ZN12_GLOBAL__N_11AD2Ev({{.*}}) : (!cir.ptr<!rec_28anonymous_namespace293A3AA>) -> ()
diff --git a/clang/test/CIR/Incubator/CodeGen/dtors-scopes.cpp b/clang/test/CIR/Incubator/CodeGen/dtors-scopes.cpp
new file mode 100644
index 0000000000000..21b27d89aadcb
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/dtors-scopes.cpp
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple arm64-apple-macosx14.0.0 -std=c++20 -fclangir -emit-cir %s -o %t2.cir
+// RUN: FileCheck --input-file=%t2.cir %s --check-prefix=DTOR_BODY
+
+extern "C" int printf(char const*, ...);
+struct C {
+  C()  { printf("++A\n"); }
+  ~C()  { printf("--A\n"); }
+};
+void dtor1() {
+  {
+    C c;
+  }
+  printf("Done\n");
+}
+
+// CHECK: cir.func {{.*}} @_Z5dtor1v()
+// CHECK:   cir.scope {
+// CHECK:     %4 = cir.alloca !rec_C, !cir.ptr<!rec_C>, ["c", init] {alignment = 1 : i64}
+// CHECK:     cir.call @_ZN1CC2Ev(%4) : (!cir.ptr<!rec_C>) -> ()
+// CHECK:     cir.call @_ZN1CD2Ev(%4) : (!cir.ptr<!rec_C>) -> ()
+// CHECK:   }
+
+// DTOR_BODY: cir.func {{.*}} @_ZN1CD2Ev{{.*}}{
+// DTOR_BODY:   %2 = cir.get_global @printf
+// DTOR_BODY:   %3 = cir.get_global @".str.2"
+// DTOR_BODY:   %4 = cir.cast array_to_ptrdecay %3
+// DTOR_BODY:   %5 = cir.call @printf(%4)
+// DTOR_BODY:   cir.return
+
+// DTOR_BODY: cir.func {{.*}} @_ZN1CD1Ev(%arg0: !cir.ptr<!rec_C>
+
+// DTOR_BODY:   cir.call @_ZN1CD2Ev
+// DTOR_BODY:   cir.return
+// DTOR_BODY: }
diff --git a/clang/test/CIR/Incubator/CodeGen/dtors.cpp b/clang/test/CIR/Incubator/CodeGen/dtors.cpp
new file mode 100644
index 0000000000000..39c00110926dc
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/dtors.cpp
@@ -0,0 +1,207 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -mconstructor-aliases -emit-llvm %s -o %t-og.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t-og.ll %s
+
+enum class EFMode { Always, Verbose };
+
+class PSEvent {
+ public:
+  PSEvent(
+      EFMode m,
+      const char* n);
+  ~PSEvent();
+
+ private:
+  const char* n;
+  EFMode m;
+};
+
+void blue() {
+  PSEvent p(EFMode::Verbose, __FUNCTION__);
+}
+
+class A
+{
+public:
+    A() noexcept {}
+    A(const A&) noexcept = default;
+
+    virtual ~A() noexcept;
+    virtual const char* quack() const noexcept;
+};
+
+class B : public A
+{
+public:
+    virtual ~B() noexcept {}
+};
+
+// Class A
+// CHECK: ![[ClassA:rec_.*]] = !cir.record<class "A" {!cir.vptr} #cir.record.decl.ast>
+
+// Class B
+// CHECK: ![[ClassB:rec_.*]] = !cir.record<class "B" {![[ClassA]]}>
+
+// CHECK: cir.func {{.*}} @_Z4bluev()
+// CHECK:   %0 = cir.alloca !rec_PSEvent, !cir.ptr<!rec_PSEvent>, ["p", init] {alignment = 8 : i64}
+// CHECK:   %1 = cir.const #cir.int<1> : !s32i
+// CHECK:   %2 = cir.get_global @".str" : !cir.ptr<!cir.array<!s8i x 5>>
+// CHECK:   %3 = cir.cast array_to_ptrdecay %2 : !cir.ptr<!cir.array<!s8i x 5>> -> !cir.ptr<!s8i>
+// CHECK:   cir.call @_ZN7PSEventC1E6EFModePKc(%0, %1, %3) : (!cir.ptr<!rec_PSEvent>, !s32i, !cir.ptr<!s8i>) -> ()
+// CHECK:   cir.return
+// CHECK: }
+
+struct X {
+  int a;
+  X(int a) : a(a) {}
+  ~X() {}
+};
+
+bool foo(const X &) { return false; }
+bool bar() { return foo(1) || foo(2); }
+
+// CHECK: cir.func {{.*}} @_Z3barv()
+// CHECK:   %[[V0:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["__retval"] {alignment = 1 : i64}
+// CHECK:   cir.scope {
+// CHECK:     %[[V2:.*]] = cir.alloca !rec_X, !cir.ptr<!rec_X>, ["ref.tmp0"] {alignment = 4 : i64}
+// CHECK:     %[[V3:.*]] = cir.const #cir.int<1> : !s32i
+// CHECK:     cir.call @_ZN1XC2Ei(%[[V2]], %[[V3]]) : (!cir.ptr<!rec_X>, !s32i) -> ()
+// CHECK:     %[[V4:.*]] = cir.call @_Z3fooRK1X(%[[V2]]) : (!cir.ptr<!rec_X>) -> !cir.bool
+// CHECK:     %[[V5:.*]] = cir.ternary(%[[V4]], true {
+// CHECK:       %[[V6:.*]] = cir.const #true
+// CHECK:       cir.yield %[[V6]] : !cir.bool
+// CHECK:     }, false {
+// CHECK:       %[[V6:.*]] = cir.alloca !rec_X, !cir.ptr<!rec_X>, ["ref.tmp1"] {alignment = 4 : i64}
+// CHECK:       %[[V7:.*]] = cir.const #cir.int<2> : !s32i
+// CHECK:       cir.call @_ZN1XC2Ei(%[[V6]], %[[V7]]) : (!cir.ptr<!rec_X>, !s32i) -> ()
+// CHECK:       %[[V8:.*]] = cir.call @_Z3fooRK1X(%[[V6]]) : (!cir.ptr<!rec_X>) -> !cir.bool
+// CHECK:       %[[V9:.*]] = cir.ternary(%[[V8]], true {
+// CHECK:         %[[V10:.*]] = cir.const #true
+// CHECK:         cir.yield %[[V10]] : !cir.bool
+// CHECK:       }, false {
+// CHECK:         %[[V10:.*]] = cir.const #false
+// CHECK:         cir.yield %[[V10]] : !cir.bool
+// CHECK:       }) : (!cir.bool) -> !cir.bool
+// CHECK:       cir.call @_ZN1XD2Ev(%[[V6]]) : (!cir.ptr<!rec_X>) -> ()
+// CHECK:       cir.yield %[[V9]] : !cir.bool
+// CHECK:     }) : (!cir.bool) -> !cir.bool
+// CHECK:     cir.store{{.*}} %[[V5]], %[[V0]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:     cir.call @_ZN1XD2Ev(%[[V2]]) : (!cir.ptr<!rec_X>) -> ()
+// CHECK:   }
+// CHECK:   %[[V1:.*]] = cir.load{{.*}} %[[V0]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK:   cir.return %[[V1]] : !cir.bool
+// CHECK: }
+
+bool bar2() { return foo(1) && foo(2); }
+
+// CHECK:  cir.func {{.*}} @_Z4bar2v()
+// CHECK:     cir.alloca !rec_X, !cir.ptr<!rec_X>
+// CHECK:       {{.*}} = cir.ternary({{.*}}, true {
+// CHECK:         cir.alloca !rec_X, !cir.ptr<!rec_X>
+// CHECK:         cir.call @_ZN1XD2Ev
+// CHECK:         cir.yield
+// CHECK:       }, false {
+// CHECK:         {{.*}} = cir.const #false
+// CHECK:         cir.yield
+// CHECK:       }) : (!cir.bool) -> !cir.bool
+// CHECK:     cir.call @_ZN1XD2Ev
+
+typedef int I;
+void pseudo_dtor() {
+  I x = 10;
+  x.I::~I();
+}
+// CHECK: cir.func {{.*}} @_Z11pseudo_dtorv()
+// CHECK:   %[[INT:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>
+// CHECK:   %[[TEN:.*]] = cir.const #cir.int<10> : !s32i
+// CHECK:   cir.store{{.*}} %[[TEN]], %[[INT]] : !s32i, !cir.ptr<!s32i>
+// CHECK:   cir.return
+
+// @B::~B() #1 definition call into base @A::~A()
+// CHECK:  cir.func {{.*}} @_ZN1BD2Ev{{.*}}{
+// CHECK:    cir.call @_ZN1AD2Ev(
+
+// void foo()
+// CHECK: cir.func {{.*}} @_Z3foov()
+// CHECK:   cir.scope {
+// CHECK:     cir.call @_ZN1BC2Ev(%0) : (!cir.ptr<!rec_B>) -> ()
+// CHECK:     cir.call @_ZN1BD2Ev(%0) : (!cir.ptr<!rec_B>) -> ()
+
+// operator delete(void*) declaration
+// CHECK:   cir.func {{.*}} @_ZdlPvm(!cir.ptr<!void>, !u64i)
+
+// B dtor => @B::~B() #2
+// Calls dtor #1
+// Calls operator delete
+//
+// CHECK:   cir.func {{.*}} @_ZN1BD0Ev(%arg0: !cir.ptr<![[ClassB]]>
+// CHECK:     %0 = cir.alloca !cir.ptr<![[ClassB]]>, !cir.ptr<!cir.ptr<![[ClassB]]>>, ["this", init] {alignment = 8 : i64}
+// CHECK:     cir.store %arg0, %0 : !cir.ptr<![[ClassB]]>, !cir.ptr<!cir.ptr<![[ClassB]]>>
+// CHECK:     %1 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<![[ClassB]]>>, !cir.ptr<![[ClassB]]>
+// CHECK:     cir.call @_ZN1BD2Ev(%1) : (!cir.ptr<![[ClassB]]>) -> ()
+// CHECK:     %2 = cir.cast bitcast %1 : !cir.ptr<![[ClassB]]> -> !cir.ptr<!void>
+// CHECK:     cir.call @_ZdlPvm(%2, %3) : (!cir.ptr<!void>, !u64i) -> ()
+// CHECK:     cir.return
+// CHECK:   }
+
+void foo() { B(); }
+
+class A2 {
+public:
+  ~A2();
+};
+
+struct B2 {
+  template <typename> using C = A2;
+};
+
+struct E {
+  typedef B2::C<int> D;
+};
+
+struct F {
+  F(long, A2);
+};
+
+class G : F {
+public:
+  A2 h;
+  G(long) : F(i(), h) {}
+  long i() { k(E::D()); };
+  long k(E::D);
+};
+
+int j;
+void m() { G l(j); }
+
+// CHECK: cir.func {{.*}} @_ZN1G1kE2A2(!cir.ptr<!rec_G>, !rec_A2) -> !s64i
+// CHECK: cir.func {{.*}} @_ZN1G1iEv(%arg0: !cir.ptr<!rec_G>
+// CHECK:   %[[V0:.*]] = cir.alloca !cir.ptr<!rec_G>, !cir.ptr<!cir.ptr<!rec_G>>, ["this", init] {alignment = 8 : i64}
+// CHECK:   %[[V1:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["__retval"] {alignment = 8 : i64}
+// CHECK:   cir.store %arg0, %[[V0]] : !cir.ptr<!rec_G>, !cir.ptr<!cir.ptr<!rec_G>>
+// CHECK:   %[[V2:.*]] = cir.load{{.*}} %[[V0]] : !cir.ptr<!cir.ptr<!rec_G>>, !cir.ptr<!rec_G>
+// Trivial default constructor call is lowered away.
+// CHECK:   %[[V3:.*]] = cir.scope {
+// CHECK:     %[[V4:.*]] = cir.alloca !rec_A2, !cir.ptr<!rec_A2>, ["agg.tmp0"] {alignment = 1 : i64}
+// CHECK:     %[[V5:.*]] = cir.load{{.*}} %[[V4]] : !cir.ptr<!rec_A2>, !rec_A2
+// CHECK:     %[[V6:.*]] = cir.call @_ZN1G1kE2A2(%[[V2]], %[[V5]]) : (!cir.ptr<!rec_G>, !rec_A2) -> !s64i
+// CHECK:     cir.call @_ZN2A2D1Ev(%[[V4]]) : (!cir.ptr<!rec_A2>) -> ()
+// CHECK:     cir.yield %[[V6]] : !s64i
+// CHECK:   } : !s64i
+// CHECK:   cir.trap
+// CHECK: }
+
+// LLVM-LABEL: define {{.*}} @_ZN1G1iEv
+// LLVM:         alloca %class.A2
+// LLVM-NOT:     call {{.*}} @_ZN2A2C2Ev
+// LLVM:         call {{.*}} @_ZN1G1kE2A2
+// LLVM:         call {{.*}} @_ZN2A2D1Ev
+
+// OGCG-LABEL: define {{.*}} @_ZN1G1iEv
+// OGCG:         %agg.tmp = alloca %class.A2
+// OGCG-NOT:     call {{.*}} @_ZN2A2C2Ev
+// OGCG:         call {{.*}} @_ZN1G1kE2A2
+// OGCG:         call {{.*}} @_ZN2A2D1Ev
diff --git a/clang/test/CIR/Incubator/CodeGen/dumb-record.cpp b/clang/test/CIR/Incubator/CodeGen/dumb-record.cpp
new file mode 100644
index 0000000000000..6920ccc0cfb0a
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/dumb-record.cpp
@@ -0,0 +1,64 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -fdump-record-layouts %s -o - | FileCheck %s
+
+struct SimpleStruct {
+  int a;
+  float b;
+} simple;
+// CHECK: Layout: <CIRecordLayout
+// CHECK: CIR Type:!cir.record<struct "SimpleStruct" {!cir.int<s, 32>, !cir.float} #cir.record.decl.ast>
+// CHECK: NonVirtualBaseCIRType:!cir.record<struct "SimpleStruct" {!cir.int<s, 32>, !cir.float} #cir.record.decl.ast>
+// CHECK: IsZeroInitializable:1
+// CHECK:   BitFields:[
+// CHECK: ]>
+
+struct Empty {
+} empty;
+
+// CHECK: Layout: <CIRecordLayout
+// CHECK:  CIR Type:!cir.record<struct "Empty" padded {!cir.int<u, 8>} #cir.record.decl.ast>
+// CHECK:  NonVirtualBaseCIRType:!cir.record<struct "Empty" padded {!cir.int<u, 8>} #cir.record.decl.ast>
+// CHECK:  IsZeroInitializable:1
+// CHECK:  BitFields:[
+// CHECK:  ]>
+
+struct BitfieldsInOrder {
+  char a;
+  unsigned bit: 8;
+  unsigned should : 20;
+  unsigned have: 3;
+  unsigned order: 1;
+} bitfield_order;
+
+// CHECK: Layout: <CIRecordLayout
+// CHECK:  CIR Type:!cir.record<struct "BitfieldsInOrder" {!cir.int<s, 8>, !cir.int<u, 8>, !cir.int<u, 32>} #cir.record.decl.ast>
+// CHECK:  NonVirtualBaseCIRType:!cir.record<struct "BitfieldsInOrder" {!cir.int<s, 8>, !cir.int<u, 8>, !cir.int<u, 32>} #cir.record.decl.ast>
+// CHECK:  IsZeroInitializable:1
+// CHECK:  BitFields:[
+// CHECK-NEXT:   <CIRBitFieldInfo name:bit offset:0 size:8 isSigned:0 storageSize:8 storageOffset:1 volatileOffset:0 volatileStorageSize:0 volatileStorageOffset:0>
+// CHECK-NEXT:   <CIRBitFieldInfo name:should offset:0 size:20 isSigned:0 storageSize:32 storageOffset:4 volatileOffset:0 volatileStorageSize:0 volatileStorageOffset:0>
+// CHECK-NEXT:   <CIRBitFieldInfo name:have offset:20 size:3 isSigned:0 storageSize:32 storageOffset:4 volatileOffset:0 volatileStorageSize:0 volatileStorageOffset:0>
+// CHECK-NEXT:   <CIRBitFieldInfo name:order offset:23 size:1 isSigned:0 storageSize:32 storageOffset:4 volatileOffset:0 volatileStorageSize:0 volatileStorageOffset:0>
+// CHECK:]>
+
+struct Inner {
+  int x;
+} in;
+
+//CHECK: Layout: <CIRecordLayout
+//CHECK:  CIR Type:!cir.record<struct "Inner" {!cir.int<s, 32>} #cir.record.decl.ast>
+//CHECK:  NonVirtualBaseCIRType:!cir.record<struct "Inner" {!cir.int<s, 32>} #cir.record.decl.ast>
+//CHECK:  IsZeroInitializable:1
+//CHECK:  BitFields:[
+//CHECK:  ]>
+
+struct Outer {
+  Inner i;
+  int y = 6;
+} ou;
+
+//CHECK: Layout: <CIRecordLayout
+//CHECK:  CIR Type:!cir.record<struct "Outer" {!cir.record<struct "Inner" {!cir.int<s, 32>} #cir.record.decl.ast>, !cir.int<s, 32>} #cir.record.decl.ast>
+//CHECK:  NonVirtualBaseCIRType:!cir.record<struct "Outer" {!cir.record<struct "Inner" {!cir.int<s, 32>} #cir.record.decl.ast>, !cir.int<s, 32>} #cir.record.decl.ast>
+//CHECK:  IsZeroInitializable:1
+//CHECK:  BitFields:[
+//CHECK:  ]>
diff --git a/clang/test/CIR/Incubator/CodeGen/dynamic-alloca-with-address-space.c b/clang/test/CIR/Incubator/CodeGen/dynamic-alloca-with-address-space.c
new file mode 100644
index 0000000000000..b8697cd50b465
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/dynamic-alloca-with-address-space.c
@@ -0,0 +1,52 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -DOCL12 -x cl -std=cl1.2 \
+// RUN:   -fclangir -emit-llvm %s -o - | FileCheck %s --check-prefix=CIR-CL12
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x cl -std=cl2.0 \
+// RUN:   -fclangir -emit-llvm %s -o - | FileCheck %s --check-prefix=CIR-CL20
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -DOCL12 -x cl -std=cl1.2 \
+// RUN:   -emit-llvm %s -o - | FileCheck %s --check-prefix=OGCG-CL12
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x cl -std=cl2.0 \
+// RUN:   -emit-llvm %s -o - | FileCheck %s --check-prefix=OGCG-CL20
+
+
+#if defined(OCL12)
+#define CAST (char *)(unsigned long)
+#else
+#define CAST (char *)
+#endif
+
+void allocas(unsigned long n) {
+    char *a = CAST __builtin_alloca(n);
+    char *uninitialized_a = CAST __builtin_alloca_uninitialized(n);
+}
+
+// CIR-LABEL: cir.func {{.*}} @allocas
+// CIR:         %[[ALLOCA1:.*]] = cir.alloca !u8i, !cir.ptr<!u8i>, {{.*}} ["bi_alloca"]
+// CIR:         cir.cast address_space %[[ALLOCA1]] : !cir.ptr<!u8i> -> !cir.ptr<!void>
+// CIR:         %[[ALLOCA2:.*]] = cir.alloca !u8i, !cir.ptr<!u8i>, {{.*}} ["bi_alloca"]
+// CIR:         cir.cast address_space %[[ALLOCA2]] : !cir.ptr<!u8i> -> !cir.ptr<!void>
+
+// LLVM-LABEL: define {{.*}} void @allocas(i64 %{{.*}})
+// LLVM:         %[[BI_ALLOCA1:.*]] = alloca i8, i64 %{{.*}}, align 8, addrspace(5)
+// LLVM:         addrspacecast ptr addrspace(5) %[[BI_ALLOCA1]] to ptr
+// LLVM:         %[[BI_ALLOCA2:.*]] = alloca i8, i64 %{{.*}}, align 8, addrspace(5)
+// LLVM:         addrspacecast ptr addrspace(5) %[[BI_ALLOCA2]] to ptr
+
+// OGCG-LABEL: define {{.*}} void @allocas(i64 {{.*}} %n)
+// OGCG:         %[[BI_ALLOCA1:.*]] = alloca i8, i64 %{{.*}}, align 8, addrspace(5)
+// OGCG:         addrspacecast ptr addrspace(5) %[[BI_ALLOCA1]] to ptr
+// OGCG:         %[[BI_ALLOCA2:.*]] = alloca i8, i64 %{{.*}}, align 8, addrspace(5)
+// OGCG:         addrspacecast ptr addrspace(5) %[[BI_ALLOCA2]] to ptr
+
+// CIR-CL12-NOT: addrspacecast
+// CIR-CL20-NOT: addrspacecast
+// OGCG-CL12-NOT: addrspacecast
+// OGCG-CL20-NOT: addrspacecast
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/dynamic-cast-address-space.cpp b/clang/test/CIR/Incubator/CodeGen/dynamic-cast-address-space.cpp
new file mode 100644
index 0000000000000..417575c60d696
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/dynamic-cast-address-space.cpp
@@ -0,0 +1,79 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -std=c++20 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s --check-prefix=CIR-BEFORE
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -std=c++20 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir --check-prefix=CIR
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -std=c++20 -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck %s --input-file=%t.ll --check-prefix=LLVM
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -std=c++20 -emit-llvm %s -o %t.ll
+// RUN: FileCheck %s --input-file=%t.ll --check-prefix=OGCG
+
+struct Base {
+  virtual ~Base();
+};
+
+struct Derived : Base {};
+
+// Test dynamic_cast to void* with address space attribute.
+// The result pointer should preserve the address space of the source pointer.
+
+// CIR-BEFORE: cir.func {{.*}} @_Z30ptr_cast_to_complete_addrspaceP4Base
+// CIR-BEFORE:   %{{.+}} = cir.dyn_cast ptr %{{.+}} : !cir.ptr<!rec_Base> -> !cir.ptr<!void>
+// CIR-BEFORE: }
+
+// CIR: cir.func {{.*}} @_Z30ptr_cast_to_complete_addrspaceP4Base
+// CIR:   %[[#SRC:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!rec_Base>>, !cir.ptr<!rec_Base>
+// CIR:   %[[#SRC_IS_NOT_NULL:]] = cir.cast ptr_to_bool %[[#SRC]] : !cir.ptr<!rec_Base> -> !cir.bool
+// CIR:   %{{.+}} = cir.ternary(%[[#SRC_IS_NOT_NULL]], true {
+// CIR:     %[[#SRC_BYTES_PTR:]] = cir.cast bitcast %{{.+}} : !cir.ptr<!rec_Base> -> !cir.ptr<!u8i>
+// CIR:     %[[#DST_BYTES_PTR:]] = cir.ptr_stride %[[#SRC_BYTES_PTR]], %{{.+}} : (!cir.ptr<!u8i>, !s64i) -> !cir.ptr<!u8i>
+// CIR:     %[[#CASTED_PTR:]] = cir.cast bitcast %[[#DST_BYTES_PTR]] : !cir.ptr<!u8i> -> !cir.ptr<!void>
+// CIR:     cir.yield %[[#CASTED_PTR]] : !cir.ptr<!void>
+// CIR:   }, false {
+// CIR:     %[[#NULL_PTR:]] = cir.const #cir.ptr<null> : !cir.ptr<!void>
+// CIR:     cir.yield %[[#NULL_PTR]] : !cir.ptr<!void>
+// CIR:   }) : (!cir.bool) -> !cir.ptr<!void>
+// CIR: }
+
+// LLVM: define dso_local ptr @_Z30ptr_cast_to_complete_addrspaceP4Base
+// LLVM-SAME: (ptr %{{.+}})
+// LLVM-DAG: alloca ptr, {{.*}}addrspace(5)
+// LLVM-DAG: %[[#RETVAL_ALLOCA:]] = alloca ptr, {{.*}}addrspace(5)
+// LLVM-DAG: %[[#RETVAL_ASCAST:]] = addrspacecast ptr addrspace(5) %[[#RETVAL_ALLOCA]] to ptr
+// LLVM-DAG: %[[#PTR_ASCAST:]] = addrspacecast ptr addrspace(5) %{{.+}} to ptr
+// LLVM:   store ptr %{{.+}}, ptr %[[#PTR_ASCAST]], align 8
+// LLVM:   %[[#SRC:]] = load ptr, ptr %[[#PTR_ASCAST]], align 8
+// LLVM:   %[[#SRC_IS_NOT_NULL:]] = icmp ne ptr %[[#SRC]], null
+// LLVM:   br i1 %[[#SRC_IS_NOT_NULL]], label %[[#TRUE_BLOCK:]], label %[[#FALSE_BLOCK:]]
+// LLVM: [[#TRUE_BLOCK]]:
+// LLVM:   %[[#VTABLE:]] = load ptr, ptr %[[#SRC]], align 8
+// LLVM:   %[[#OFFSET_PTR:]] = getelementptr i64, ptr %[[#VTABLE]], i64 -2
+// LLVM:   %[[#OFFSET:]] = load i64, ptr %[[#OFFSET_PTR]], align 8
+// LLVM:   %[[#RESULT:]] = getelementptr i8, ptr %[[#SRC]], i64 %[[#OFFSET]]
+// LLVM:   br label %[[#MERGE:]]
+// LLVM: [[#FALSE_BLOCK]]:
+// LLVM:   br label %[[#MERGE]]
+// LLVM: [[#MERGE]]:
+// LLVM:   %[[#PHI:]] = phi ptr [ null, %[[#FALSE_BLOCK]] ], [ %[[#RESULT]], %[[#TRUE_BLOCK]] ]
+// LLVM:   store ptr %[[#PHI]], ptr %[[#RETVAL_ASCAST]], align 8
+// LLVM:   %[[#RET:]] = load ptr, ptr %[[#RETVAL_ASCAST]], align 8
+// LLVM:   ret ptr %[[#RET]]
+// LLVM: }
+
+// OGCG: define dso_local noundef ptr @_Z30ptr_cast_to_complete_addrspaceP4Base
+// OGCG-SAME: (ptr noundef %{{.+}})
+// OGCG:   %[[RETVAL_ASCAST:[a-z0-9.]+]] = addrspacecast ptr addrspace(5) %{{.+}} to ptr
+// OGCG:   %[[PTR_ASCAST:[a-z0-9.]+]] = addrspacecast ptr addrspace(5) %{{.+}} to ptr
+// OGCG:   store ptr %{{.+}}, ptr %[[PTR_ASCAST]], align 8
+// OGCG:   %[[SRC:[0-9]+]] = load ptr, ptr %[[PTR_ASCAST]], align 8
+// OGCG:   icmp eq ptr %[[SRC]], null
+// OGCG: dynamic_cast.notnull:
+// OGCG:   %[[VTABLE:[a-z0-9]+]] = load ptr, ptr %[[SRC]], align 8
+// OGCG:   getelementptr inbounds i64, ptr %[[VTABLE]], i64 -2
+// OGCG:   %[[OFFSET:[a-z0-9.]+]] = load i64, ptr %{{.+}}, align 8
+// OGCG:   %[[RESULT:[0-9]+]] = getelementptr inbounds i8, ptr %[[SRC]], i64 %[[OFFSET]]
+// OGCG: dynamic_cast.end:
+// OGCG:   %[[PHI:[0-9]+]] = phi ptr [ %[[RESULT]], %dynamic_cast.notnull ], [ null, %dynamic_cast.null ]
+// OGCG:   ret ptr %[[PHI]]
+// OGCG: }
+void *ptr_cast_to_complete_addrspace(Base *ptr) {
+  return dynamic_cast<void *>(ptr);
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/dynamic-cast-exact.cpp b/clang/test/CIR/Incubator/CodeGen/dynamic-cast-exact.cpp
new file mode 100644
index 0000000000000..8bb7d1041e161
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/dynamic-cast-exact.cpp
@@ -0,0 +1,130 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O1 -fclangir -clangir-disable-passes -emit-cir -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O1 -fclangir -emit-llvm -fno-clangir-call-conv-lowering -o %t-cir.ll %s
+// RUN: FileCheck --input-file=%t-cir.ll --check-prefix=LLVM %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O1 -emit-llvm -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll --check-prefix=OGCG %s
+
+struct Base1 {
+  virtual ~Base1();
+};
+
+struct Base2 {
+  virtual ~Base2();
+};
+
+struct Derived final : Base1 {};
+
+Derived *ptr_cast(Base1 *ptr) {
+  return dynamic_cast<Derived *>(ptr);
+  //      CHECK: %[[#SRC:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!rec_Base1>>, !cir.ptr<!rec_Base1>
+  // CHECK-NEXT: %[[#SRC_IS_NONNULL:]] = cir.cast ptr_to_bool %[[#SRC]] : !cir.ptr<!rec_Base1> -> !cir.bool
+  // CHECK-NEXT: %[[#SRC_IS_NULL:]] = cir.unary(not, %[[#SRC_IS_NONNULL]]) : !cir.bool, !cir.bool
+  // CHECK-NEXT: %[[#RESULT:]] = cir.ternary(%[[#SRC_IS_NULL]], true {
+  // CHECK-NEXT:   %[[#NULL_DEST_PTR:]] = cir.const #cir.ptr<null> : !cir.ptr<!rec_Derived>
+  // CHECK-NEXT:   cir.yield %[[#NULL_DEST_PTR]] : !cir.ptr<!rec_Derived>
+  // CHECK-NEXT: }, false {
+  // CHECK-NEXT:   %[[#EXPECTED_VPTR:]] = cir.vtable.address_point(@_ZTV7Derived, address_point = <index = 0, offset = 2>) : !cir.vptr
+  // CHECK-NEXT:   %[[#SRC_VPTR_PTR:]] = cir.cast bitcast %[[#SRC]] : !cir.ptr<!rec_Base1> -> !cir.ptr<!cir.vptr>
+  // CHECK-NEXT:   %[[#SRC_VPTR:]] = cir.load{{.*}} %[[#SRC_VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr
+  // CHECK-NEXT:   %[[#SUCCESS:]] = cir.cmp(eq, %[[#SRC_VPTR]], %[[#EXPECTED_VPTR]]) : !cir.vptr, !cir.bool
+  // CHECK-NEXT:   %[[#EXACT_RESULT:]] = cir.ternary(%[[#SUCCESS]], true {
+  // CHECK-NEXT:     %[[#RES:]] = cir.cast bitcast %[[#SRC]] : !cir.ptr<!rec_Base1> -> !cir.ptr<!rec_Derived>
+  // CHECK-NEXT:     cir.yield %[[#RES]] : !cir.ptr<!rec_Derived>
+  // CHECK-NEXT:   }, false {
+  // CHECK-NEXT:     %[[#NULL:]] = cir.const #cir.ptr<null> : !cir.ptr<!rec_Derived>
+  // CHECK-NEXT:     cir.yield %[[#NULL]] : !cir.ptr<!rec_Derived>
+  // CHECK-NEXT:   }) : (!cir.bool) -> !cir.ptr<!rec_Derived>
+  // CHECK-NEXT:   cir.yield %[[#EXACT_RESULT]] : !cir.ptr<!rec_Derived>
+  // CHECK-NEXT: }) : (!cir.bool) -> !cir.ptr<!rec_Derived>
+}
+
+//      LLVM: define dso_local ptr @_Z8ptr_castP5Base1(ptr {{.*}} %[[#SRC:]])
+// LLVM-NEXT:   %[[SRC_IS_NULL:.*]] = icmp eq ptr %[[#SRC]], null
+// LLVM-NEXT:   br i1 %[[SRC_IS_NULL]], label %[[#LABEL_END:]], label %[[#LABEL_NONNULL:]]
+//      LLVM: [[#LABEL_NONNULL]]:
+// LLVM-NEXT:   %[[#VPTR:]] = load ptr, ptr %[[#SRC]], align 8
+// LLVM-NEXT:   %[[#SUCCESS:]] = icmp eq ptr %[[#VPTR]], getelementptr inbounds nuw (i8, ptr @_ZTV7Derived, i64 16)
+// LLVM-NEXT:   %[[EXACT_RESULT:.*]] = select i1 %[[#SUCCESS]], ptr %[[#SRC]], ptr null
+// LLVM-NEXT:   br label %[[#LABEL_END]]
+//      LLVM: [[#LABEL_END]]:
+// LLVM-NEXT:   %[[#RESULT:]] = phi ptr [ %[[EXACT_RESULT]], %[[#LABEL_NONNULL]] ], [ null, %{{.*}} ]
+// LLVM-NEXT:   ret ptr %[[#RESULT]]
+// LLVM-NEXT: }
+
+//      OGCG: define{{.*}} ptr @_Z8ptr_castP5Base1(ptr {{.*}} %[[SRC:.*]])
+// OGCG-NEXT: entry:
+// OGCG-NEXT:   %[[NULL_CHECK:.*]] = icmp eq ptr %[[SRC]], null
+// OGCG-NEXT:   br i1 %[[NULL_CHECK]], label %[[LABEL_NULL:.*]], label %[[LABEL_NOTNULL:.*]]
+//      OGCG: [[LABEL_NOTNULL]]:
+// OGCG-NEXT:   %[[VTABLE:.*]] = load ptr, ptr %[[SRC]], align 8
+// OGCG-NEXT:   %[[VTABLE_CHECK:.*]] = icmp eq ptr %[[VTABLE]], getelementptr inbounds {{.*}} (i8, ptr @_ZTV7Derived, i64 16)
+// OGCG-NEXT:   br i1 %[[VTABLE_CHECK]], label %[[LABEL_END:.*]], label %[[LABEL_NULL]]
+//      OGCG: [[LABEL_NULL]]:
+// OGCG-NEXT:   br label %[[LABEL_END]]
+//      OGCG: [[LABEL_END]]:
+// OGCG-NEXT:   %[[RESULT:.*]] = phi ptr [ %[[SRC]], %[[LABEL_NOTNULL]] ], [ null, %[[LABEL_NULL]] ]
+// OGCG-NEXT:   ret ptr %[[RESULT]]
+// OGCG-NEXT: }
+
+Derived &ref_cast(Base1 &ref) {
+  return dynamic_cast<Derived &>(ref);
+  //      CHECK: %[[#SRC:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!rec_Base1>>, !cir.ptr<!rec_Base1>
+  // CHECK-NEXT: %[[#EXPECTED_VPTR:]] = cir.vtable.address_point(@_ZTV7Derived, address_point = <index = 0, offset = 2>) : !cir.vptr
+  // CHECK-NEXT: %[[#SRC_VPTR_PTR:]] = cir.cast bitcast %[[#SRC]] : !cir.ptr<!rec_Base1> -> !cir.ptr<!cir.vptr>
+  // CHECK-NEXT: %[[#SRC_VPTR:]] = cir.load{{.*}} %[[#SRC_VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr
+  // CHECK-NEXT: %[[#SUCCESS:]] = cir.cmp(eq, %[[#SRC_VPTR]], %[[#EXPECTED_VPTR]]) : !cir.vptr, !cir.bool
+  // CHECK-NEXT: %[[#FAILED:]] = cir.unary(not, %[[#SUCCESS]]) : !cir.bool, !cir.bool
+  // CHECK-NEXT: cir.if %[[#FAILED]] {
+  // CHECK-NEXT:   cir.call @__cxa_bad_cast() : () -> ()
+  // CHECK-NEXT:   cir.unreachable
+  // CHECK-NEXT: }
+  // CHECK-NEXT: %{{.+}} = cir.cast bitcast %[[#SRC]] : !cir.ptr<!rec_Base1> -> !cir.ptr<!rec_Derived>
+}
+
+//      LLVM: define dso_local noundef ptr @_Z8ref_castR5Base1(ptr readonly returned captures(ret: address, provenance) %[[#SRC:]])
+// LLVM-NEXT:   %[[#VPTR:]] = load ptr, ptr %[[#SRC]], align 8
+// LLVM-NEXT:   %[[OK:.+]] = icmp eq ptr %[[#VPTR]], getelementptr inbounds nuw (i8, ptr @_ZTV7Derived, i64 16)
+// LLVM-NEXT:   br i1 %[[OK]], label %[[#LABEL_OK:]], label %[[#LABEL_FAIL:]]
+//      LLVM: [[#LABEL_FAIL]]:
+// LLVM-NEXT:   tail call void @__cxa_bad_cast()
+// LLVM-NEXT:   unreachable
+//      LLVM: [[#LABEL_OK]]:
+// LLVM-NEXT:   ret ptr %[[#SRC]]
+// LLVM-NEXT: }
+
+//      OGCG: define{{.*}} ptr @_Z8ref_castR5Base1(ptr {{.*}} %[[REF:.*]])
+// OGCG-NEXT: entry:
+// OGCG-NEXT:   %[[VTABLE:.*]] = load ptr, ptr %[[REF]], align 8
+// OGCG-NEXT:   %[[VTABLE_CHECK:.*]] = icmp eq ptr %[[VTABLE]], getelementptr inbounds {{.*}} (i8, ptr @_ZTV7Derived, i64 16)
+// OGCG-NEXT:   br i1 %[[VTABLE_CHECK]], label %[[LABEL_END:.*]], label %[[LABEL_NULL:.*]]
+//      OGCG: [[LABEL_NULL]]:
+// OGCG-NEXT:   {{.*}}call void @__cxa_bad_cast()
+// OGCG-NEXT:   unreachable
+//      OGCG: [[LABEL_END]]:
+// OGCG-NEXT:   ret ptr %[[REF]]
+// OGCG-NEXT: }
+
+Derived *ptr_cast_always_fail(Base2 *ptr) {
+  return dynamic_cast<Derived *>(ptr);
+  //      CHECK: %{{.+}} = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!rec_Base2>>, !cir.ptr<!rec_Base2>
+  // CHECK-NEXT: %[[#RESULT:]] = cir.const #cir.ptr<null> : !cir.ptr<!rec_Derived>
+  // CHECK-NEXT: cir.store{{.*}} %[[#RESULT]], %{{.+}} : !cir.ptr<!rec_Derived>, !cir.ptr<!cir.ptr<!rec_Derived>>
+}
+
+//      LLVM: define dso_local noalias noundef ptr @_Z20ptr_cast_always_failP5Base2(ptr readnone captures(none) %{{.+}})
+// LLVM-NEXT:   ret ptr null
+// LLVM-NEXT: }
+
+Derived &ref_cast_always_fail(Base2 &ref) {
+  return dynamic_cast<Derived &>(ref);
+  //      CHECK: %{{.+}} = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!rec_Base2>>, !cir.ptr<!rec_Base2>
+  // CHECK-NEXT: %{{.+}} = cir.const #cir.ptr<null> : !cir.ptr<!rec_Derived>
+  // CHECK-NEXT: cir.call @__cxa_bad_cast() : () -> ()
+  // CHECK-NEXT: cir.unreachable
+}
+
+//      LLVM: define dso_local noalias noundef nonnull ptr @_Z20ref_cast_always_failR5Base2(ptr readnone captures(none) %{{.+}})
+// LLVM-NEXT:   tail call void @__cxa_bad_cast()
+// LLVM-NEXT:   unreachable
+// LLVM-NEXT: }
diff --git a/clang/test/CIR/Incubator/CodeGen/dynamic-cast-relative-layout.cpp b/clang/test/CIR/Incubator/CodeGen/dynamic-cast-relative-layout.cpp
new file mode 100644
index 0000000000000..be5c5bf549112
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/dynamic-cast-relative-layout.cpp
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fexperimental-relative-c++-abi-vtables -std=c++20 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=BEFORE
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fexperimental-relative-c++-abi-vtables -std=c++20 -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=AFTER
+
+struct Base {
+  virtual ~Base();
+};
+
+// BEFORE: !rec_Base = !cir.record<struct "Base"
+
+void *ptr_cast_to_complete(Base *ptr) {
+  return dynamic_cast<void *>(ptr);
+}
+
+// BEFORE: cir.func {{.*}} @_Z20ptr_cast_to_completeP4Base
+// BEFORE:   %{{.+}} = cir.dyn_cast ptr relative_layout %{{.+}} : !cir.ptr<!rec_Base> -> !cir.ptr<!void>
+// BEFORE: }
+
+//      AFTER: cir.func {{.*}} @_Z20ptr_cast_to_completeP4Base
+//      AFTER:   %[[#SRC:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!rec_Base>>, !cir.ptr<!rec_Base>
+// AFTER-NEXT:   %[[#SRC_IS_NOT_NULL:]] = cir.cast ptr_to_bool %[[#SRC]] : !cir.ptr<!rec_Base> -> !cir.bool
+// AFTER-NEXT:   %{{.+}} = cir.ternary(%[[#SRC_IS_NOT_NULL]], true {
+// AFTER-NEXT:     %[[#VPTR_PTR:]] = cir.vtable.get_vptr %[[#SRC]] : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.vptr>
+// AFTER-NEXT:     %[[#VPTR:]] = cir.load %[[#VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr
+// AFTER-NEXT:     %[[#ELEM_PTR:]] = cir.cast bitcast %[[#VPTR]] : !cir.vptr -> !cir.ptr<!s32i>
+// AFTER-NEXT:     %[[#MINUS_TWO:]] = cir.const #cir.int<-2> : !s64i
+// AFTER-NEXT:     %[[#OFFSET_TO_TOP_PTR:]] = cir.ptr_stride %[[#ELEM_PTR]], %[[#MINUS_TWO]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// AFTER-NEXT:     %[[#OFFSET_TO_TOP:]] = cir.load align(4) %[[#OFFSET_TO_TOP_PTR]] : !cir.ptr<!s32i>, !s32i
+// AFTER-NEXT:     %[[#SRC_BYTES_PTR:]] = cir.cast bitcast %[[#SRC]] : !cir.ptr<!rec_Base> -> !cir.ptr<!u8i>
+// AFTER-NEXT:     %[[#DST_BYTES_PTR:]] = cir.ptr_stride %[[#SRC_BYTES_PTR]], %[[#OFFSET_TO_TOP]] : (!cir.ptr<!u8i>, !s32i) -> !cir.ptr<!u8i>
+// AFTER-NEXT:     %[[#DST:]] = cir.cast bitcast %[[#DST_BYTES_PTR]] : !cir.ptr<!u8i> -> !cir.ptr<!void>
+// AFTER-NEXT:     cir.yield %[[#DST]] : !cir.ptr<!void>
+// AFTER-NEXT:   }, false {
+// AFTER-NEXT:     %[[#NULL:]] = cir.const #cir.ptr<null> : !cir.ptr<!void>
+// AFTER-NEXT:     cir.yield %[[#NULL]] : !cir.ptr<!void>
+// AFTER-NEXT:   }) : (!cir.bool) -> !cir.ptr<!void>
+//      AFTER: }
diff --git a/clang/test/CIR/Incubator/CodeGen/dynamic-cast.cpp b/clang/test/CIR/Incubator/CodeGen/dynamic-cast.cpp
new file mode 100644
index 0000000000000..dd2e7eacd7dd9
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/dynamic-cast.cpp
@@ -0,0 +1,90 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2> %t.before.log
+// RUN: FileCheck %s --input-file=%t.before.log -check-prefix=BEFORE
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2> %t.after.log
+// RUN: FileCheck %s --input-file=%t.after.log -check-prefix=AFTER
+
+struct Base {
+  virtual ~Base();
+};
+
+struct Derived : Base {};
+
+// BEFORE-DAG: #dyn_cast_info__ZTI4Base__ZTI7Derived = #cir.dyn_cast_info<src_rtti = #cir.global_view<@_ZTI4Base> : !cir.ptr<!u8i>, dest_rtti = #cir.global_view<@_ZTI7Derived> : !cir.ptr<!u8i>, runtime_func = @__dynamic_cast, bad_cast_func = @__cxa_bad_cast, offset_hint = #cir.int<0> : !s64i>
+// BEFORE-DAG: !rec_Base = !cir.record
+// BEFORE-DAG: !rec_Derived = !cir.record
+
+Derived *ptr_cast(Base *b) {
+  return dynamic_cast<Derived *>(b);
+}
+
+// BEFORE: cir.func {{.*}} @_Z8ptr_castP4Base
+// BEFORE:   %{{.+}} = cir.dyn_cast ptr %{{.+}} : !cir.ptr<!rec_Base> -> !cir.ptr<!rec_Derived> #dyn_cast_info__ZTI4Base__ZTI7Derived
+// BEFORE: }
+
+//      AFTER: cir.func {{.*}} @_Z8ptr_castP4Base
+//      AFTER:   %[[#SRC:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!rec_Base>>, !cir.ptr<!rec_Base>
+// AFTER-NEXT:   %[[#SRC_IS_NOT_NULL:]] = cir.cast ptr_to_bool %[[#SRC]] : !cir.ptr<!rec_Base> -> !cir.bool
+// AFTER-NEXT:   %{{.+}} = cir.ternary(%[[#SRC_IS_NOT_NULL]], true {
+// AFTER-NEXT:     %[[#SRC_VOID_PTR:]] = cir.cast bitcast %[[#SRC]] : !cir.ptr<!rec_Base> -> !cir.ptr<!void>
+// AFTER-NEXT:     %[[#BASE_RTTI:]] = cir.const #cir.global_view<@_ZTI4Base> : !cir.ptr<!u8i>
+// AFTER-NEXT:     %[[#DERIVED_RTTI:]] = cir.const #cir.global_view<@_ZTI7Derived> : !cir.ptr<!u8i>
+// AFTER-NEXT:     %[[#HINT:]] = cir.const #cir.int<0> : !s64i
+// AFTER-NEXT:     %[[#RT_CALL_RET:]] = cir.call @__dynamic_cast(%[[#SRC_VOID_PTR]], %[[#BASE_RTTI]], %[[#DERIVED_RTTI]], %[[#HINT]]) : (!cir.ptr<!void>, !cir.ptr<!u8i>, !cir.ptr<!u8i>, !s64i) -> !cir.ptr<!void>
+// AFTER-NEXT:     %[[#CASTED:]] = cir.cast bitcast %[[#RT_CALL_RET]] : !cir.ptr<!void> -> !cir.ptr<!rec_Derived>
+// AFTER-NEXT:     cir.yield %[[#CASTED]] : !cir.ptr<!rec_Derived>
+// AFTER-NEXT:   }, false {
+// AFTER-NEXT:     %[[#NULL_PTR:]] = cir.const #cir.ptr<null> : !cir.ptr<!rec_Derived>
+// AFTER-NEXT:     cir.yield %[[#NULL_PTR]] : !cir.ptr<!rec_Derived>
+// AFTER-NEXT:   }) : (!cir.bool) -> !cir.ptr<!rec_Derived>
+//      AFTER: }
+
+Derived &ref_cast(Base &b) {
+  return dynamic_cast<Derived &>(b);
+}
+
+// BEFORE: cir.func {{.*}} @_Z8ref_castR4Base
+// BEFORE:   %{{.+}} = cir.dyn_cast ref %{{.+}} : !cir.ptr<!rec_Base> -> !cir.ptr<!rec_Derived> #dyn_cast_info__ZTI4Base__ZTI7Derived
+// BEFORE: }
+
+//      AFTER: cir.func {{.*}} @_Z8ref_castR4Base
+//      AFTER:   %[[#SRC_VOID_PTR:]] = cir.cast bitcast %{{.+}} : !cir.ptr<!rec_Base> -> !cir.ptr<!void>
+// AFTER-NEXT:   %[[#SRC_RTTI:]] = cir.const #cir.global_view<@_ZTI4Base> : !cir.ptr<!u8i>
+// AFTER-NEXT:   %[[#DEST_RTTI:]] = cir.const #cir.global_view<@_ZTI7Derived> : !cir.ptr<!u8i>
+// AFTER-NEXT:   %[[#OFFSET_HINT:]] = cir.const #cir.int<0> : !s64i
+// AFTER-NEXT:   %[[#CASTED_PTR:]] = cir.call @__dynamic_cast(%[[#SRC_VOID_PTR]], %[[#SRC_RTTI]], %[[#DEST_RTTI]], %[[#OFFSET_HINT]]) : (!cir.ptr<!void>, !cir.ptr<!u8i>, !cir.ptr<!u8i>, !s64i) -> !cir.ptr<!void>
+// AFTER-NEXT:   %[[#CASTED_PTR_IS_NOT_NULL:]] = cir.cast ptr_to_bool %[[#CASTED_PTR]] : !cir.ptr<!void> -> !cir.bool
+// AFTER-NEXT:   %[[#CASTED_PTR_IS_NULL:]] = cir.unary(not, %[[#CASTED_PTR_IS_NOT_NULL]]) : !cir.bool, !cir.bool
+// AFTER-NEXT:   cir.if %[[#CASTED_PTR_IS_NULL]] {
+// AFTER-NEXT:     cir.call @__cxa_bad_cast() : () -> ()
+// AFTER-NEXT:     cir.unreachable
+// AFTER-NEXT:   }
+// AFTER-NEXT:   %{{.+}} = cir.cast bitcast %[[#CASTED_PTR]] : !cir.ptr<!void> -> !cir.ptr<!rec_Derived>
+//      AFTER: }
+
+void *ptr_cast_to_complete(Base *ptr) {
+  return dynamic_cast<void *>(ptr);
+}
+
+// BEFORE: cir.func {{.*}} @_Z20ptr_cast_to_completeP4Base
+// BEFORE:   %{{.+}} = cir.dyn_cast ptr %{{.+}} : !cir.ptr<!rec_Base> -> !cir.ptr<!void>
+// BEFORE: }
+
+//      AFTER: cir.func {{.*}} @_Z20ptr_cast_to_completeP4Base
+//      AFTER:   %[[#SRC:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!rec_Base>>, !cir.ptr<!rec_Base>
+// AFTER-NEXT:   %[[#SRC_IS_NOT_NULL:]] = cir.cast ptr_to_bool %[[#SRC]] : !cir.ptr<!rec_Base> -> !cir.bool
+// AFTER-NEXT:   %{{.+}} = cir.ternary(%[[#SRC_IS_NOT_NULL]], true {
+// AFTER-NEXT:     %[[#VPTR_PTR:]] = cir.vtable.get_vptr %[[#SRC]] : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.vptr>
+// AFTER-NEXT:     %[[#VPTR:]] = cir.load %[[#VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr
+// AFTER-NEXT:     %[[#ELEM_PTR:]] = cir.cast bitcast %[[#VPTR]] : !cir.vptr -> !cir.ptr<!s64i>
+// AFTER-NEXT:     %[[#MINUS_TWO:]] = cir.const #cir.int<-2> : !s64i
+// AFTER-NEXT:     %[[#BASE_OFFSET_PTR:]] = cir.ptr_stride %[[#ELEM_PTR]], %[[#MINUS_TWO]] : (!cir.ptr<!s64i>, !s64i) -> !cir.ptr<!s64i>
+// AFTER-NEXT:     %[[#BASE_OFFSET:]] = cir.load align(8) %[[#BASE_OFFSET_PTR]] : !cir.ptr<!s64i>, !s64i
+// AFTER-NEXT:     %[[#SRC_BYTES_PTR:]] = cir.cast bitcast %[[#SRC]] : !cir.ptr<!rec_Base> -> !cir.ptr<!u8i>
+// AFTER-NEXT:     %[[#DST_BYTES_PTR:]] = cir.ptr_stride %[[#SRC_BYTES_PTR]], %[[#BASE_OFFSET]] : (!cir.ptr<!u8i>, !s64i) -> !cir.ptr<!u8i>
+// AFTER-NEXT:     %[[#CASTED_PTR:]] = cir.cast bitcast %[[#DST_BYTES_PTR]] : !cir.ptr<!u8i> -> !cir.ptr<!void>
+// AFTER-NEXT:     cir.yield %[[#CASTED_PTR]] : !cir.ptr<!void>
+// AFTER-NEXT:   }, false {
+// AFTER-NEXT:     %[[#NULL_PTR:]] = cir.const #cir.ptr<null> : !cir.ptr<!void>
+// AFTER-NEXT:     cir.yield %[[#NULL_PTR]] : !cir.ptr<!void>
+// AFTER-NEXT:   }) : (!cir.bool) -> !cir.ptr<!void>
+//      AFTER: }
diff --git a/clang/test/CIR/Incubator/CodeGen/dynamic-cast.mlir b/clang/test/CIR/Incubator/CodeGen/dynamic-cast.mlir
new file mode 100644
index 0000000000000..d69ed5975ba22
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/dynamic-cast.mlir
@@ -0,0 +1,123 @@
+!rec_Base = !cir.record<struct "Base" {!cir.vptr} #cir.record.decl.ast>
+!s64i = !cir.int<s, 64>
+!s8i = !cir.int<s, 8>
+!u8i = !cir.int<u, 8>
+!void = !cir.void
+#loc5 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":16:19)
+#loc6 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":16:25)
+#loc12 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":41:19)
+#loc13 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":41:25)
+#loc21 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":64:28)
+#loc22 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":64:34)
+!rec_Derived = !cir.record<struct "Derived" {!rec_Base} #cir.record.decl.ast>
+#loc30 = loc(fused[#loc5, #loc6])
+#loc33 = loc(fused[#loc12, #loc13])
+#loc37 = loc(fused[#loc21, #loc22])
+!rec_anon_struct = !cir.record<struct  {!cir.ptr<!u8i>, !cir.ptr<!u8i>, !cir.ptr<!u8i>}>
+module @"/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp" attributes {cir.lang = #cir.lang<cxx>, cir.sob = #cir.signed_overflow_behavior<undefined>, cir.triple = "x86_64-unknown-linux-gnu", cir.type_size_info = #cir.type_size_info<char = 8, int = 32, size_t = 64>, dlti.dl_spec = #dlti.dl_spec<!llvm.ptr<270> = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array<i32: 8, 16, 32, 64>, "dlti.stack_alignment" = 128 : i64>} {
+  cir.global "private" constant external @_ZTI4Base : !cir.ptr<!u8i> loc(#loc28)
+  cir.global "private" external @_ZTVN10__cxxabiv120__si_class_type_infoE : !cir.ptr<!cir.ptr<!u8i>> loc(#loc28)
+  cir.global linkonce_odr comdat @_ZTS7Derived = #cir.const_array<"7Derived" : !cir.array<!s8i x 8>> : !cir.array<!s8i x 8> {alignment = 1 : i64} loc(#loc28)
+  cir.global constant external @_ZTI7Derived = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv120__si_class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS7Derived> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI4Base> : !cir.ptr<!u8i>}> : !rec_anon_struct {alignment = 8 : i64} loc(#loc28)
+  cir.func private @__dynamic_cast(!cir.ptr<!void>, !cir.ptr<!u8i>, !cir.ptr<!u8i>, !s64i) -> !cir.ptr<!void> loc(#loc)
+  cir.func private @__cxa_bad_cast() loc(#loc)
+  cir.func no_inline dso_local optnone @_Z8ptr_castP4Base(%arg0: !cir.ptr<!rec_Base> loc(fused[#loc5, #loc6])) -> !cir.ptr<!rec_Derived> {nothrow = #cir.nothrow} {
+    %0 = cir.alloca !cir.ptr<!rec_Base>, !cir.ptr<!cir.ptr<!rec_Base>>, ["b", init] {alignment = 8 : i64} loc(#loc30)
+    %1 = cir.alloca !cir.ptr<!rec_Derived>, !cir.ptr<!cir.ptr<!rec_Derived>>, ["__retval"] {alignment = 8 : i64} loc(#loc4)
+    cir.store %arg0, %0 : !cir.ptr<!rec_Base>, !cir.ptr<!cir.ptr<!rec_Base>> loc(#loc7)
+    %2 = cir.load align(8) %0 : !cir.ptr<!cir.ptr<!rec_Base>>, !cir.ptr<!rec_Base> loc(#loc8)
+    %3 = cir.cast ptr_to_bool %2 : !cir.ptr<!rec_Base> -> !cir.bool loc(#loc8)
+    %4 = cir.ternary(%3, true {
+      %6 = cir.cast bitcast %2 : !cir.ptr<!rec_Base> -> !cir.ptr<!void> loc(#loc8)
+      %7 = cir.const #cir.global_view<@_ZTI4Base> : !cir.ptr<!u8i> loc(#loc28)
+      %8 = cir.const #cir.global_view<@_ZTI7Derived> : !cir.ptr<!u8i> loc(#loc28)
+      %9 = cir.const #cir.int<0> : !s64i loc(#loc28)
+      %10 = cir.call @__dynamic_cast(%6, %7, %8, %9) : (!cir.ptr<!void>, !cir.ptr<!u8i>, !cir.ptr<!u8i>, !s64i) -> !cir.ptr<!void> loc(#loc28)
+      %11 = cir.cast bitcast %10 : !cir.ptr<!void> -> !cir.ptr<!rec_Derived> loc(#loc28)
+      cir.yield %11 : !cir.ptr<!rec_Derived> loc(#loc28)
+    }, false {
+      %6 = cir.const #cir.ptr<null> : !cir.ptr<!rec_Derived> loc(#loc28)
+      cir.yield %6 : !cir.ptr<!rec_Derived> loc(#loc28)
+    }) : (!cir.bool) -> !cir.ptr<!rec_Derived> loc(#loc28)
+    cir.store %4, %1 : !cir.ptr<!rec_Derived>, !cir.ptr<!cir.ptr<!rec_Derived>> loc(#loc31)
+    %5 = cir.load %1 : !cir.ptr<!cir.ptr<!rec_Derived>>, !cir.ptr<!rec_Derived> loc(#loc31)
+    cir.return %5 : !cir.ptr<!rec_Derived> loc(#loc31)
+  } loc(#loc29)
+  cir.func no_inline dso_local optnone @_Z8ref_castR4Base(%arg0: !cir.ptr<!rec_Base> loc(fused[#loc12, #loc13])) -> !cir.ptr<!rec_Derived> {nothrow = #cir.nothrow} {
+    %0 = cir.alloca !cir.ptr<!rec_Base>, !cir.ptr<!cir.ptr<!rec_Base>>, ["b", init, const] {alignment = 8 : i64} loc(#loc33)
+    %1 = cir.alloca !cir.ptr<!rec_Derived>, !cir.ptr<!cir.ptr<!rec_Derived>>, ["__retval"] {alignment = 8 : i64} loc(#loc11)
+    cir.store %arg0, %0 : !cir.ptr<!rec_Base>, !cir.ptr<!cir.ptr<!rec_Base>> loc(#loc14)
+    %2 = cir.load %0 : !cir.ptr<!cir.ptr<!rec_Base>>, !cir.ptr<!rec_Base> loc(#loc15)
+    %3 = cir.cast bitcast %2 : !cir.ptr<!rec_Base> -> !cir.ptr<!void> loc(#loc15)
+    %4 = cir.const #cir.global_view<@_ZTI4Base> : !cir.ptr<!u8i> loc(#loc34)
+    %5 = cir.const #cir.global_view<@_ZTI7Derived> : !cir.ptr<!u8i> loc(#loc34)
+    %6 = cir.const #cir.int<0> : !s64i loc(#loc34)
+    %7 = cir.call @__dynamic_cast(%3, %4, %5, %6) : (!cir.ptr<!void>, !cir.ptr<!u8i>, !cir.ptr<!u8i>, !s64i) -> !cir.ptr<!void> loc(#loc34)
+    %8 = cir.cast ptr_to_bool %7 : !cir.ptr<!void> -> !cir.bool loc(#loc34)
+    %9 = cir.unary(not, %8) : !cir.bool, !cir.bool loc(#loc34)
+    cir.if %9 {
+      cir.call @__cxa_bad_cast() : () -> () loc(#loc34)
+      cir.unreachable loc(#loc34)
+    } loc(#loc34)
+    %10 = cir.cast bitcast %7 : !cir.ptr<!void> -> !cir.ptr<!rec_Derived> loc(#loc34)
+    cir.store align(8) %10, %1 : !cir.ptr<!rec_Derived>, !cir.ptr<!cir.ptr<!rec_Derived>> loc(#loc35)
+    %11 = cir.load %1 : !cir.ptr<!cir.ptr<!rec_Derived>>, !cir.ptr<!rec_Derived> loc(#loc35)
+    cir.return %11 : !cir.ptr<!rec_Derived> loc(#loc35)
+  } loc(#loc32)
+  cir.func no_inline dso_local optnone @_Z20ptr_cast_to_completeP4Base(%arg0: !cir.ptr<!rec_Base> loc(fused[#loc21, #loc22])) -> !cir.ptr<!void> {nothrow = #cir.nothrow} {
+    %0 = cir.alloca !cir.ptr<!rec_Base>, !cir.ptr<!cir.ptr<!rec_Base>>, ["ptr", init] {alignment = 8 : i64} loc(#loc37)
+    %1 = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["__retval"] {alignment = 8 : i64} loc(#loc20)
+    cir.store %arg0, %0 : !cir.ptr<!rec_Base>, !cir.ptr<!cir.ptr<!rec_Base>> loc(#loc23)
+    %2 = cir.load align(8) %0 : !cir.ptr<!cir.ptr<!rec_Base>>, !cir.ptr<!rec_Base> loc(#loc24)
+    %3 = cir.cast ptr_to_bool %2 : !cir.ptr<!rec_Base> -> !cir.bool loc(#loc24)
+    %4 = cir.ternary(%3, true {
+      %6 = cir.vtable.get_vptr %2 : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.vptr> loc(#loc38)
+      %7 = cir.load %6 : !cir.ptr<!cir.vptr>, !cir.vptr loc(#loc38)
+      %8 = cir.cast bitcast %7 : !cir.vptr -> !cir.ptr<!s64i> loc(#loc38)
+      %9 = cir.const #cir.int<-2> : !s64i loc(#loc38)
+      %10 = cir.ptr_stride %8, %9 : (!cir.ptr<!s64i>, !s64i) -> !cir.ptr<!s64i> loc(#loc38)
+      %11 = cir.load align(8) %10 : !cir.ptr<!s64i>, !s64i loc(#loc38)
+      %12 = cir.cast bitcast %2 : !cir.ptr<!rec_Base> -> !cir.ptr<!u8i> loc(#loc24)
+      %13 = cir.ptr_stride %12, %11 : (!cir.ptr<!u8i>, !s64i) -> !cir.ptr<!u8i> loc(#loc38)
+      %14 = cir.cast bitcast %13 : !cir.ptr<!u8i> -> !cir.ptr<!void> loc(#loc38)
+      cir.yield %14 : !cir.ptr<!void> loc(#loc38)
+    }, false {
+      %6 = cir.const #cir.ptr<null> : !cir.ptr<!void> loc(#loc38)
+      cir.yield %6 : !cir.ptr<!void> loc(#loc38)
+    }) : (!cir.bool) -> !cir.ptr<!void> loc(#loc38)
+    cir.store %4, %1 : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>> loc(#loc39)
+    %5 = cir.load %1 : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void> loc(#loc39)
+    cir.return %5 : !cir.ptr<!void> loc(#loc39)
+  } loc(#loc36)
+} loc(#loc)
+#loc = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":0:0)
+#loc1 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":17:10)
+#loc2 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":17:35)
+#loc3 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":16:1)
+#loc4 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":18:1)
+#loc7 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":16:28)
+#loc8 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":17:34)
+#loc9 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":17:3)
+#loc10 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":41:1)
+#loc11 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":43:1)
+#loc14 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":41:28)
+#loc15 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":42:34)
+#loc16 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":42:10)
+#loc17 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":42:35)
+#loc18 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":42:3)
+#loc19 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":64:1)
+#loc20 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":66:1)
+#loc23 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":64:39)
+#loc24 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":65:31)
+#loc25 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":65:10)
+#loc26 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":65:34)
+#loc27 = loc("/Users/henrichlauko/src/clangir/clang/test/CIR/CodeGen/dynamic-cast.cpp":65:3)
+#loc28 = loc(fused[#loc1, #loc2])
+#loc29 = loc(fused[#loc3, #loc4])
+#loc31 = loc(fused[#loc9, #loc2])
+#loc32 = loc(fused[#loc10, #loc11])
+#loc34 = loc(fused[#loc16, #loc17])
+#loc35 = loc(fused[#loc18, #loc17])
+#loc36 = loc(fused[#loc19, #loc20])
+#loc38 = loc(fused[#loc25, #loc26])
+#loc39 = loc(fused[#loc27, #loc26])
diff --git a/clang/test/CIR/Incubator/CodeGen/eh.cpp b/clang/test/CIR/Incubator/CodeGen/eh.cpp
new file mode 100644
index 0000000000000..b55383579d65d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/eh.cpp
@@ -0,0 +1,63 @@
+// XFAIL: *
+// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -fclangir -fcxx-exceptions -fexceptions -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -fclangir -fcxx-exceptions -fexceptions -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+struct test1_D {
+  double d;
+} d1;
+
+void test1() {
+  throw d1;
+}
+
+// CIR-LABEL: @_Z5test1v
+// CIR:   %[[ALLOC:.*]] = cir.alloc.exception 8 -> !cir.ptr<!rec_test1_D>
+// CIR:   %[[G:.*]] = cir.get_global @d1 : !cir.ptr<!rec_test1_D>
+// CIR:   cir.copy %[[G]] to %[[ALLOC]] : !cir.ptr<!rec_test1_D>
+// CIR:   cir.throw %[[ALLOC]] : !cir.ptr<!rec_test1_D>, @_ZTI7test1_D
+// CIR:   cir.unreachable
+// CIR: }
+
+// LLVM-LABEL: @_Z5test1v
+// LLVM:   %[[ALLOC:.*]] = call ptr @__cxa_allocate_exception(i64 8)
+
+// FIXME: this is a llvm.memcpy.p0.p0.i64 once we fix isTrivialCtorOrDtor().
+// LLVM:   call void @llvm.memcpy.p0.p0.i32(ptr %1, ptr @d1, i32 8, i1 false)
+// LLVM:   call void @__cxa_throw(ptr %[[ALLOC]], ptr @_ZTI7test1_D, ptr null)
+// LLVM:   unreachable
+// LLVM: }
+
+struct test2_D {
+  test2_D(const test2_D&o);
+  test2_D();
+  virtual void bar() { }
+  int i; int j;
+} d2;
+
+void test2() {
+  throw d2;
+}
+
+// CIR-LABEL: @_Z5test2v
+// CIR:   %[[ALLOC:.*]] = cir.alloc.exception 16 -> !cir.ptr<!rec_test2_D>
+// CIR:   %[[G:.*]] = cir.get_global @d2 : !cir.ptr<!rec_test2_D>
+// CIR:   cir.try synthetic cleanup {
+// CIR:     cir.copy %[[G]] to %[[ALLOC]] : !cir.ptr<!rec_test2_D>
+// CIR:     cir.yield
+// CIR:   } catch [#cir.unwind {
+// CIR:     cir.resume
+// CIR:   }]
+// CIR:   cir.throw %[[ALLOC]] : !cir.ptr<!rec_test2_D>, @_ZTI7test2_D
+// CIR:   cir.unreachable
+
+// LLVM-LABEL: @_Z5test2v
+
+// LLVM: %[[ALLOC:.*]] = call ptr @__cxa_allocate_exception(i64 16)
+
+// LLVM: landingpad { ptr, i32 }
+// LLVM:         cleanup
+// LLVM: extractvalue { ptr, i32 }
+// LLVM: extractvalue { ptr, i32 }
+// LLVM: call void @__cxa_free_exception(ptr %[[ALLOC]])
diff --git a/clang/test/CIR/Incubator/CodeGen/empty-try-catch.cpp b/clang/test/CIR/Incubator/CodeGen/empty-try-catch.cpp
new file mode 100644
index 0000000000000..8e280a1056f3f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/empty-try-catch.cpp
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+void empty_try_block_with_catch_all() {
+  try {} catch (...) {}
+}
+
+// CIR: cir.func{{.*}} @_Z30empty_try_block_with_catch_allv()
+// CIR:   cir.return
+
+// LLVM: define{{.*}} void @_Z30empty_try_block_with_catch_allv()
+// LLVM:  ret void
+
+// OGCG: define{{.*}} void @_Z30empty_try_block_with_catch_allv()
+// OGCG:   ret void
+
+void empty_try_block_with_catch_with_int_exception() {
+  try {} catch (int e) {}
+}
+
+// CIR: cir.func{{.*}} @_Z45empty_try_block_with_catch_with_int_exceptionv()
+// CIR:   cir.return
+
+// LLVM: define{{.*}} void @_Z45empty_try_block_with_catch_with_int_exceptionv()
+// LLVM:  ret void
+
+// OGCG: define{{.*}} void @_Z45empty_try_block_with_catch_with_int_exceptionv()
+// OGCG:   ret void
+
diff --git a/clang/test/CIR/Incubator/CodeGen/error-attr.c b/clang/test/CIR/Incubator/CodeGen/error-attr.c
new file mode 100644
index 0000000000000..993a7b9a19772
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/error-attr.c
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir --check-prefix=CIR %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll --check-prefix=LLVM %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t-og.ll
+// RUN: FileCheck --input-file=%t-og.ll --check-prefix=OGCG %s
+
+// Test __attribute__((error("msg")))
+__attribute__((error("This function should not be called")))
+void error_function(void) {}
+
+// CIR: #cir<extra({{.*}}dontcall = #cir.dontcall<"This function should not be called", true>
+// LLVM: define{{.*}}@error_function{{.*}}#[[#ATTR_ERROR:]]
+// OGCG: define{{.*}}@error_function{{.*}}#[[#OGCG_ATTR_ERROR:]]
+
+// Test __attribute__((warning("msg")))
+__attribute__((warning("This function is deprecated")))
+void warning_function(void) {}
+
+// CIR: #cir<extra({{.*}}dontcall = #cir.dontcall<"This function is deprecated", false>
+// LLVM: define{{.*}}@warning_function{{.*}}#[[#ATTR_WARNING:]]
+// OGCG: define{{.*}}@warning_function{{.*}}#[[#OGCG_ATTR_WARNING:]]
+
+// LLVM-DAG: attributes #[[#ATTR_ERROR]] = {{.*}}"dontcall-error"="This function should not be called"
+// LLVM-DAG: attributes #[[#ATTR_WARNING]] = {{.*}}"dontcall-warn"="This function is deprecated"
+// OGCG-DAG: attributes #[[#OGCG_ATTR_ERROR]] = {{.*}}"dontcall-error"="This function should not be called"
+// OGCG-DAG: attributes #[[#OGCG_ATTR_WARNING]] = {{.*}}"dontcall-warn"="This function is deprecated"
diff --git a/clang/test/CIR/Incubator/CodeGen/evaluate-expr.c b/clang/test/CIR/Incubator/CodeGen/evaluate-expr.c
new file mode 100644
index 0000000000000..e40dd863ee435
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/evaluate-expr.c
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+static const int g = 1;
+void foo() {
+  if ((g != 1) && (g != 1))
+    return;
+  if ((g == 1) || (g == 1))
+    return;
+}
+// CHECK:  cir.func {{.*}} @foo()
+// CHECK:    cir.scope {
+// CHECK:      [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+// CHECK:      [[FALSE:%.*]] = cir.cast int_to_bool [[ZERO]] : !s32i -> !cir.bool
+// CHECK:      cir.if [[FALSE]] {
+// CHECK:        cir.return
+// CHECK:      }
+// CHECK:    }
+// CHECK:    cir.return
+
+typedef struct { int x; } S;
+static const S s = {0};
+void bar() {
+  int a =  s.x;
+}
+// CHECK:  cir.func {{.*}} @bar()
+// CHECK:    [[ALLOC:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+// CHECK:    {{%.*}} = cir.get_global @s : !cir.ptr<!rec_S>
+// CHECK:    [[CONST:%.*]] = cir.const #cir.int<0> : !s32i
+// CHECK:    cir.store{{.*}} [[CONST]], [[ALLOC]] : !s32i, !cir.ptr<!s32i>
+// CHECK:    cir.return
diff --git a/clang/test/CIR/Incubator/CodeGen/expressions.cpp b/clang/test/CIR/Incubator/CodeGen/expressions.cpp
new file mode 100644
index 0000000000000..9921965037a2b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/expressions.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+
+void test(int a) {
+// CIR: cir.func {{.*}} @{{.+}}test
+
+  // Should generate LValue parenthesis expression.
+  (a) = 1;
+  // CIR: %[[CONST:.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: cir.store{{.*}} %[[CONST]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/finegrain-bitfield-access.cpp b/clang/test/CIR/Incubator/CodeGen/finegrain-bitfield-access.cpp
new file mode 100644
index 0000000000000..3699e7def1d8f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/finegrain-bitfield-access.cpp
@@ -0,0 +1,271 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -ffine-grained-bitfield-accesses %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -ffine-grained-bitfield-accesses %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -ffine-grained-bitfield-accesses %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+struct S1 {
+  unsigned f1:2;
+  unsigned f2:6;
+  unsigned f3:8;
+  unsigned f4:4;
+  unsigned f5:8;
+};
+
+// CIR-DAG: !rec_S1 = !cir.record<struct "S1" {!u8i, !u8i, !u16i} #cir.record.decl.ast>
+// LLVM-DAG: %struct.S1 = type { i8, i8, i16 }
+// OGCG-DAG: %struct.S1 = type { i8, i8, i16 }
+
+struct S2 {
+  unsigned long f1:16;
+  unsigned long f2:16;
+  unsigned long f3:6;
+};
+
+// CIR-DAG: !rec_S2 = !cir.record<struct "S2" padded {!u16i, !u16i, !u8i, !cir.array<!u8i x 3>} #cir.record.decl.ast>
+// LLVM-DAG: %struct.S2 = type { i16, i16, i8, [3 x i8] }
+// OGCG-DAG: %struct.S2 = type { i16, i16, i8, [3 x i8] }
+
+struct S3 {
+  unsigned long f1:14;
+  unsigned long f2:18;
+  unsigned long f3:32;
+};
+
+// CIR-DAG: !rec_S3 = !cir.record<struct "S3" {!u32i, !u32i} #cir.record.decl.ast>
+// LLVM-DAG: %struct.S3 = type { i32, i32 }
+// OGCG-DAG: %struct.S3 = type { i32, i32 }
+
+S1 a1;
+S2 a2;
+S3 a3;
+
+unsigned read8_1() {
+  return a1.f3;
+}
+
+// CIR-LABEL: @_Z7read8_1v
+// CIR: [[MEMBER:%.*]] = cir.get_member %1[1] {name = "f3"} : !cir.ptr<!rec_S1> -> !cir.ptr<!u8i>
+// CIR: [[BITFI:%.*]] = cir.get_bitfield align(1) (#bfi_f3, [[MEMBER]] : !cir.ptr<!u8i>) -> !u32i
+// CIR: cir.store{{.*}} [[BITFI]], {{.*}} : !u32i, !cir.ptr<!u32i>
+// CIR: [[RET:%.*]] = cir.load{{.*}} : !cir.ptr<!u32i>, !u32i
+// CIR: cir.return [[RET]] : !u32i
+
+// LLVM-LABEL: @_Z7read8_1v
+// LLVM:  [[MEMBER:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr {{.*}}, i64 1), align 1
+// LLVM:  [[BFCAST:%.*]] = zext i8 [[MEMBER]] to i32
+// LLVM:  store i32 [[BFCAST]], ptr {{.*}}, align 4
+// LLVM:  [[RET:%.*]] = load i32, ptr {{.*}}, align 4
+// LLVM:  ret i32 [[RET]]
+
+// OGCG-LABEL: @_Z7read8_1v
+// OGCG: [[BFLOAD:%.*]] = load i8, ptr getelementptr inbounds nuw (%struct.S1, ptr {{.*}}, i32 0, i32 1), align 1
+// OGCG-NEXT: [[BFCAST:%.*]] = zext i8 [[BFLOAD]] to i32
+// OGCG-NEXT: ret i32 [[BFCAST]]
+
+void write8_1() {
+  a1.f3 = 3;
+}
+
+// CIR-LABEL: @_Z8write8_1v
+// CIR: [[CONST3:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: [[INT3:%.*]] = cir.cast integral [[CONST3]] : !s32i -> !u32i
+// CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[1] {name = "f3"} : !cir.ptr<!rec_S1> -> !cir.ptr<!u8i>
+// CIR: cir.set_bitfield align(1) (#bfi_f3, [[MEMBER]] : !cir.ptr<!u8i>, [[INT3]] : !u32i) -> !u32i
+
+// LLVM-LABEL: @_Z8write8_1v
+// LLVM:  store i8 3, ptr getelementptr inbounds nuw (i8, ptr {{.*}}, i64 1), align 1
+// LLVM:  ret void
+
+// OGCG-LABEL: @_Z8write8_1v
+// OGCG: store i8 3, ptr getelementptr inbounds nuw (%struct.S1, ptr {{.*}}, i32 0, i32 1), align 1
+// OGCG-NEXT: ret void
+
+unsigned read8_2() {
+
+  return a1.f5;
+}
+
+// CIR-LABEL: @_Z7read8_2v
+// CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[2] {name = "f5"} : !cir.ptr<!rec_S1> -> !cir.ptr<!u16i>
+// CIR: [[BITFI:%.*]] = cir.get_bitfield align(2) (#bfi_f5, [[MEMBER]] : !cir.ptr<!u16i>) -> !u32i
+// CIR: cir.store{{.*}} [[BITFI]], {{.*}} : !u32i, !cir.ptr<!u32i>
+// CIR: [[RET:%.*]] = cir.load{{.*}} : !cir.ptr<!u32i>, !u32i
+// CIR: cir.return [[RET]] : !u32i
+
+// LLVM-LABEL: @_Z7read8_2v
+// LLVM:  [[BFLOAD:%.*]] = load i16, ptr getelementptr inbounds nuw (i8, ptr {{.*}}, i64 2), align 2
+// LLVM:  [[BFLSHR:%.*]] = lshr i16 [[BFLOAD]], 4
+// LLVM:  [[BFCLEAR:%.*]] = and i16 [[BFLSHR]], 255
+// LLVM:  [[BFCAST:%.*]] = zext i16 [[BFCLEAR]] to i32
+// LLVM:  store i32 [[BFCAST]], ptr {{.*}}, align 4
+// LLVM:  [[RET:%.*]] = load i32, ptr {{.*}}, align 4
+// LLVM:  ret i32 [[RET]]
+
+// OGCG-LABEL: @_Z7read8_2v
+// OGCG: [[BFLOAD:%.*]] = load i16, ptr getelementptr inbounds nuw (%struct.S1, ptr {{.*}}, i32 0, i32 2), align 2
+// OGCG-NEXT: [[BFLSHR:%.*]] = lshr i16 [[BFLOAD]], 4
+// OGCG-NEXT: [[BFCLEAR:%.*]] = and i16 [[BFLSHR]], 255
+// OGCG-NEXT: [[BFCAST:%.*]] = zext i16 [[BFCLEAR]] to i32
+// OGCG-NEXT: ret i32 [[BFCAST]]
+
+void write8_2() {
+  a1.f5 = 3;
+}
+
+// CIR-LABEL: @_Z8write8_2v
+// CIR: [[CONST3:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: [[INT3:%.*]] = cir.cast integral [[CONST3]] : !s32i -> !u32i
+// CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[2] {name = "f5"} : !cir.ptr<!rec_S1> -> !cir.ptr<!u16i>
+// CIR: cir.set_bitfield align(2) (#bfi_f5, [[MEMBER]] : !cir.ptr<!u16i>, {{.*}} : !u32i) -> !u32i
+
+// LLVM-LABEL: @_Z8write8_2v
+// LLVM:  [[BFLOAD:%.*]] = load i16, ptr getelementptr inbounds nuw (i8, ptr {{.*}}, i64 2), align 2
+// LLVM:  [[BFCLEAR:%.*]] = and i16 [[BFLOAD]], -4081
+// LLVM:  [[BFSET:%.*]] = or i16 [[BFCLEAR]], 48
+// LLVM:  store i16 [[BFSET]], ptr getelementptr inbounds nuw (i8, ptr {{.*}}, i64 2), align 2
+// LLVM:  ret void
+
+// OGCG-LABEL: @_Z8write8_2v
+// OGCG: [[BFLOAD:%.*]] = load i16, ptr getelementptr inbounds nuw (%struct.S1, ptr {{.*}}, i32 0, i32 2), align 2
+// OGCG-NEXT: [[BFCLEAR:%.*]] = and i16 [[BFLOAD]], -4081
+// OGCG-NEXT: [[BFSET:%.*]] = or i16 [[BFCLEAR]], 48
+// OGCG-NEXT: store i16 [[BFSET]], ptr getelementptr inbounds nuw (%struct.S1, ptr {{.*}}, i32 0, i32 2), align 2
+// OGCG-NEXT: ret void
+
+unsigned read16_1() {
+  return a2.f1;
+}
+
+// CIR-LABEL: @_Z8read16_1v
+// CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[0] {name = "f1"} : !cir.ptr<!rec_S2> -> !cir.ptr<!u16i>
+// CIR: [[BITFI:%.*]] = cir.get_bitfield align(8) (#bfi_f1, [[MEMBER]] : !cir.ptr<!u16i>) -> !u64i
+// CIR: [[BFCAST:%.*]] = cir.cast integral [[BITFI]] : !u64i -> !u32i
+// CIR: cir.store{{.*}} [[BFCAST]], {{.*}} : !u32i, !cir.ptr<!u32i>
+// CIR: [[RET:%.*]] = cir.load{{.*}} : !cir.ptr<!u32i>, !u32i
+// CIR: cir.return [[RET]] : !u32i
+
+// LLVM-LABEL: @_Z8read16_1v
+// LLVM:  [[BFLOAD:%.*]] = load i16, ptr {{.*}}, align 8
+// LLVM:  [[BFCAST:%.*]] = zext i16 [[BFLOAD]] to i64
+// LLVM:  [[BF:%.*]] = trunc i64 [[BFCAST]] to i32
+// LLVM:  store i32 [[BF]], ptr {{.*}}, align 4
+// LLVM:  [[RET:%.*]] = load i32, ptr {{.*}}, align 4
+// LLVM:  ret i32 [[RET]]
+
+// OGCG-LABEL: @_Z8read16_1v
+// OGCG: [[BFLOAD:%.*]] = load i16, ptr {{.*}}, align 8
+// OGCG-NEXT: [[BFCAST:%.*]] = zext i16 [[BFLOAD]] to i64
+// OGCG-NEXT: [[RET:%.*]] = trunc i64 [[BFCAST]] to i32
+// OGCG-NEXT: ret i32 [[RET]]
+
+unsigned read16_2() {
+  return a2.f2;
+}
+
+// CIR-LABEL: @_Z8read16_2v
+// CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[1] {name = "f2"} : !cir.ptr<!rec_S2> -> !cir.ptr<!u16i>
+// CIR: [[BITFI:%.*]] = cir.get_bitfield align(2) (#bfi_f2, [[MEMBER]] : !cir.ptr<!u16i>) -> !u64i
+// CIR: [[BFCAST:%.*]] = cir.cast integral [[BITFI]] : !u64i -> !u32i
+// CIR: cir.store{{.*}} [[BFCAST]], {{.*}} : !u32i, !cir.ptr<!u32i>
+// CIR: [[RET:%.*]] = cir.load{{.*}} : !cir.ptr<!u32i>, !u32i
+// CIR: cir.return [[RET]] : !u32i
+
+// LLVM-LABEL: @_Z8read16_2v
+// LLVM:  [[BFLOAD:%.*]] = load i16, ptr getelementptr inbounds nuw (i8, ptr {{.*}}, i64 2), align 2
+// LLVM:  [[BFCAST:%.*]] = zext i16 [[BFLOAD]] to i64
+// LLVM:  [[BF:%.*]] = trunc i64 [[BFCAST]] to i32
+// LLVM:  store i32 [[BF]], ptr {{.*}}, align 4
+// LLVM:  [[RET:%.*]] = load i32, ptr {{.*}}, align 4
+// LLVM:  ret i32 [[RET]]
+
+// OGCG-LABEL: @_Z8read16_2v
+// OGCG: [[BFLOAD:%.*]] = load i16, ptr getelementptr inbounds nuw (%struct.S2, ptr {{.*}}, i32 0, i32 1), align 2
+// OGCG-NEXT: [[BFCAST:%.*]] = zext i16 [[BFLOAD]] to i64
+// OGCG-NEXT: [[RET:%.*]] = trunc i64 [[BFCAST]] to i32
+// OGCG-NEXT: ret i32 [[RET]]
+
+void write16_1() {
+  a2.f1 = 5;
+}
+
+// CIR-LABEL: @_Z9write16_1v
+// CIR: [[CONST5:%.*]] = cir.const #cir.int<5> : !s32i
+// CIR: [[INT5:%.*]] = cir.cast integral [[CONST5]] : !s32i -> !u64i
+// CIR: [[MEMBER:%.*]]  = cir.get_member {{.*}}[0] {name = "f1"} : !cir.ptr<!rec_S2> -> !cir.ptr<!u16i>
+// CIR: cir.set_bitfield align(8) (#bfi_f1, [[MEMBER]] : !cir.ptr<!u16i>, [[INT5]] : !u64i) -> !u64i
+// CIR: cir.return
+
+// LLVM-LABEL: @_Z9write16_1v
+// LLVM:  store i16 5, ptr {{.*}}, align 8
+// LLVM:  ret void
+
+// OGCG-LABEL: @_Z9write16_1v
+// OGCG: store i16 5, ptr {{.*}}, align 8
+// OGCG-NEXT: ret void
+
+void write16_2() {
+
+  a2.f2 = 5;
+}
+
+// CIR-LABEL: @_Z9write16_2v
+// CIR: [[CONST5:%.*]] = cir.const #cir.int<5> : !s32i
+// CIR: [[INT5:%.*]] = cir.cast integral [[CONST5]] : !s32i -> !u64i
+// CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[1] {name = "f2"} : !cir.ptr<!rec_S2> -> !cir.ptr<!u16i>
+// CIR: cir.set_bitfield align(2) (#bfi_f2, [[MEMBER]] : !cir.ptr<!u16i>, {{.*}} : !u64i) -> !u64i
+// CIR: cir.return
+
+// LLVM-LABEL: @_Z9write16_2v
+// LLVM: store i16 5, ptr getelementptr inbounds nuw (i8, ptr {{.*}}, i64 2), align 2
+// LLVM: ret void
+
+// OGCG-LABEL: @_Z9write16_2v
+// OGCG: store i16 5, ptr getelementptr inbounds nuw (%struct.S2, ptr {{.*}}, i32 0, i32 1), align 2
+// OGCG-NEXT: ret void
+
+unsigned read32_1() {
+
+  return a3.f3;
+}
+// CIR-LABEL: @_Z8read32_1v
+// CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[1] {name = "f3"} : !cir.ptr<!rec_S3> -> !cir.ptr<!u32i>
+// CIR: [[BITFI:%.*]] = cir.get_bitfield align(4) (#bfi_f3_1, [[MEMBER]] : !cir.ptr<!u32i>) -> !u64i
+// CIR: [[BFCAST:%.*]] = cir.cast integral [[BITFI]] : !u64i -> !u32i
+// CIR: cir.store{{.*}} [[BFCAST]], {{.*}} : !u32i, !cir.ptr<!u32i>
+// CIR: [[RET:%.*]] = cir.load{{.*}} : !cir.ptr<!u32i>, !u32i
+// CIR: cir.return [[RET]] : !u32i
+
+// LLVM-LABEL: @_Z8read32_1v
+// LLVM: [[BFLOAD:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr {{.*}}, i64 4), align 4
+// LLVM: [[BFCAST:%.*]] = zext i32 [[BFLOAD]] to i64
+// LLVM: [[BF:%.*]] = trunc i64 [[BFCAST]] to i32
+// LLVM: store i32 [[BF]], ptr {{.*}}, align 4
+// LLVM: [[RET:%.*]] = load i32, ptr {{.*}}, align 4
+// LLVM: ret i32 [[RET]]
+
+// OGCG-LABEL: @_Z8read32_1v
+// OGCG: [[BFLOAD:%.*]] = load i32, ptr getelementptr inbounds nuw (%struct.S3, ptr {{.*}}, i32 0, i32 1), align 4
+// OGCG-NEXT: [[BFCAST:%.*]] = zext i32 [[BFLOAD]] to i64
+// OGCG-NEXT: [[RET:%.*]] = trunc i64 [[BFCAST]] to i32
+// OGCG-NEXT: ret i32 [[RET]]
+
+void write32_1() {
+  a3.f3 = 5;
+}
+
+// CIR-LABEL: @_Z9write32_1v
+// CIR: [[CONST5:%.*]] = cir.const #cir.int<5> : !s32i
+// CIR: [[INT5:%.*]] = cir.cast integral [[CONST5]] : !s32i -> !u64i
+// CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[1] {name = "f3"} : !cir.ptr<!rec_S3> -> !cir.ptr<!u32i>
+// CIR: cir.set_bitfield align(4) (#bfi_f3_1, [[MEMBER]] : !cir.ptr<!u32i>, [[INT5]] : !u64i) -> !u64i
+// CIR: cir.return
+
+// LLVM-LABEL: @_Z9write32_1v
+// LLVM:  store i32 5, ptr getelementptr inbounds nuw (i8, ptr {{.*}}, i64 4), align 4
+// LLVM:  ret void
+
+// OGCG-LABEL: @_Z9write32_1v
+// OGCG: store i32 5, ptr getelementptr inbounds nuw (%struct.S3, ptr {{.*}}, i32 0, i32 1), align 4
+// OGCG-NEXT: ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/fixedpoint-literal.c b/clang/test/CIR/Incubator/CodeGen/fixedpoint-literal.c
new file mode 100644
index 0000000000000..3f914bc9adfda
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/fixedpoint-literal.c
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -ffixed-point -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Test basic fixed-point literals
+void test_short_fract() {
+  // CHECK: cir.func{{.*}} @test_short_fract
+  short _Fract sf = 0.5hr;
+  // CHECK: %{{.*}} = cir.const #cir.int<64> : !s8i
+
+  unsigned short _Fract usf = 0.5uhr;
+  // CHECK: %{{.*}} = cir.const #cir.int<128> : !u8i
+}
+
+void test_fract() {
+  // CHECK: cir.func{{.*}} @test_fract
+  _Fract f = 0.5r;
+  // CHECK: %{{.*}} = cir.const #cir.int<16384> : !s16i
+
+  unsigned _Fract uf = 0.5ur;
+  // CHECK: %{{.*}} = cir.const #cir.int<32768> : !u16i
+}
+
+void test_long_fract() {
+  // CHECK: cir.func{{.*}} @test_long_fract
+  long _Fract lf = 0.5lr;
+  // CHECK: %{{.*}} = cir.const #cir.int<1073741824> : !s32i
+}
+
+void test_accum() {
+  // CHECK: cir.func{{.*}} @test_accum
+  short _Accum sa = 0.5hk;
+  // CHECK: %{{.*}} = cir.const #cir.int<64> : !s16i
+}
+
+void test_negative() {
+  // CHECK: cir.func{{.*}} @test_negative
+  short _Fract sf = -0.5hr;
+  // CHECK: %{{.*}} = cir.const #cir.int<64> : !s8i
+  // CHECK: %{{.*}} = cir.unary(minus, %{{.*}}) : !s8i, !s8i
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/float16-ops.c b/clang/test/CIR/Incubator/CodeGen/float16-ops.c
new file mode 100644
index 0000000000000..5f5ee53df9160
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/float16-ops.c
@@ -0,0 +1,1636 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir --check-prefix=NONATIVE %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fnative-half-type -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir --check-prefix=NATIVE %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll --check-prefix=NONATIVE-LLVM %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fnative-half-type -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll --check-prefix=NATIVE-LLVM %s
+
+volatile unsigned test;
+volatile int i0;
+volatile _Float16 h0 = 0.0, h1 = 1.0, h2;
+volatile float f0, f1, f2;
+volatile double d0;
+short s0;
+
+void foo(void) {
+  test = (h0);
+  // NONATIVE: %{{.+}} = cir.cast float_to_int %{{.+}} : !cir.f16 -> !u32i
+  // NATIVE: %{{.+}} = cir.cast float_to_int %{{.+}} : !cir.f16 -> !u32i
+
+  // NONATIVE-LLVM: %{{.+}} = fptoui half %{{.+}} to i32
+  // NATIVE-LLVM: %{{.+}} = fptoui half %{{.+}} to i32
+
+  h0 = (test);
+  // NONATIVE: %{{.+}} = cir.cast int_to_float %{{.+}} : !u32i -> !cir.f16
+  // NATIVE: %{{.+}} = cir.cast int_to_float %{{.+}} : !u32i -> !cir.f16
+
+  // NONATIVE-LLVM: %{{.+}} = uitofp i32 %{{.+}} to half
+  // NATIVE-LLVM: %{{.+}} = uitofp i32 %{{.+}} to half
+
+  test = (!h1);
+  //      NONATIVE: %[[#A:]] = cir.cast float_to_bool %{{.+}} : !cir.f16 -> !cir.bool
+  // NONATIVE-NEXT: %[[#B:]] = cir.unary(not, %[[#A]]) : !cir.bool, !cir.bool
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast float_to_bool %{{.+}} : !cir.f16 -> !cir.bool
+  // NATIVE-NEXT: %[[#B:]] = cir.unary(not, %[[#A]]) : !cir.bool, !cir.bool
+  // NATIVE-NEXT: %[[#C:]] = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  //      NONATIVE-LLVM: %[[#A:]] = fcmp une half %{{.+}}, 0xH0000
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = zext i1 %[[#A]] to i8
+  // NONATIVE-LLVM-NEXT: %[[#C:]] = xor i8 %[[#B]], 1
+  // NONATIVE-LLVM-NEXT: %{{.+}} = zext i8 %[[#C]] to i32
+
+  //      NATIVE-LLVM: %[[#A:]] = fcmp une half %{{.+}}, 0xH0000
+  // NATIVE-LLVM-NEXT: %[[#B:]] = zext i1 %[[#A]] to i8
+  // NATIVE-LLVM-NEXT: %[[#C:]] = xor i8 %[[#B]], 1
+  // NATIVE-LLVM-NEXT: %{{.+}} = zext i8 %[[#C]] to i32
+
+  h1 = -h1;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //  NATIVE-NOT: %{{.+}} = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //  NATIVE-NOT: %{{.+}} = cir.cast floating %{{.+}} : !cir.float -> !cir.f16
+  //      NATIVE: %{{.+}} = cir.unary(minus, %{{.+}}) : !cir.f16, !cir.f16
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fneg float %[[#A]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half
+
+  // NATIVE-LLVM: %{{.+}} = fneg half %{{.+}}
+
+  h1 = +h1;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.unary(plus, %[[#A]]) : !cir.float, !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //  NATIVE-NOT: %{{.+}} = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //  NATIVE-NOT: %{{.+}} = cir.cast floating %{{.+}} : !cir.float -> !cir.f16
+  //      NATIVE: %{{.+}} = cir.unary(plus, %{{.+}}) : !cir.f16, !cir.f16
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fptrunc float %[[#A]] to half
+
+  //      NATIVE-LLVM: %[[#A:]] = load volatile half, ptr @h1, align 2
+  // NATIVE-LLVM-NEXT: store volatile half %[[#A]], ptr @h1, align 2
+
+  h1++;
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.f16
+  // NONATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.f16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16
+
+  // NONATIVE-LLVM: %{{.+}} = fadd half %{{.+}}, 0xH3C00
+
+  // NATIVE-LLVM: %{{.+}} = fadd half %{{.+}}, 0xH3C00
+
+  ++h1;
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.f16
+  // NONATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.f16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16
+
+  // NONATIVE-LLVM: %{{.+}} = fadd half %{{.+}}, 0xH3C00
+
+  // NATIVE-LLVM: %{{.+}} = fadd half %{{.+}}, 0xH3C00
+
+  --h1;
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<-1.000000e+00> : !cir.f16
+  // NONATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<-1.000000e+00> : !cir.f16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16
+
+  // NONATIVE-LLVM: %{{.+}} = fadd half %{{.+}}, 0xHBC00
+
+  // NATIVE-LLVM: %{{.+}} = fadd half %{{.+}}, 0xHBC00
+
+  h1--;
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<-1.000000e+00> : !cir.f16
+  // NONATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<-1.000000e+00> : !cir.f16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16
+
+  // NONATIVE-LLVM: %{{.+}} = fadd half %{{.+}}, 0xHBC00
+
+  // NATIVE-LLVM: %{{.+}} = fadd half %{{.+}}, 0xHBC00
+
+  h1 = h0 * h2;
+  //      NONATIVE: %[[#LHS:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#RHS:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#A:]] = cir.binop(mul, %[[#LHS]], %[[#RHS]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#A]] : !cir.float -> !cir.f16
+
+  // NATIVE: %{{.+}} = cir.binop(mul, %{{.+}}, %{{.+}}) : !cir.f16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#SUM:]] = fmul float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#SUM]] to half
+
+  // NATIVE-LLVM: %{{.+}} = fmul half %{{.+}}, %{{.+}}
+
+  h1 = h0 * (_Float16) -2.0f;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.unary(minus, %[[#B]]) : !cir.float, !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: %[[#E:]] = cir.cast floating %[[#D]] : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#F:]] = cir.binop(mul, %[[#A]], %[[#E]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#F]] : !cir.float -> !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(mul, %{{.+}}, %[[#C]]) : !cir.f16
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fmul float %[[#A]], -2.000000e+00
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half
+
+  // NATIVE-LLVM: %{{.+}} = fmul half %{{.+}}, 0xHC000
+
+  h1 = h0 * f2;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.float
+  // NATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RES:]] = fmul float %[[#LHS]], %{{.+}}
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  //      NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      NATIVE-LLVM: %[[#RES:]] = fmul float %[[#LHS]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  h1 = f0 * h2;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.float
+  // NATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %{{.+}}, %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  //      NATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %{{.+}}, %[[#RHS]]
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  h1 = h0 * i0;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(mul, %[[#A]], %[[#C]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#D]] : !cir.float -> !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.f16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM-NEXT: %[[#A:]] = fpext half %[[#RHS]] to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %[[#A]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  //      NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM-NEXT: %{{.+}} = fmul half %{{.+}}, %[[#A]]
+
+  h1 = (h0 / h2);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(div, %[[#A]], %[[#B]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#C]] : !cir.float -> !cir.f16
+
+  // NATIVE: %{{.+}} = cir.binop(div, %{{.+}}, %{{.+}}) : !cir.f16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  // NATIVE-LLVM: %{{.+}} = fdiv half %{{.+}}, %{{.+}}
+
+  h1 = (h0 / (_Float16) -2.0f);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.unary(minus, %[[#B]]) : !cir.float, !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: %[[#E:]] = cir.cast floating %[[#D]] : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#F:]] = cir.binop(div, %[[#A]], %[[#E]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#F]] : !cir.float -> !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(div, %{{.+}}, %[[#C]]) : !cir.f16
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fdiv float %[[#A]], -2.000000e+00
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half
+
+  // NATIVE-LLVM: %{{.+}} = fdiv half %{{.+}}, 0xHC000
+
+  h1 = (h0 / f2);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.float
+  // NATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RES:]] = fdiv float %[[#LHS]], %{{.+}}
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  //      NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      NATIVE-LLVM: %[[#RES:]] = fdiv float %[[#LHS]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  h1 = (f0 / h2);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.binop(div, %{{.+}}, %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(div, %{{.+}}, %[[#A]]) : !cir.float
+  // NATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %{{.+}}, %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  //      NATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %{{.+}}, %[[#RHS]]
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  h1 = (h0 / i0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(div, %[[#A]], %[[#C]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#D]] : !cir.float -> !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(div, %{{.+}}, %[[#A]]) : !cir.f16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM-NEXT: %[[#A:]] = fpext half %[[#RHS]] to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %[[#A]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  //      NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM-NEXT: %{{.+}} = fdiv half %{{.+}}, %[[#A]]
+
+  h1 = (h2 + h0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(add, %[[#A]], %[[#B]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#C]] : !cir.float -> !cir.f16
+
+  // NATIVE: %{{.+}} = cir.binop(add, %{{.+}}, %{{.+}}) : !cir.f16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  // NATIVE-LLVM: %{{.+}} = fadd half %{{.+}}, %{{.+}}
+
+  h1 = ((_Float16)-2.0 + h0);
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double
+  // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.f16
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#E:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#F:]] = cir.binop(add, %[[#D]], %[[#E]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#F]] : !cir.float -> !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.f16
+  //      NATIVE: %{{.+}} = cir.binop(add, %[[#C]], %{{.+}}) : !cir.f16
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fadd float -2.000000e+00, %[[#A]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half
+
+  // NATIVE-LLVM: %{{.+}} = fadd half 0xHC000, %{{.+}}
+
+  h1 = (h2 + f0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.float
+  // NATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RES:]] = fadd float %[[#LHS]], %{{.+}}
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  //      NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      NATIVE-LLVM: %[[#RES:]] = fadd float %[[#LHS]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  h1 = (f2 + h0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.float
+  // NATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fadd float %{{.+}}, %[[#A]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half
+
+  //      NATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %{{.+}}, %[[#RHS]]
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  h1 = (h0 + i0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(add, %[[#A]], %[[#C]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#D]] : !cir.float -> !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM-NEXT: %[[#A:]] = fpext half %[[#RHS]] to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#A]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  //      NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM-NEXT: %{{.+}} = fadd half %{{.+}}, %[[#A]]
+
+  h1 = (h2 - h0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(sub, %[[#A]], %[[#B]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#C]] : !cir.float -> !cir.f16
+
+  // NATIVE: %{{.+}} = cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.f16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  // NATIVE-LLVM: %{{.+}} = fsub half %{{.+}}, %{{.+}}
+
+  h1 = ((_Float16)-2.0f - h0);
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#E:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#F:]] = cir.binop(sub, %[[#D]], %[[#E]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#F]] : !cir.float -> !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  //      NATIVE: %{{.+}} = cir.binop(sub, %[[#C]], %{{.+}}) : !cir.f16
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fsub float -2.000000e+00, %[[#A]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half
+
+  // NATIVE-LLVM: %{{.+}} = fsub half 0xHC000, %{{.+}}
+
+  h1 = (h2 - f0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.float
+  // NATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RES:]] = fsub float %[[#LHS]], %{{.+}}
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  //      NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      NATIVE-LLVM: %[[#RES:]] = fsub float %[[#LHS]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  h1 = (f2 - h0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.float
+  // NATIVE-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %{{.+}}, %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  //      NATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %{{.+}}, %[[#RHS]]
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  h1 = (h0 - i0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(sub, %[[#A]], %[[#C]]) : !cir.float
+  // NONATIVE-NEXT: %{{.+}} = cir.cast floating %[[#D]] : !cir.float -> !cir.f16
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NATIVE-NEXT: %{{.+}} = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.f16
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM-NEXT: %[[#A:]] = fpext half %[[#RHS]] to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#A]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  //      NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM-NEXT: %{{.+}} = fsub half %{{.+}}, %[[#A]]
+
+  test = (h2 < h0);
+  //      NONATIVE: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#A]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#A]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp olt half %{{.+}}, %{{.+}}
+
+  // NATIVE-LLVM: %{{.+}} = fcmp olt half %{{.+}}, %{{.+}}
+
+  test = (h2 < (_Float16)42.0);
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  // NATIVE-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp olt half %{{.+}}, 0xH5140
+
+  // NATIVE-LLVM: %{{.+}} = fcmp olt half %{{.+}}, 0xH5140
+
+  test = (h2 < f0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM: %{{.+}} = fcmp olt float %[[#A]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM: %{{.+}} = fcmp olt float %[[#A]], %{{.+}}
+
+  test = (f2 < h0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp olt float %{{.+}}, %[[#A]]
+
+  //      NATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp olt float %{{.+}}, %[[#A]]
+
+  test = (i0 < h0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      NONATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      NATIVE: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM: %{{.+}} = fcmp olt half %[[#A]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM: %{{.+}} = fcmp olt half %[[#A]], %{{.+}}
+
+  test = (h0 < i0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp olt half %{{.+}}, %[[#A]]
+
+  //      NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp olt half %{{.+}}, %[[#A]]
+
+  test = (h0 > h2);
+  //      NONATIVE: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#A]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#A]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp ogt half %{{.+}}, %{{.+}}
+
+  // NATIVE-LLVM: %{{.+}} = fcmp ogt half %{{.+}}, %{{.+}}
+
+  test = ((_Float16)42.0 > h2);
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  //      NONATIVE: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  //      NATIVE: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp ogt half 0xH5140, %{{.+}}
+
+  // NATIVE-LLVM: %{{.+}} = fcmp ogt half 0xH5140, %{{.+}}
+
+  test = (h0 > f2);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM: %{{.+}} = fcmp ogt float %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM: %{{.+}} = fcmp ogt float %[[#LHS]], %{{.+}}
+
+  test = (f0 > h2);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp ogt float %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp ogt float %{{.+}}, %[[#RHS]]
+
+  test = (i0 > h0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      NONATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      NATIVE: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM: %{{.+}} = fcmp ogt half %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM: %{{.+}} = fcmp ogt half %[[#LHS]], %{{.+}}
+
+  test = (h0 > i0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      NONATIVE: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp ogt half %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp ogt half %{{.+}}, %[[#RHS]]
+
+  test = (h2 <= h0);
+  //      NONATIVE: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#A]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#A]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp ole half %{{.+}}, %{{.+}}
+
+  // NATIVE-LLVM: %{{.+}} = fcmp ole half %{{.+}}, %{{.+}}
+
+  test = (h2 <= (_Float16)42.0);
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  // NATIVE-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp ole half %{{.+}}, 0xH5140
+
+  // NATIVE-LLVM: %{{.+}} = fcmp ole half %{{.+}}, 0xH5140
+
+  test = (h2 <= f0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM: %{{.+}} = fcmp ole float %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM: %{{.+}} = fcmp ole float %[[#LHS]], %{{.+}}
+
+  test = (f2 <= h0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp ole float %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp ole float %{{.+}}, %[[#RHS]]
+
+  test = (i0 <= h0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      NONATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      NATIVE: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM: %{{.+}} = fcmp ole half %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM: %{{.+}} = fcmp ole half %[[#LHS]], %{{.+}}
+
+  test = (h0 <= i0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp ole half %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp ole half %{{.+}}, %[[#RHS]]
+
+  test = (h0 >= h2);
+  //      NONATIVE: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#A]] : !s32i -> !u32i
+  // NONATIVE-NEXT: %{{.+}} = cir.get_global @test : !cir.ptr<!u32i>
+
+  //      NATIVE: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#A]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp oge half %{{.+}}, %{{.+}}
+
+  // NATIVE-LLVM: %{{.+}} = fcmp oge half %{{.+}}, %{{.+}}
+
+  test = (h0 >= (_Float16)-2.0);
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double
+  // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.f16
+  // NONATIVE-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#D]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.f16
+  // NATIVE-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#D]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp oge half %{{.+}}, 0xHC000
+
+  // NATIVE-LLVM: %{{.+}} = fcmp oge half %{{.+}}, 0xHC000
+
+  test = (h0 >= f2);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM: %{{.+}} = fcmp oge float %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM: %{{.+}} = fcmp oge float %[[#LHS]], %{{.+}}
+
+  test = (f0 >= h2);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp oge float %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp oge float %{{.+}}, %[[#RHS]]
+
+  test = (i0 >= h0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      NONATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      NATIVE: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM: %{{.+}} = fcmp oge half %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM: %{{.+}} = fcmp oge half %[[#LHS]], %{{.+}}
+
+  test = (h0 >= i0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp oge half %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp oge half %{{.+}}, %[[#RHS]]
+
+  test = (h1 == h2);
+  //      NONATIVE: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#A]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#A]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp oeq half %{{.+}}, %{{.+}}
+
+  // NATIVE-LLVM: %{{.+}} = fcmp oeq half %{{.+}}, %{{.+}}
+
+  test = (h1 == (_Float16)1.0);
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  // NATIVE-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp oeq half %{{.+}}, 0xH3C00
+
+  // NATIVE-LLVM: %{{.+}} = fcmp oeq half %{{.+}}, 0xH3C00
+
+  test = (h1 == f1);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM: %{{.+}} = fcmp oeq float %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM: %{{.+}} = fcmp oeq float %[[#LHS]], %{{.+}}
+
+  test = (f1 == h1);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp oeq float %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp oeq float %{{.+}}, %[[#RHS]]
+
+  test = (i0 == h0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      NONATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      NATIVE: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM: %{{.+}} = fcmp oeq half %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM: %{{.+}} = fcmp oeq half %[[#LHS]], %{{.+}}
+
+  test = (h0 == i0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp oeq half %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp oeq half %{{.+}}, %[[#RHS]]
+
+  test = (h1 != h2);
+  //      NONATIVE: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#A]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#A]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp une half %{{.+}}, %{{.+}}
+
+  // NATIVE-LLVM: %{{.+}} = fcmp une half %{{.+}}, %{{.+}}
+
+  test = (h1 != (_Float16)1.0);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.double -> !cir.f16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  // NATIVE-NEXT: %[[#C:]] = cir.cmp(ne, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %{{.+}} = fcmp une half %{{.+}}, 0xH3C00
+
+  // NATIVE-LLVM: %{{.+}} = fcmp une half %{{.+}}, 0xH3C00
+
+  test = (h1 != f1);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM: %{{.+}} = fcmp une float %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM: %{{.+}} = fcmp une float %[[#LHS]], %{{.+}}
+
+  test = (f1 != h1);
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp une float %{{.+}}, %[[#A]]
+
+  //      NATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp une float %{{.+}}, %[[#A]]
+
+  test = (i0 != h0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      NONATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      NATIVE: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM: %{{.+}} = fcmp une half %[[#LHS]], %{{.+}}
+
+  // NATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM: %{{.+}} = fcmp une half %[[#LHS]], %{{.+}}
+
+  test = (h0 != i0);
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NATIVE-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // NATIVE-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fcmp une half %{{.+}}, %[[#RHS]]
+
+  //      NATIVE-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM-NEXT: %{{.+}} = fcmp une half %{{.+}}, %[[#RHS]]
+
+  h1 = (h1 ? h2 : h0);
+  //      NONATIVE: %[[#A:]] = cir.cast float_to_bool %{{.+}} : !cir.f16 -> !cir.bool
+  // NONATIVE-NEXT: %{{.+}} = cir.ternary(%[[#A]], true {
+  //      NONATIVE:   cir.yield %{{.+}} : !cir.f16
+  // NONATIVE-NEXT: }, false {
+  //      NONATIVE:   cir.yield %{{.+}} : !cir.f16
+  // NONATIVE-NEXT: }) : (!cir.bool) -> !cir.f16
+  //      NONATIVE: %{{.+}} = cir.get_global @h1 : !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.cast float_to_bool %{{.+}} : !cir.f16 -> !cir.bool
+  // NATIVE-NEXT: %[[#B:]] = cir.ternary(%[[#A]], true {
+  //      NATIVE:   cir.yield %{{.+}} : !cir.f16
+  // NATIVE-NEXT: }, false {
+  //      NATIVE:   cir.yield %{{.+}} : !cir.f16
+  // NATIVE-NEXT: }) : (!cir.bool) -> !cir.f16
+  // NATIVE-NEXT: %[[#C:]] = cir.get_global @h1 : !cir.ptr<!cir.f16>
+  // NATIVE-NEXT: cir.store volatile %[[#B]], %[[#C]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM:   %[[#A:]] = fcmp une half %{{.+}}, 0xH0000
+  // NONATIVE-LLVM-NEXT:   br i1 %[[#A]], label %[[#LABEL_A:]], label %[[#LABEL_B:]]
+  //      NONATIVE-LLVM: [[#LABEL_A]]:
+  // NONATIVE-LLVM-NEXT:   %[[#B:]] = load volatile half, ptr @h2, align 2
+  // NONATIVE-LLVM-NEXT:   br label %[[#LABEL_C:]]
+  //      NONATIVE-LLVM: [[#LABEL_B]]:
+  // NONATIVE-LLVM-NEXT:   %[[#C:]] = load volatile half, ptr @h0, align 2
+  // NONATIVE-LLVM-NEXT:   br label %[[#LABEL_C]]
+  //      NONATIVE-LLVM: [[#LABEL_C]]:
+  // NONATIVE-LLVM-NEXT:   %{{.+}} = phi half [ %[[#C]], %[[#LABEL_B]] ], [ %[[#B]], %[[#LABEL_A]] ]
+
+  //      NATIVE-LLVM:   %[[#A:]] = fcmp une half %{{.+}}, 0xH0000
+  // NATIVE-LLVM-NEXT:   br i1 %[[#A]], label %[[#LABEL_A:]], label %[[#LABEL_B:]]
+  //      NATIVE-LLVM: [[#LABEL_A]]:
+  // NATIVE-LLVM-NEXT:   %[[#B:]] = load volatile half, ptr @h2, align 2
+  // NATIVE-LLVM-NEXT:   br label %[[#LABEL_C:]]
+  //      NATIVE-LLVM: [[#LABEL_B]]:
+  // NATIVE-LLVM-NEXT:   %[[#C:]] = load volatile half, ptr @h0, align 2
+  // NATIVE-LLVM-NEXT:   br label %[[#LABEL_C]]
+  //      NATIVE-LLVM: [[#LABEL_C]]:
+  // NATIVE-LLVM-NEXT:   %{{.+}} = phi half [ %[[#C]], %[[#LABEL_B]] ], [ %[[#B]], %[[#LABEL_A]] ]
+
+  h0 = h1;
+  //      NONATIVE: %[[#A:]] = cir.get_global @h1 : !cir.ptr<!cir.f16>
+  // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.f16>, !cir.f16
+  // NONATIVE-NEXT: %[[#C:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NONATIVE-NEXT: cir.store volatile %[[#B]], %[[#C]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.get_global @h1 : !cir.ptr<!cir.f16>
+  // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.f16>, !cir.f16
+  // NATIVE-NEXT: %[[#C:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NATIVE-NEXT: cir.store volatile %[[#B]], %[[#C]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = load volatile half, ptr @h1, align 2
+  // NONATIVE-LLVM-NEXT: store volatile half %[[#A]], ptr @h0, align 2
+
+  //      NATIVE-LLVM: %[[#A:]] = load volatile half, ptr @h1, align 2
+  // NATIVE-LLVM-NEXT: store volatile half %[[#A]], ptr @h0, align 2
+
+  h0 = (_Float16)-2.0f;
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  // NONATIVE-LLVM: store volatile half 0xHC000, ptr @h0, align 2
+
+  // NATIVE-LLVM: store volatile half 0xHC000, ptr @h0, align 2
+
+  h0 = f0;
+  //      NONATIVE: %[[#A:]] = cir.get_global @f0 : !cir.ptr<!cir.float>
+  // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.float>, !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.get_global @f0 : !cir.ptr<!cir.float>
+  // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.float>, !cir.float
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = load volatile float, ptr @f0, align 4
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fptrunc float %[[#A]] to half
+  // NONATIVE-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2
+
+  //      NATIVE-LLVM: %[[#A:]] = load volatile float, ptr @f0, align 4
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fptrunc float %[[#A]] to half
+  // NATIVE-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2
+
+  h0 = i0;
+  //      NONATIVE: %[[#A:]] = cir.get_global @i0 : !cir.ptr<!s32i>
+  // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!s32i>, !s32i
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast int_to_float %[[#B]] : !s32i -> !cir.f16
+  // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.get_global @i0 : !cir.ptr<!s32i>
+  // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!s32i>, !s32i
+  // NATIVE-NEXT: %[[#C:]] = cir.cast int_to_float %[[#B]] : !s32i -> !cir.f16
+  // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = load volatile i32, ptr @i0, align 4
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = sitofp i32 %[[#A]] to half
+  // NONATIVE-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2
+
+  //      NATIVE-LLVM: %[[#A:]] = load volatile i32, ptr @i0, align 4
+  // NATIVE-LLVM-NEXT: %[[#B:]] = sitofp i32 %[[#A]] to half
+  // NATIVE-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2
+
+  i0 = h0;
+  //      NONATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.f16>, !cir.f16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast float_to_int %[[#B]] : !cir.f16 -> !s32i
+  // NONATIVE-NEXT: %[[#D:]] = cir.get_global @i0 : !cir.ptr<!s32i>
+  // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !s32i, !cir.ptr<!s32i>
+
+  //      NATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.f16>, !cir.f16
+  // NATIVE-NEXT: %[[#C:]] = cir.cast float_to_int %[[#B]] : !cir.f16 -> !s32i
+  // NATIVE-NEXT: %[[#D:]] = cir.get_global @i0 : !cir.ptr<!s32i>
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !s32i, !cir.ptr<!s32i>
+
+  //      NONATIVE-LLVM: %[[#A:]] = load volatile half, ptr @h0, align 2
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fptosi half %[[#A]] to i32
+  // NONATIVE-LLVM-NEXT: store volatile i32 %[[#B]], ptr @i0, align 4
+
+  //      NATIVE-LLVM: %[[#A:]] = load volatile half, ptr @h0, align 2
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fptosi half %[[#A]] to i32
+  // NATIVE-LLVM-NEXT: store volatile i32 %[[#B]], ptr @i0, align 4
+
+  h0 += h1;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(add, %[[#B]], %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.binop(add, %{{.+}}, %{{.+}}) : !cir.f16
+  // NATIVE-NEXT: cir.store volatile %[[#A]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  // NATIVE-LLVM: %{{.+}} = fadd half %{{.+}}, %{{.+}}
+
+  h0 += (_Float16)1.0f;
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#D:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#E:]] = cir.binop(add, %[[#D]], %[[#C]]) : !cir.float
+  // NONATIVE-NEXT: %[[#F:]] = cir.cast floating %[[#E]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: cir.store volatile %[[#F]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.float -> !cir.f16
+  //      NATIVE: %[[#C:]] = cir.binop(add, %{{.+}}, %[[#B]]) : !cir.f16
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fadd float %[[#A]], 1.000000e+00
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half
+
+  // NATIVE-LLVM: %{{.+}} = fadd half %{{.+}}, 0xH3C00
+
+  h0 += f2;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.float
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %{{.+}}
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  //      NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  i0 += h0;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(add, %[[#B]], %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast float_to_int %[[#C]] : !cir.float -> !s32i
+  // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.f16
+  // NATIVE-NEXT: %[[#C:]] = cir.cast float_to_int %[[#B]] : !cir.f16 -> !s32i
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptosi float %[[#RES]] to i32
+
+  //      NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fadd half %[[#A]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptosi half %[[#B]] to i32
+
+  h0 += i0;
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#C:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(add, %[[#C]], %[[#B]]) : !cir.float
+  // NONATIVE-NEXT: %[[#E:]] = cir.cast floating %[[#D]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: cir.store volatile %[[#E]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      NATIVE: %[[#B:]] = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16
+  // NATIVE-NEXT: cir.store volatile %[[#B]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fpext half %[[#A]] to float
+  //      NONATIVE-LLVM: %[[#C:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#D:]] = fadd float %[[#C]], %[[#B]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#D]] to half
+
+  // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM: %{{.+}} = fadd half %{{.+}}, %[[#A]]
+
+  h0 -= h1;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(sub, %[[#B]], %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.f16
+  // NATIVE-NEXT: cir.store volatile %[[#A]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  // NATIVE-LLVM: %{{.+}} = fsub half %{{.+}}, %{{.+}}
+
+  h0 -= (_Float16)1.0;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.double -> !cir.f16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#C:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(sub, %[[#C]], %[[#B]]) : !cir.float
+  // NONATIVE-NEXT: %[[#E:]] = cir.cast floating %[[#D]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: cir.store volatile %[[#E]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  //      NATIVE: %[[#C:]] = cir.binop(sub, %{{.+}}, %[[#B]]) : !cir.f16
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fsub float %[[#A]], 1.000000e+00
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half
+
+  // NATIVE-LLVM: %{{.+}} = fsub half %{{.+}}, 0xH3C00
+
+  h0 -= f2;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.float
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %{{.+}}
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  //      NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  i0 -= h0;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(sub, %[[#B]], %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast float_to_int %[[#C]] : !cir.float -> !s32i
+  // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.f16
+  // NATIVE-NEXT: %[[#C:]] = cir.cast float_to_int %[[#B]] : !cir.f16 -> !s32i
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptosi float %[[#RES]] to i32
+
+  //      NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fsub half %[[#A]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptosi half %[[#B]] to i32
+
+  h0 -= i0;
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#C:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(sub, %[[#C]], %[[#B]]) : !cir.float
+  // NONATIVE-NEXT: %[[#E:]] = cir.cast floating %[[#D]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: cir.store volatile %[[#E]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      NATIVE: %[[#B:]] = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.f16
+  // NATIVE-NEXT: cir.store volatile %[[#B]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fpext half %[[#A]] to float
+  //      NONATIVE-LLVM: %[[#C:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#D:]] = fsub float %[[#C]], %[[#B]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#D]] to half
+
+  // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM: %{{.+}} = fsub half %{{.+}}, %[[#A]]
+
+  h0 *= h1;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(mul, %[[#B]], %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.binop(mul, %{{.+}}, %{{.+}}) : !cir.f16
+  // NATIVE-NEXT: cir.store volatile %[[#A]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  // NATIVE-LLVM: %{{.+}} = fmul half %{{.+}}, %{{.+}}
+
+  h0 *= (_Float16)1.0;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.double -> !cir.f16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#C:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(mul, %[[#C]], %[[#B]]) : !cir.float
+  // NONATIVE-NEXT: %[[#E:]] = cir.cast floating %[[#D]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: cir.store volatile %[[#E]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  //      NATIVE: %[[#C:]] = cir.binop(mul, %{{.+}}, %[[#B]]) : !cir.f16
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fmul float %[[#A]], 1.000000e+00
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half
+
+  // NATIVE-LLVM: %{{.+}} = fmul half %{{.+}}, 0xH3C00
+
+  h0 *= f2;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.float
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %{{.+}}
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  //      NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  i0 *= h0;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(mul, %[[#B]], %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast float_to_int %[[#C]] : !cir.float -> !s32i
+  // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.f16
+  // NATIVE-NEXT: %[[#C:]] = cir.cast float_to_int %[[#B]] : !cir.f16 -> !s32i
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptosi float %[[#RES]] to i32
+
+  //      NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fmul half %[[#A]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptosi half %[[#B]] to i32
+
+  h0 *= i0;
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#C:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(mul, %[[#C]], %[[#B]]) : !cir.float
+  // NONATIVE-NEXT: %[[#E:]] = cir.cast floating %[[#D]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: cir.store volatile %[[#E]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      NATIVE: %[[#B:]] = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.f16
+  // NATIVE-NEXT: cir.store volatile %[[#B]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fpext half %[[#A]] to float
+  //      NONATIVE-LLVM: %[[#C:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#D:]] = fmul float %[[#C]], %[[#B]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#D]] to half
+
+  // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM: %{{.+}} = fmul half %{{.+}}, %[[#A]]
+
+  h0 /= h1;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(div, %[[#B]], %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.binop(div, %{{.+}}, %{{.+}}) : !cir.f16
+  // NATIVE-NEXT: cir.store volatile %[[#A]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  // NATIVE-LLVM: %{{.+}} = fdiv half %{{.+}}, %{{.+}}
+
+  h0 /= (_Float16)1.0;
+  //      NONATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#D:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#E:]] = cir.binop(div, %[[#D]], %[[#C]]) : !cir.float
+  // NONATIVE-NEXT: %[[#F:]] = cir.cast floating %[[#E]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: cir.store volatile %[[#F]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // NATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  //      NATIVE: %[[#C:]] = cir.binop(div, %{{.+}}, %[[#B]]) : !cir.f16
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fdiv float %[[#A]], 1.000000e+00
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#B]] to half
+
+  // NATIVE-LLVM: %{{.+}} = fdiv half %{{.+}}, 0xH3C00
+
+  h0 /= f2;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.float
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %{{.+}}
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  //      NATIVE-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // NATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  i0 /= h0;
+  //      NONATIVE: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#B:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.float
+  // NONATIVE-NEXT: %[[#C:]] = cir.binop(div, %[[#B]], %[[#A]]) : !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast float_to_int %[[#C]] : !cir.float -> !s32i
+  // NONATIVE-NEXT: cir.store volatile %[[#D]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NATIVE-NEXT: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.f16
+  // NATIVE-NEXT: %[[#C:]] = cir.cast float_to_int %[[#B]] : !cir.f16 -> !s32i
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      NONATIVE-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  //      NONATIVE-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %[[#RHS]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptosi float %[[#RES]] to i32
+
+  //      NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fdiv half %[[#A]], %{{.+}}
+  // NATIVE-LLVM-NEXT: %{{.+}} = fptosi half %[[#B]] to i32
+
+  h0 /= i0;
+  //      NONATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // NONATIVE-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.f16 -> !cir.float
+  //      NONATIVE: %[[#C:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.binop(div, %[[#C]], %[[#B]]) : !cir.float
+  // NONATIVE-NEXT: %[[#E:]] = cir.cast floating %[[#D]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: cir.store volatile %[[#E]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      NATIVE: %[[#B:]] = cir.binop(div, %{{.+}}, %[[#A]]) : !cir.f16
+  // NATIVE-NEXT: cir.store volatile %[[#B]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fpext half %[[#A]] to float
+  //      NONATIVE-LLVM: %[[#C:]] = fpext half %{{.+}} to float
+  // NONATIVE-LLVM-NEXT: %[[#D:]] = fdiv float %[[#C]], %[[#B]]
+  // NONATIVE-LLVM-NEXT: %{{.+}} = fptrunc float %[[#D]] to half
+
+  // NATIVE-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // NATIVE-LLVM: %{{.+}} = fdiv half %{{.+}}, %[[#A]]
+
+  h0 = d0;
+  //      NONATIVE: %[[#A:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.double>, !cir.double
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.f16
+  // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.double>, !cir.double
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.f16
+  // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = load volatile double, ptr @d0, align 8
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fptrunc double %[[#A]] to half
+  // NONATIVE-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2
+
+  //      NATIVE-LLVM: %[[#A:]] = load volatile double, ptr @d0, align 8
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fptrunc double %[[#A]] to half
+  // NATIVE-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2
+
+  h0 = (float)d0;
+  //      NONATIVE: %[[#A:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.double>, !cir.double
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.f16
+  // NONATIVE-NEXT: %[[#E:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NONATIVE-NEXT: cir.store volatile %[[#D]], %[[#E]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.double>, !cir.double
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.float
+  // NATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.f16
+  // NATIVE-NEXT: %[[#E:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NATIVE-NEXT: cir.store volatile %[[#D]], %[[#E]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = load volatile double, ptr @d0, align 8
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fptrunc double %[[#A]] to float
+  // NONATIVE-LLVM-NEXT: %[[#C:]] = fptrunc float %[[#B]] to half
+  // NONATIVE-LLVM-NEXT: store volatile half %[[#C]], ptr @h0, align 2
+
+  //      NATIVE-LLVM: %[[#A:]] = load volatile double, ptr @d0, align 8
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fptrunc double %[[#A]] to float
+  // NATIVE-LLVM-NEXT: %[[#C:]] = fptrunc float %[[#B]] to half
+  // NATIVE-LLVM-NEXT: store volatile half %[[#C]], ptr @h0, align 2
+
+  d0 = h0;
+  //      NONATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.f16>, !cir.f16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.f16 -> !cir.double
+  // NONATIVE-NEXT: %[[#D:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.double, !cir.ptr<!cir.double>
+
+  //      NATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.f16>, !cir.f16
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.f16 -> !cir.double
+  // NATIVE-NEXT: %[[#D:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.double, !cir.ptr<!cir.double>
+
+  //      NONATIVE-LLVM: %[[#A:]] = load volatile half, ptr @h0, align 2
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fpext half %[[#A]] to double
+  // NONATIVE-LLVM-NEXT: store volatile double %[[#B]], ptr @d0, align 8
+
+  //      NATIVE-LLVM: %[[#A:]] = load volatile half, ptr @h0, align 2
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fpext half %[[#A]] to double
+  // NATIVE-LLVM-NEXT: store volatile double %[[#B]], ptr @d0, align 8
+
+  d0 = (float)h0;
+  //      NONATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NONATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.f16>, !cir.f16
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.f16 -> !cir.float
+  // NONATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.double
+  // NONATIVE-NEXT: %[[#E:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // NONATIVE-NEXT: cir.store volatile %[[#D]], %[[#E]] : !cir.double, !cir.ptr<!cir.double>
+
+  //      NATIVE: %[[#A:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NATIVE-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.f16>, !cir.f16
+  // NATIVE-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.f16 -> !cir.float
+  // NATIVE-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.double
+  // NATIVE-NEXT: %[[#E:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // NATIVE-NEXT: cir.store volatile %[[#D]], %[[#E]] : !cir.double, !cir.ptr<!cir.double>
+
+  //      NONATIVE-LLVM: %[[#A:]] = load volatile half, ptr @h0, align 2
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = fpext half %[[#A]] to float
+  // NONATIVE-LLVM-NEXT: %[[#C:]] = fpext float %[[#B]] to double
+  // NONATIVE-LLVM-NEXT: store volatile double %[[#C]], ptr @d0, align 8
+
+  //      NATIVE-LLVM: %[[#A:]] = load volatile half, ptr @h0, align 2
+  // NATIVE-LLVM-NEXT: %[[#B:]] = fpext half %[[#A]] to float
+  // NATIVE-LLVM-NEXT: %[[#C:]] = fpext float %[[#B]] to double
+  // NATIVE-LLVM-NEXT: store volatile double %[[#C]], ptr @d0, align 8
+
+  h0 = s0;
+  //      NONATIVE: %[[#A:]] = cir.get_global @s0 : !cir.ptr<!s16i>
+  // NONATIVE-NEXT: %[[#B:]] = cir.load %[[#A]] : !cir.ptr<!s16i>, !s16i
+  // NONATIVE-NEXT: %[[#C:]] = cir.cast int_to_float %[[#B]] : !s16i -> !cir.f16
+  // NONATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NONATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NATIVE: %[[#A:]] = cir.get_global @s0 : !cir.ptr<!s16i>
+  // NATIVE-NEXT: %[[#B:]] = cir.load %[[#A]] : !cir.ptr<!s16i>, !s16i
+  // NATIVE-NEXT: %[[#C:]] = cir.cast int_to_float %[[#B]] : !s16i -> !cir.f16
+  // NATIVE-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // NATIVE-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      NONATIVE-LLVM: %[[#A:]] = load i16, ptr @s0, align 2
+  // NONATIVE-LLVM-NEXT: %[[#B:]] = sitofp i16 %[[#A]] to half
+  // NONATIVE-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2
+
+  //      NATIVE-LLVM: %[[#A:]] = load i16, ptr @s0, align 2
+  // NATIVE-LLVM-NEXT: %[[#B:]] = sitofp i16 %[[#A]] to half
+  // NATIVE-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/forward-decls.cpp b/clang/test/CIR/Incubator/CodeGen/forward-decls.cpp
new file mode 100644
index 0000000000000..059ef9c6e860e
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/forward-decls.cpp
@@ -0,0 +1,124 @@
+// RUN: split-file %s %t
+
+
+//--- incomplete_struct
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %t/incomplete_struct -o %t/incomplete_struct.cir
+// RUN: FileCheck %s --input-file=%t/incomplete_struct.cir --check-prefix=CHECK1
+
+// Forward declaration of the record is never defined, so it is created as
+// an incomplete struct in CIR and will remain as such.
+
+// CHECK1: ![[INC_STRUCT:.+]] = !cir.record<struct "IncompleteStruct" incomplete>
+struct IncompleteStruct;
+// CHECK1: testIncompleteStruct(%arg0: !cir.ptr<![[INC_STRUCT]]>
+void testIncompleteStruct(struct IncompleteStruct *s) {};
+
+
+
+//--- mutated_struct
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %t/mutated_struct -o %t/mutated_struct.cir
+// RUN: FileCheck %s --input-file=%t/mutated_struct.cir --check-prefix=CHECK2
+
+// Forward declaration of the struct is followed by usage, then definition.
+// This means it will initially be created as incomplete, then completed.
+
+// CHECK2: ![[COMPLETE:.+]] = !cir.record<struct "ForwardDeclaredStruct" {!s32i} #cir.record.decl.ast>
+// CHECK2: testForwardDeclaredStruct(%arg0: !cir.ptr<![[COMPLETE]]>
+struct ForwardDeclaredStruct;
+void testForwardDeclaredStruct(struct ForwardDeclaredStruct *fds) {};
+struct ForwardDeclaredStruct {
+  int testVal;
+};
+
+
+
+//--- recursive_struct
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %t/recursive_struct -o %t/recursive_struct.cir
+// RUN: FileCheck --check-prefix=CHECK3 --input-file=%t/recursive_struct.cir %s
+
+// Struct is initially forward declared since the self-reference is generated
+// first. Then, once the type is fully generated, it is completed.
+
+// CHECK3: ![[STRUCT:.+]] = !cir.record<struct "RecursiveStruct" {!s32i, !cir.ptr<!cir.record<struct "RecursiveStruct">>} #cir.record.decl.ast>
+struct RecursiveStruct {
+  int value;
+  struct RecursiveStruct *next;
+};
+// CHECK3: testRecursiveStruct(%arg0: !cir.ptr<![[STRUCT]]>
+void testRecursiveStruct(struct RecursiveStruct *arg) {
+  // CHECK3: %[[#NEXT:]] = cir.get_member %{{.+}}[1] {name = "next"} : !cir.ptr<![[STRUCT]]> -> !cir.ptr<!cir.ptr<![[STRUCT]]>>
+  // CHECK3: %[[#DEREF:]] = cir.load{{.*}} %[[#NEXT]] : !cir.ptr<!cir.ptr<![[STRUCT]]>>, !cir.ptr<![[STRUCT]]>
+  // CHECK3: cir.get_member %[[#DEREF]][0] {name = "value"} : !cir.ptr<![[STRUCT]]> -> !cir.ptr<!s32i>
+  arg->next->value;
+}
+
+
+
+//--- indirect_recursive_struct
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %t/indirect_recursive_struct -o %t/indirect_recursive_struct.cir
+// RUN: FileCheck --check-prefix=CHECK4 --input-file=%t/indirect_recursive_struct.cir %s
+
+// Node B refers to A, and vice-versa, so a forward declaration is used to
+// ensure the classes can be defined. Since types alias are not yet supported
+// in recursive type, each struct is expanded until there are no more recursive
+// types, or all the recursive types are self references.
+
+// CHECK4: ![[B:.+]] = !cir.record<struct "StructNodeB" {!s32i, !cir.ptr<!cir.record<struct "StructNodeA" {!s32i, !cir.ptr<!cir.record<struct "StructNodeB">>}
+// CHECK4: ![[A:.+]] = !cir.record<struct "StructNodeA" {!s32i, !cir.ptr<![[B]]>}>
+struct StructNodeB;
+struct StructNodeA {
+  int value;
+  struct StructNodeB *next;
+};
+struct StructNodeB {
+  int value;
+  struct StructNodeA *next;
+};
+
+void testIndirectSelfReference(struct StructNodeA arg) {
+  // CHECK4: %[[#V1:]] = cir.get_member %{{.+}}[1] {name = "next"} : !cir.ptr<![[A]]> -> !cir.ptr<!cir.ptr<![[B]]>>
+  // CHECK4: %[[#V2:]] = cir.load{{.*}} %[[#V1]] : !cir.ptr<!cir.ptr<![[B]]>>, !cir.ptr<![[B]]>
+  // CHECK4: %[[#V3:]] = cir.get_member %[[#V2]][1] {name = "next"} : !cir.ptr<![[B]]> -> !cir.ptr<!cir.ptr<![[A]]>>
+  // CHECK4: %[[#V4:]] = cir.load{{.*}} %[[#V3]] : !cir.ptr<!cir.ptr<![[A]]>>, !cir.ptr<![[A]]>
+  // CHECK4: cir.get_member %[[#V4]][0] {name = "value"} : !cir.ptr<![[A]]> -> !cir.ptr<!s32i>
+  arg.next->next->value;
+}
+
+
+
+//--- complex_struct
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %t/complex_struct -o %t/complex_struct.cir
+// RUN: FileCheck --check-prefix=CHECK5 --input-file=%t/complex_struct.cir %s
+
+// A sizeable complex struct just to double check that stuff is working.
+// CHECK5: !cir.record<struct "anon.0" {!cir.ptr<!cir.record<struct "A" {!cir.record<struct "anon.0">, !cir.record<struct "B" {!cir.ptr<!cir.record<struct "B">>, !cir.record<struct "C" {!cir.ptr<!cir.record<struct "A">>, !cir.ptr<!cir.record<struct "B">>, !cir.ptr<!cir.record<struct "C">>} #cir.record.decl.ast>, !cir.record<union "anon.1" {!cir.ptr<!cir.record<struct "A">>, !cir.record<struct "anon.2" {!cir.ptr<!cir.record<struct "B">>} #cir.record.decl.ast>} #cir.record.decl.ast>} #cir.record.decl.ast>} #cir.record.decl.ast>>} #cir.record.decl.ast>
+// CHECK5: !cir.record<struct "C" {!cir.ptr<!cir.record<struct "A" {!rec_anon2E0, !cir.record<struct "B" {!cir.ptr<!cir.record<struct "B">>, !cir.record<struct "C">, !cir.record<union "anon.1" {!cir.ptr<!cir.record<struct "A">>, !cir.record<struct "anon.2" {!cir.ptr<!cir.record<struct "B">>} #cir.record.decl.ast>} #cir.record.decl.ast>} #cir.record.decl.ast>} #cir.record.decl.ast>>, !cir.ptr<!cir.record<struct "B" {!cir.ptr<!cir.record<struct "B">>, !cir.record<struct "C">, !cir.record<union "anon.1" {!cir.ptr<!cir.record<struct "A" {!rec_anon2E0, !cir.record<struct "B">} #cir.record.decl.ast>>, !cir.record<struct "anon.2" {!cir.ptr<!cir.record<struct "B">>} #cir.record.decl.ast>} #cir.record.decl.ast>} #cir.record.decl.ast>>, !cir.ptr<!cir.record<struct "C">>} #cir.record.decl.ast>
+// CHECK5: !cir.record<struct "anon.2" {!cir.ptr<!cir.record<struct "B" {!cir.ptr<!cir.record<struct "B">>, !rec_C, !cir.record<union "anon.1" {!cir.ptr<!cir.record<struct "A" {!rec_anon2E0, !cir.record<struct "B">} #cir.record.decl.ast>>, !cir.record<struct "anon.2">} #cir.record.decl.ast>} #cir.record.decl.ast>>} #cir.record.decl.ast>
+// CHECK5: !cir.record<union "anon.1" {!cir.ptr<!cir.record<struct "A" {!rec_anon2E0, !cir.record<struct "B" {!cir.ptr<!cir.record<struct "B">>, !rec_C, !cir.record<union "anon.1">} #cir.record.decl.ast>} #cir.record.decl.ast>>, !rec_anon2E2} #cir.record.decl.ast>
+// CHECK5: !cir.record<struct "B" {!cir.ptr<!cir.record<struct "B">>, !rec_C, !rec_anon2E1} #cir.record.decl.ast>
+// CHECK5: !cir.record<struct "A" {!rec_anon2E0, !rec_B} #cir.record.decl.ast>
+struct A {
+  struct {
+    struct A *a1;
+  };
+  struct B {
+    struct B *b1;
+    struct C {
+      struct A *a2;
+      struct B *b2;
+      struct C *c1;
+    } c;
+    union {
+      struct A *a2;
+      struct {
+        struct B *b3;
+      };
+    } u;
+  } b;
+};
+void test(struct A *a){};
diff --git a/clang/test/CIR/Incubator/CodeGen/fp16-ops.c b/clang/test/CIR/Incubator/CodeGen/fp16-ops.c
new file mode 100644
index 0000000000000..8a0302fe93e96
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/fp16-ops.c
@@ -0,0 +1,805 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir --check-prefix=CHECK %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll --check-prefix=CHECK-LLVM %s
+
+// TODO: once we have support for targets that does not have native fp16
+//       support but have fp16 conversion intrinsic support, add tests for
+//       these targets.
+
+volatile unsigned test;
+volatile int i0;
+volatile __fp16 h0 = 0.0, h1 = 1.0, h2;
+volatile float f0, f1, f2;
+volatile double d0;
+short s0;
+
+void foo(void) {
+  test = (h0);
+  // CHECK: %{{.+}} = cir.cast float_to_int %{{.+}} : !cir.f16 -> !u32i
+
+  // CHECK-LLVM: %{{.+}} = fptoui half %{{.+}} to i32
+
+  h0 = (test);
+  // CHECK: %{{.+}} = cir.cast int_to_float %{{.+}} : !u32i -> !cir.f16
+
+  // CHECK-LLVM: %{{.+}} = uitofp i32 %{{.+}} to half
+
+  test = (!h1);
+  //      CHECK: %[[#A:]] = cir.cast float_to_bool %{{.+}} : !cir.f16 -> !cir.bool
+  // CHECK-NEXT: %[[#B:]] = cir.unary(not, %[[#A]]) : !cir.bool, !cir.bool
+  // CHECK-NEXT: %[[#C:]] = cir.cast bool_to_int %[[#B]] : !cir.bool -> !s32i
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  //      CHECK-LLVM: %[[#A:]] = fcmp une half %{{.+}}, 0xH0000
+  // CHECK-LLVM-NEXT: %[[#B:]] = zext i1 %[[#A]] to i8
+  // CHECK-LLVM-NEXT: %[[#C:]] = xor i8 %[[#B]], 1
+  // CHECK-LLVM-NEXT: %{{.+}} = zext i8 %[[#C]] to i32
+
+  h1 = -h1;
+  //  CHECK-NOT: %{{.+}} = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //  CHECK-NOT: %{{.+}} = cir.cast floating %{{.+}} : !cir.float -> !cir.f16
+  //      CHECK: %{{.+}} = cir.unary(minus, %{{.+}}) : !cir.f16, !cir.f16
+
+  // CHECK-LLVM: %{{.+}} = fneg half %{{.+}}
+
+  h1 = +h1;
+  //  CHECK-NOT: %{{.+}} = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //  CHECK-NOT: %{{.+}} = cir.cast floating %{{.+}} : !cir.float -> !cir.f16
+  //      CHECK: %{{.+}} = cir.unary(plus, %{{.+}}) : !cir.f16, !cir.f16
+
+  //      CHECK-LLVM: %[[#A:]] = load volatile half, ptr @h1, align 2
+  // CHECK-LLVM-NEXT: store volatile half %[[#A]], ptr @h1, align 2
+
+  h1++;
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.f16
+  // CHECK-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16
+
+  // CHECK-LLVM: %{{.+}} = fadd half %{{.+}}, 0xH3C00
+
+  ++h1;
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.f16
+  // CHECK-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16
+
+  // CHECK-LLVM: %{{.+}} = fadd half %{{.+}}, 0xH3C00
+
+  --h1;
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<-1.000000e+00> : !cir.f16
+  // CHECK-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16
+
+  // CHECK-LLVM: %{{.+}} = fadd half %{{.+}}, 0xHBC00
+
+  h1--;
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<-1.000000e+00> : !cir.f16
+  // CHECK-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16
+
+  // CHECK-LLVM: %{{.+}} = fadd half %{{.+}}, 0xHBC00
+
+  h1 = h0 * h2;
+  // CHECK: %{{.+}} = cir.binop(mul, %{{.+}}, %{{.+}}) : !cir.f16
+
+  // CHECK-LLVM: %{{.+}} = fmul half %{{.+}}, %{{.+}}
+
+  h1 = h0 * (__fp16) -2.0f;
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float
+  // CHECK-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // CHECK-NEXT: %{{.+}} = cir.binop(mul, %{{.+}}, %[[#C]]) : !cir.f16
+
+  // CHECK-LLVM: %{{.+}} = fmul half %{{.+}}, 0xHC000
+
+  h1 = h0 * f2;
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      CHECK: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.float
+  // CHECK-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      CHECK-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      CHECK-LLVM: %[[#RES:]] = fmul float %[[#LHS]], %{{.+}}
+  // CHECK-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  h1 = f0 * h2;
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.float
+  // CHECK-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      CHECK-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM-NEXT: %[[#RES:]] = fmul float %{{.+}}, %[[#RHS]]
+  // CHECK-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  h1 = h0 * i0;
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // CHECK-NEXT: %{{.+}} = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.f16
+
+  //      CHECK-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM-NEXT: %{{.+}} = fmul half %{{.+}}, %[[#A]]
+
+  h1 = (h0 / h2);
+  // CHECK: %{{.+}} = cir.binop(div, %{{.+}}, %{{.+}}) : !cir.f16
+
+  // CHECK-LLVM: %{{.+}} = fdiv half %{{.+}}, %{{.+}}
+
+  h1 = (h0 / (__fp16) -2.0f);
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float
+  // CHECK-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // CHECK-NEXT: %{{.+}} = cir.binop(div, %{{.+}}, %[[#C]]) : !cir.f16
+
+  // CHECK-LLVM: %{{.+}} = fdiv half %{{.+}}, 0xHC000
+
+  h1 = (h0 / f2);
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      CHECK: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.float
+  // CHECK-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      CHECK-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      CHECK-LLVM: %[[#RES:]] = fdiv float %[[#LHS]], %{{.+}}
+  // CHECK-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  h1 = (f0 / h2);
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.binop(div, %{{.+}}, %[[#A]]) : !cir.float
+  // CHECK-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      CHECK-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM-NEXT: %[[#RES:]] = fdiv float %{{.+}}, %[[#RHS]]
+  // CHECK-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  h1 = (h0 / i0);
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // CHECK-NEXT: %{{.+}} = cir.binop(div, %{{.+}}, %[[#A]]) : !cir.f16
+
+  //      CHECK-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM-NEXT: %{{.+}} = fdiv half %{{.+}}, %[[#A]]
+
+  h1 = (h2 + h0);
+  // CHECK: %{{.+}} = cir.binop(add, %{{.+}}, %{{.+}}) : !cir.f16
+
+  // CHECK-LLVM: %{{.+}} = fadd half %{{.+}}, %{{.+}}
+
+  h1 = ((__fp16)-2.0 + h0);
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double
+  // CHECK-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double
+  // CHECK-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.f16
+  //      CHECK: %{{.+}} = cir.binop(add, %[[#C]], %{{.+}}) : !cir.f16
+
+  // CHECK-LLVM: %{{.+}} = fadd half 0xHC000, %{{.+}}
+
+  h1 = (h2 + f0);
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      CHECK: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.float
+  // CHECK-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      CHECK-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      CHECK-LLVM: %[[#RES:]] = fadd float %[[#LHS]], %{{.+}}
+  // CHECK-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  h1 = (f2 + h0);
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.float
+  // CHECK-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      CHECK-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM-NEXT: %[[#RES:]] = fadd float %{{.+}}, %[[#RHS]]
+  // CHECK-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  h1 = (h0 + i0);
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // CHECK-NEXT: %{{.+}} = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16
+
+  //      CHECK-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM-NEXT: %{{.+}} = fadd half %{{.+}}, %[[#A]]
+
+  h1 = (h2 - h0);
+  // CHECK: %{{.+}} = cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.f16
+
+  // CHECK-LLVM: %{{.+}} = fsub half %{{.+}}, %{{.+}}
+
+  h1 = ((__fp16)-2.0f - h0);
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float
+  // CHECK-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  //      CHECK: %{{.+}} = cir.binop(sub, %[[#C]], %{{.+}}) : !cir.f16
+
+  // CHECK-LLVM: %{{.+}} = fsub half 0xHC000, %{{.+}}
+
+  h1 = (h2 - f0);
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      CHECK: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.float
+  // CHECK-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      CHECK-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  //      CHECK-LLVM: %[[#RES:]] = fsub float %[[#LHS]], %{{.+}}
+  // CHECK-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  h1 = (f2 - h0);
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.float
+  // CHECK-NEXT: %{{.+}} = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+
+  //      CHECK-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM-NEXT: %[[#RES:]] = fsub float %{{.+}}, %[[#RHS]]
+  // CHECK-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  h1 = (h0 - i0);
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // CHECK-NEXT: %{{.+}} = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.f16
+
+  //      CHECK-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM-NEXT: %{{.+}} = fsub half %{{.+}}, %[[#A]]
+
+  test = (h2 < h0);
+  //      CHECK: %[[#A:]] = cir.cmp(lt, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#A]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %{{.+}} = fcmp olt half %{{.+}}, %{{.+}}
+
+  test = (h2 < (__fp16)42.0);
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double
+  // CHECK-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  // CHECK-NEXT: %[[#C:]] = cir.cmp(lt, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %{{.+}} = fcmp olt half %{{.+}}, 0xH5140
+
+  test = (h2 < f0);
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      CHECK: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM: %{{.+}} = fcmp olt float %[[#A]], %{{.+}}
+
+  test = (f2 < h0);
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      CHECK-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM-NEXT: %{{.+}} = fcmp olt float %{{.+}}, %[[#A]]
+
+  test = (i0 < h0);
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      CHECK: %[[#B:]] = cir.cmp(lt, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM: %{{.+}} = fcmp olt half %[[#A]], %{{.+}}
+
+  test = (h0 < i0);
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // CHECK-NEXT: %[[#B:]] = cir.cmp(lt, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      CHECK-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM-NEXT: %{{.+}} = fcmp olt half %{{.+}}, %[[#A]]
+
+  test = (h0 > h2);
+  //      CHECK: %[[#A:]] = cir.cmp(gt, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#A]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %{{.+}} = fcmp ogt half %{{.+}}, %{{.+}}
+
+  test = ((__fp16)42.0 > h2);
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double
+  // CHECK-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  //      CHECK: %[[#C:]] = cir.cmp(gt, %[[#B]], %{{.+}}) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %{{.+}} = fcmp ogt half 0xH5140, %{{.+}}
+
+  test = (h0 > f2);
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      CHECK: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM: %{{.+}} = fcmp ogt float %[[#LHS]], %{{.+}}
+
+  test = (f0 > h2);
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      CHECK-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM-NEXT: %{{.+}} = fcmp ogt float %{{.+}}, %[[#RHS]]
+
+  test = (i0 > h0);
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      CHECK: %[[#B:]] = cir.cmp(gt, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM: %{{.+}} = fcmp ogt half %[[#LHS]], %{{.+}}
+
+  test = (h0 > i0);
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // CHECK-NEXT: %[[#B:]] = cir.cmp(gt, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      CHECK-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM-NEXT: %{{.+}} = fcmp ogt half %{{.+}}, %[[#RHS]]
+
+  test = (h2 <= h0);
+  //      CHECK: %[[#A:]] = cir.cmp(le, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#A]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %{{.+}} = fcmp ole half %{{.+}}, %{{.+}}
+
+  test = (h2 <= (__fp16)42.0);
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<4.200000e+01> : !cir.double
+  // CHECK-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  // CHECK-NEXT: %[[#C:]] = cir.cmp(le, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %{{.+}} = fcmp ole half %{{.+}}, 0xH5140
+
+  test = (h2 <= f0);
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      CHECK: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM: %{{.+}} = fcmp ole float %[[#LHS]], %{{.+}}
+
+  test = (f2 <= h0);
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      CHECK-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM-NEXT: %{{.+}} = fcmp ole float %{{.+}}, %[[#RHS]]
+
+  test = (i0 <= h0);
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      CHECK: %[[#B:]] = cir.cmp(le, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM: %{{.+}} = fcmp ole half %[[#LHS]], %{{.+}}
+
+  test = (h0 <= i0);
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // CHECK-NEXT: %[[#B:]] = cir.cmp(le, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      CHECK-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM-NEXT: %{{.+}} = fcmp ole half %{{.+}}, %[[#RHS]]
+
+  test = (h0 >= h2);
+  //      CHECK: %[[#A:]] = cir.cmp(ge, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#A]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %{{.+}} = fcmp oge half %{{.+}}, %{{.+}}
+
+  test = (h0 >= (__fp16)-2.0);
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.double
+  // CHECK-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.double, !cir.double
+  // CHECK-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.f16
+  // CHECK-NEXT: %[[#D:]] = cir.cmp(ge, %{{.+}}, %[[#C]]) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#D]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %{{.+}} = fcmp oge half %{{.+}}, 0xHC000
+
+  test = (h0 >= f2);
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      CHECK: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM: %{{.+}} = fcmp oge float %[[#LHS]], %{{.+}}
+
+  test = (f0 >= h2);
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      CHECK-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM-NEXT: %{{.+}} = fcmp oge float %{{.+}}, %[[#RHS]]
+
+  test = (i0 >= h0);
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      CHECK: %[[#B:]] = cir.cmp(ge, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM: %{{.+}} = fcmp oge half %[[#LHS]], %{{.+}}
+
+  test = (h0 >= i0);
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // CHECK-NEXT: %[[#B:]] = cir.cmp(ge, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      CHECK-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM-NEXT: %{{.+}} = fcmp oge half %{{.+}}, %[[#RHS]]
+
+  test = (h1 == h2);
+  //      CHECK: %[[#A:]] = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#A]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %{{.+}} = fcmp oeq half %{{.+}}, %{{.+}}
+
+  test = (h1 == (__fp16)1.0);
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // CHECK-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  // CHECK-NEXT: %[[#C:]] = cir.cmp(eq, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %{{.+}} = fcmp oeq half %{{.+}}, 0xH3C00
+
+  test = (h1 == f1);
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      CHECK: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM: %{{.+}} = fcmp oeq float %[[#LHS]], %{{.+}}
+
+  test = (f1 == h1);
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      CHECK-LLVM: %[[#RHS:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM-NEXT: %{{.+}} = fcmp oeq float %{{.+}}, %[[#RHS]]
+
+  test = (i0 == h0);
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      CHECK: %[[#B:]] = cir.cmp(eq, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM: %{{.+}} = fcmp oeq half %[[#LHS]], %{{.+}}
+
+  test = (h0 == i0);
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // CHECK-NEXT: %[[#B:]] = cir.cmp(eq, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      CHECK-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM-NEXT: %{{.+}} = fcmp oeq half %{{.+}}, %[[#RHS]]
+
+  test = (h1 != h2);
+  //      CHECK: %[[#A:]] = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#A]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %{{.+}} = fcmp une half %{{.+}}, %{{.+}}
+
+  test = (h1 != (__fp16)1.0);
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // CHECK-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  // CHECK-NEXT: %[[#C:]] = cir.cmp(ne, %{{.+}}, %[[#B]]) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#C]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %{{.+}} = fcmp une half %{{.+}}, 0xH3C00
+
+  test = (h1 != f1);
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  //      CHECK: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.float, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM: %{{.+}} = fcmp une float %[[#LHS]], %{{.+}}
+
+  test = (f1 != h1);
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.float, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      CHECK-LLVM: %[[#A:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM-NEXT: %{{.+}} = fcmp une float %{{.+}}, %[[#A]]
+
+  test = (i0 != h0);
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      CHECK: %[[#B:]] = cir.cmp(ne, %[[#A]], %{{.+}}) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  // CHECK-LLVM: %[[#LHS:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM: %{{.+}} = fcmp une half %[[#LHS]], %{{.+}}
+
+  test = (h0 != i0);
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // CHECK-NEXT: %[[#B:]] = cir.cmp(ne, %{{.+}}, %[[#A]]) : !cir.f16, !cir.bool
+  // CHECK-NEXT: %{{.+}} = cir.cast integral %[[#B]] : !s32i -> !u32i
+
+  //      CHECK-LLVM: %[[#RHS:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM-NEXT: %{{.+}} = fcmp une half %{{.+}}, %[[#RHS]]
+
+  h1 = (h1 ? h2 : h0);
+  //      CHECK: %[[#A:]] = cir.cast float_to_bool %{{.+}} : !cir.f16 -> !cir.bool
+  // CHECK-NEXT: %[[#B:]] = cir.ternary(%[[#A]], true {
+  //      CHECK:   cir.yield %{{.+}} : !cir.f16
+  // CHECK-NEXT: }, false {
+  //      CHECK:   cir.yield %{{.+}} : !cir.f16
+  // CHECK-NEXT: }) : (!cir.bool) -> !cir.f16
+  // CHECK-NEXT: %[[#C:]] = cir.get_global @h1 : !cir.ptr<!cir.f16>
+  // CHECK-NEXT: cir.store volatile %[[#B]], %[[#C]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      CHECK-LLVM:   %[[#A:]] = fcmp une half %{{.+}}, 0xH0000
+  // CHECK-LLVM-NEXT:   br i1 %[[#A]], label %[[#LABEL_A:]], label %[[#LABEL_B:]]
+  //      CHECK-LLVM: [[#LABEL_A]]:
+  // CHECK-LLVM-NEXT:   %[[#B:]] = load volatile half, ptr @h2, align 2
+  // CHECK-LLVM-NEXT:   br label %[[#LABEL_C:]]
+  //      CHECK-LLVM: [[#LABEL_B]]:
+  // CHECK-LLVM-NEXT:   %[[#C:]] = load volatile half, ptr @h0, align 2
+  // CHECK-LLVM-NEXT:   br label %[[#LABEL_C]]
+  //      CHECK-LLVM: [[#LABEL_C]]:
+  // CHECK-LLVM-NEXT:   %{{.+}} = phi half [ %[[#C]], %[[#LABEL_B]] ], [ %[[#B]], %[[#LABEL_A]] ]
+
+  h0 = h1;
+  //      CHECK: %[[#A:]] = cir.get_global @h1 : !cir.ptr<!cir.f16>
+  // CHECK-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.f16>, !cir.f16
+  // CHECK-NEXT: %[[#C:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // CHECK-NEXT: cir.store volatile %[[#B]], %[[#C]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      CHECK-LLVM: %[[#A:]] = load volatile half, ptr @h1, align 2
+  // CHECK-LLVM-NEXT: store volatile half %[[#A]], ptr @h0, align 2
+
+  h0 = (__fp16)-2.0f;
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<2.000000e+00> : !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.unary(minus, %[[#A]]) : !cir.float, !cir.float
+  // CHECK-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // CHECK-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // CHECK-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  // CHECK-LLVM: store volatile half 0xHC000, ptr @h0, align 2
+
+  h0 = f0;
+  //      CHECK: %[[#A:]] = cir.get_global @f0 : !cir.ptr<!cir.float>
+  // CHECK-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.float>, !cir.float
+  // CHECK-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // CHECK-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // CHECK-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      CHECK-LLVM: %[[#A:]] = load volatile float, ptr @f0, align 4
+  // CHECK-LLVM-NEXT: %[[#B:]] = fptrunc float %[[#A]] to half
+  // CHECK-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2
+
+  h0 = i0;
+  //      CHECK: %[[#A:]] = cir.get_global @i0 : !cir.ptr<!s32i>
+  // CHECK-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[#C:]] = cir.cast int_to_float %[[#B]] : !s32i -> !cir.f16
+  // CHECK-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // CHECK-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      CHECK-LLVM: %[[#A:]] = load volatile i32, ptr @i0, align 4
+  // CHECK-LLVM-NEXT: %[[#B:]] = sitofp i32 %[[#A]] to half
+  // CHECK-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2
+
+  i0 = h0;
+  //      CHECK: %[[#A:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // CHECK-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.f16>, !cir.f16
+  // CHECK-NEXT: %[[#C:]] = cir.cast float_to_int %[[#B]] : !cir.f16 -> !s32i
+  // CHECK-NEXT: %[[#D:]] = cir.get_global @i0 : !cir.ptr<!s32i>
+  // CHECK-NEXT: cir.store volatile %[[#C]], %[[#D]] : !s32i, !cir.ptr<!s32i>
+
+  //      CHECK-LLVM: %[[#A:]] = load volatile half, ptr @h0, align 2
+  // CHECK-LLVM-NEXT: %[[#B:]] = fptosi half %[[#A]] to i32
+  // CHECK-LLVM-NEXT: store volatile i32 %[[#B]], ptr @i0, align 4
+
+  h0 += h1;
+  //      CHECK: %[[#A:]] = cir.binop(add, %{{.+}}, %{{.+}}) : !cir.f16
+  // CHECK-NEXT: cir.store volatile %[[#A]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  // CHECK-LLVM: %{{.+}} = fadd half %{{.+}}, %{{.+}}
+
+  h0 += (__fp16)1.0f;
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.float -> !cir.f16
+  //      CHECK: %[[#C:]] = cir.binop(add, %{{.+}}, %[[#B]]) : !cir.f16
+  // CHECK-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  // CHECK-LLVM: %{{.+}} = fadd half %{{.+}}, 0xH3C00
+
+  h0 += f2;
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.float
+  // CHECK-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // CHECK-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      CHECK-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM-NEXT: %[[#RES:]] = fadd float %[[#LHS]], %{{.+}}
+  // CHECK-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  i0 += h0;
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // CHECK-NEXT: %[[#B:]] = cir.binop(add, %[[#A]], %{{.+}}) : !cir.f16
+  // CHECK-NEXT: %[[#C:]] = cir.cast float_to_int %[[#B]] : !cir.f16 -> !s32i
+  // CHECK-NEXT: cir.store volatile %[[#C]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      CHECK-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM-NEXT: %[[#B:]] = fadd half %[[#A]], %{{.+}}
+  // CHECK-LLVM-NEXT: %{{.+}} = fptosi half %[[#B]] to i32
+
+  h0 += i0;
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      CHECK: %[[#B:]] = cir.binop(add, %{{.+}}, %[[#A]]) : !cir.f16
+  // CHECK-NEXT: cir.store volatile %[[#B]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  // CHECK-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM: %{{.+}} = fadd half %{{.+}}, %[[#A]]
+
+  h0 -= h1;
+  //      CHECK: %[[#A:]] = cir.binop(sub, %{{.+}}, %{{.+}}) : !cir.f16
+  // CHECK-NEXT: cir.store volatile %[[#A]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  // CHECK-LLVM: %{{.+}} = fsub half %{{.+}}, %{{.+}}
+
+  h0 -= (__fp16)1.0;
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // CHECK-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  //      CHECK: %[[#C:]] = cir.binop(sub, %{{.+}}, %[[#B]]) : !cir.f16
+  // CHECK-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  // CHECK-LLVM: %{{.+}} = fsub half %{{.+}}, 0xH3C00
+
+  h0 -= f2;
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.float
+  // CHECK-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // CHECK-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      CHECK-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM-NEXT: %[[#RES:]] = fsub float %[[#LHS]], %{{.+}}
+  // CHECK-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  i0 -= h0;
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // CHECK-NEXT: %[[#B:]] = cir.binop(sub, %[[#A]], %{{.+}}) : !cir.f16
+  // CHECK-NEXT: %[[#C:]] = cir.cast float_to_int %[[#B]] : !cir.f16 -> !s32i
+  // CHECK-NEXT: cir.store volatile %[[#C]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      CHECK-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM-NEXT: %[[#B:]] = fsub half %[[#A]], %{{.+}}
+  // CHECK-LLVM-NEXT: %{{.+}} = fptosi half %[[#B]] to i32
+
+  h0 -= i0;
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      CHECK: %[[#B:]] = cir.binop(sub, %{{.+}}, %[[#A]]) : !cir.f16
+  // CHECK-NEXT: cir.store volatile %[[#B]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  // CHECK-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM: %{{.+}} = fsub half %{{.+}}, %[[#A]]
+
+  h0 *= h1;
+  //      CHECK: %[[#A:]] = cir.binop(mul, %{{.+}}, %{{.+}}) : !cir.f16
+  // CHECK-NEXT: cir.store volatile %[[#A]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  // CHECK-LLVM: %{{.+}} = fmul half %{{.+}}, %{{.+}}
+
+  h0 *= (__fp16)1.0;
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // CHECK-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  //      CHECK: %[[#C:]] = cir.binop(mul, %{{.+}}, %[[#B]]) : !cir.f16
+  // CHECK-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  // CHECK-LLVM: %{{.+}} = fmul half %{{.+}}, 0xH3C00
+
+  h0 *= f2;
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.float
+  // CHECK-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // CHECK-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      CHECK-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM-NEXT: %[[#RES:]] = fmul float %[[#LHS]], %{{.+}}
+  // CHECK-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  i0 *= h0;
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // CHECK-NEXT: %[[#B:]] = cir.binop(mul, %[[#A]], %{{.+}}) : !cir.f16
+  // CHECK-NEXT: %[[#C:]] = cir.cast float_to_int %[[#B]] : !cir.f16 -> !s32i
+  // CHECK-NEXT: cir.store volatile %[[#C]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      CHECK-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM-NEXT: %[[#B:]] = fmul half %[[#A]], %{{.+}}
+  // CHECK-LLVM-NEXT: %{{.+}} = fptosi half %[[#B]] to i32
+
+  h0 *= i0;
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      CHECK: %[[#B:]] = cir.binop(mul, %{{.+}}, %[[#A]]) : !cir.f16
+  // CHECK-NEXT: cir.store volatile %[[#B]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  // CHECK-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM: %{{.+}} = fmul half %{{.+}}, %[[#A]]
+
+  h0 /= h1;
+  //      CHECK: %[[#A:]] = cir.binop(div, %{{.+}}, %{{.+}}) : !cir.f16
+  // CHECK-NEXT: cir.store volatile %[[#A]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  // CHECK-LLVM: %{{.+}} = fdiv half %{{.+}}, %{{.+}}
+
+  h0 /= (__fp16)1.0;
+  //      CHECK: %[[#A:]] = cir.const #cir.fp<1.000000e+00> : !cir.double
+  // CHECK-NEXT: %[[#B:]] = cir.cast floating %[[#A]] : !cir.double -> !cir.f16
+  //      CHECK: %[[#C:]] = cir.binop(div, %{{.+}}, %[[#B]]) : !cir.f16
+  // CHECK-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  // CHECK-LLVM: %{{.+}} = fdiv half %{{.+}}, 0xH3C00
+
+  h0 /= f2;
+  //      CHECK: %[[#A:]] = cir.cast floating %{{.+}} : !cir.f16 -> !cir.float
+  // CHECK-NEXT: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.float
+  // CHECK-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.float -> !cir.f16
+  // CHECK-NEXT: cir.store volatile %[[#C]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      CHECK-LLVM: %[[#LHS:]] = fpext half %{{.+}} to float
+  // CHECK-LLVM-NEXT: %[[#RES:]] = fdiv float %[[#LHS]], %{{.+}}
+  // CHECK-LLVM-NEXT: %{{.+}} = fptrunc float %[[#RES]] to half
+
+  i0 /= h0;
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  // CHECK-NEXT: %[[#B:]] = cir.binop(div, %[[#A]], %{{.+}}) : !cir.f16
+  // CHECK-NEXT: %[[#C:]] = cir.cast float_to_int %[[#B]] : !cir.f16 -> !s32i
+  // CHECK-NEXT: cir.store volatile %[[#C]], %{{.+}} : !s32i, !cir.ptr<!s32i>
+
+  //      CHECK-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM-NEXT: %[[#B:]] = fdiv half %[[#A]], %{{.+}}
+  // CHECK-LLVM-NEXT: %{{.+}} = fptosi half %[[#B]] to i32
+
+  h0 /= i0;
+  //      CHECK: %[[#A:]] = cir.cast int_to_float %{{.+}} : !s32i -> !cir.f16
+  //      CHECK: %[[#B:]] = cir.binop(div, %{{.+}}, %[[#A]]) : !cir.f16
+  // CHECK-NEXT: cir.store volatile %[[#B]], %{{.+}} : !cir.f16, !cir.ptr<!cir.f16>
+
+  // CHECK-LLVM: %[[#A:]] = sitofp i32 %{{.+}} to half
+  // CHECK-LLVM: %{{.+}} = fdiv half %{{.+}}, %[[#A]]
+
+  h0 = d0;
+  //      CHECK: %[[#A:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // CHECK-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.double>, !cir.double
+  // CHECK-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.f16
+  // CHECK-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // CHECK-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      CHECK-LLVM: %[[#A:]] = load volatile double, ptr @d0, align 8
+  // CHECK-LLVM-NEXT: %[[#B:]] = fptrunc double %[[#A]] to half
+  // CHECK-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2
+
+  h0 = (float)d0;
+  //      CHECK: %[[#A:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // CHECK-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.double>, !cir.double
+  // CHECK-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.double -> !cir.float
+  // CHECK-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.f16
+  // CHECK-NEXT: %[[#E:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // CHECK-NEXT: cir.store volatile %[[#D]], %[[#E]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      CHECK-LLVM: %[[#A:]] = load volatile double, ptr @d0, align 8
+  // CHECK-LLVM-NEXT: %[[#B:]] = fptrunc double %[[#A]] to float
+  // CHECK-LLVM-NEXT: %[[#C:]] = fptrunc float %[[#B]] to half
+  // CHECK-LLVM-NEXT: store volatile half %[[#C]], ptr @h0, align 2
+
+  d0 = h0;
+  //      CHECK: %[[#A:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // CHECK-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.f16>, !cir.f16
+  // CHECK-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.f16 -> !cir.double
+  // CHECK-NEXT: %[[#D:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // CHECK-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.double, !cir.ptr<!cir.double>
+
+  //      CHECK-LLVM: %[[#A:]] = load volatile half, ptr @h0, align 2
+  // CHECK-LLVM-NEXT: %[[#B:]] = fpext half %[[#A]] to double
+  // CHECK-LLVM-NEXT: store volatile double %[[#B]], ptr @d0, align 8
+
+  d0 = (float)h0;
+  //      CHECK: %[[#A:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // CHECK-NEXT: %[[#B:]] = cir.load volatile %[[#A]] : !cir.ptr<!cir.f16>, !cir.f16
+  // CHECK-NEXT: %[[#C:]] = cir.cast floating %[[#B]] : !cir.f16 -> !cir.float
+  // CHECK-NEXT: %[[#D:]] = cir.cast floating %[[#C]] : !cir.float -> !cir.double
+  // CHECK-NEXT: %[[#E:]] = cir.get_global @d0 : !cir.ptr<!cir.double>
+  // CHECK-NEXT: cir.store volatile %[[#D]], %[[#E]] : !cir.double, !cir.ptr<!cir.double>
+
+  //      CHECK-LLVM: %[[#A:]] = load volatile half, ptr @h0, align 2
+  // CHECK-LLVM-NEXT: %[[#B:]] = fpext half %[[#A]] to float
+  // CHECK-LLVM-NEXT: %[[#C:]] = fpext float %[[#B]] to double
+  // CHECK-LLVM-NEXT: store volatile double %[[#C]], ptr @d0, align 8
+
+  h0 = s0;
+  //      CHECK: %[[#A:]] = cir.get_global @s0 : !cir.ptr<!s16i>
+  // CHECK-NEXT: %[[#B:]] = cir.load %[[#A]] : !cir.ptr<!s16i>, !s16i
+  // CHECK-NEXT: %[[#C:]] = cir.cast int_to_float %[[#B]] : !s16i -> !cir.f16
+  // CHECK-NEXT: %[[#D:]] = cir.get_global @h0 : !cir.ptr<!cir.f16>
+  // CHECK-NEXT: cir.store volatile %[[#C]], %[[#D]] : !cir.f16, !cir.ptr<!cir.f16>
+
+  //      CHECK-LLVM: %[[#A:]] = load i16, ptr @s0, align 2
+  // CHECK-LLVM-NEXT: %[[#B:]] = sitofp i16 %[[#A]] to half
+  // CHECK-LLVM-NEXT: store volatile half %[[#B]], ptr @h0, align 2
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/fullexpr.cpp b/clang/test/CIR/Incubator/CodeGen/fullexpr.cpp
new file mode 100644
index 0000000000000..128ad6a54aaea
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/fullexpr.cpp
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir-flat %s -o %t.cir.flat
+// RUN: FileCheck --check-prefix=FLAT  --input-file=%t.cir.flat %s
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -o - %s \
+// RUN: | opt -S -passes=instcombine,mem2reg,simplifycfg -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+int go(int const& val);
+
+int go1() {
+  auto x = go(1);
+  return x;
+}
+
+// CHECK: cir.func {{.*}} @_Z3go1v() -> !s32i
+// CHECK: %[[#XAddr:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+// CHECK: %[[#RVal:]] = cir.scope {
+// CHECK-NEXT:   %[[#TmpAddr:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["ref.tmp0", init] {alignment = 4 : i64}
+// CHECK-NEXT:   %[[#One:]] = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:   cir.store{{.*}} %[[#One]], %[[#TmpAddr]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:   %[[#RValTmp:]] = cir.call @_Z2goRKi(%[[#TmpAddr]]) : (!cir.ptr<!s32i>) -> !s32i
+// CHECK-NEXT:   cir.yield %[[#RValTmp]] : !s32i
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.store{{.*}} %[[#RVal]], %[[#XAddr]] : !s32i, !cir.ptr<!s32i>
+
+// FLAT: cir.func {{.*}} @_Z3go1v() -> !s32i
+// FLAT: %[[#TmpAddr:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["ref.tmp0", init] {alignment = 4 : i64}
+// FLAT: %[[#XAddr:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+// FLAT: cir.br ^[[before_body:.*]]{{ loc.*}}
+// FLAT-NEXT: ^[[before_body]]:  // pred: ^bb0
+// FLAT-NEXT:   %[[#One:]] = cir.const #cir.int<1> : !s32i
+// FLAT-NEXT:   cir.store{{.*}} %[[#One]], %[[#TmpAddr]] : !s32i, !cir.ptr<!s32i>
+// FLAT-NEXT:   %[[#RValTmp:]] = cir.call @_Z2goRKi(%[[#TmpAddr]]) : (!cir.ptr<!s32i>) -> !s32i
+// FLAT-NEXT:   cir.br ^[[continue_block:.*]](%[[#RValTmp]] : !s32i) {{loc.*}}
+// FLAT-NEXT: ^[[continue_block]](%[[#BlkArgRval:]]: !s32i {{loc.*}}):  // pred: ^[[before_body]]
+// FLAT-NEXT:   cir.store{{.*}} %[[#BlkArgRval]], %[[#XAddr]] : !s32i, !cir.ptr<!s32i>
+
+// LLVM-LABEL: @_Z3go1v()
+// LLVM-NEXT: %[[#TmpAddr:]] = alloca i32, i64 1, align 4
+// LLVM: br label %[[before_body:[0-9]+]]
+// LLVM: [[before_body]]:
+// LLVM-NEXT: store i32 1, ptr %[[#TmpAddr]], align 4
+// LLVM-NEXT: %[[#RValTmp:]] = call i32 @_Z2goRKi(ptr %[[#TmpAddr]])
+// LLVM-NEXT: br label %[[continue_block:[0-9]+]]
+
+// LLVM: [[continue_block]]:
+// LLVM-NEXT: [[PHI:%.*]] = phi i32 [ %[[#RValTmp]], %[[before_body]] ]
+// LLVM: store i32 [[PHI]], ptr [[TMP0:%.*]], align 4
+// LLVM: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// LLVM: store i32 [[TMP1]], ptr [[TMP2:%.*]], align 4
+// LLVM: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// LLVM: ret i32 [[TMP3]]
diff --git a/clang/test/CIR/Incubator/CodeGen/fun-ptr.c b/clang/test/CIR/Incubator/CodeGen/fun-ptr.c
new file mode 100644
index 0000000000000..0718eeb33e61f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/fun-ptr.c
@@ -0,0 +1,72 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o -  | FileCheck %s -check-prefix=CIR
+// RUN: %clang_cc1 -x c++ -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o -  | FileCheck %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
+// RUN: %clang_cc1 -x c++ -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
+
+typedef struct {
+    int a;
+    int b;
+} Data;
+
+typedef int (*fun_t)(Data* d);
+
+struct A;
+typedef int (*fun_typ)(struct A*);
+
+typedef struct A {
+  fun_typ fun;
+} A;
+
+// CIR: !rec_A = !cir.record<struct "A" {!cir.ptr<!cir.func<(!cir.ptr<!cir.record<struct "A">>) -> !s32i>>} #cir.record.decl.ast>
+A a = {(fun_typ)0};
+
+int extract_a(Data* d) {
+    return d->a;
+}
+
+// CIR: cir.func {{.*}} {{@.*foo.*}}(%arg0: !cir.ptr<!rec_Data>
+// CIR:   [[TMP0:%.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init]
+// CIR:   [[TMP1:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+// CIR:   [[TMP2:%.*]] = cir.alloca !cir.ptr<!cir.func<(!cir.ptr<!rec_Data>) -> !s32i>>, !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Data>) -> !s32i>>>, ["f", init]
+// CIR:   cir.store{{.*}} %arg0, [[TMP0]] : !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>
+// CIR:   [[TMP3:%.*]] = cir.const #cir.ptr<null> : !cir.ptr<!cir.func<(!cir.ptr<!rec_Data>) -> !s32i>>
+// CIR:   cir.store{{.*}} [[TMP3]], [[TMP2]] : !cir.ptr<!cir.func<(!cir.ptr<!rec_Data>) -> !s32i>>, !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Data>) -> !s32i>>>
+// CIR:   [[TMP4:%.*]] = cir.get_global {{@.*extract_a.*}} : !cir.ptr<!cir.func<(!cir.ptr<!rec_Data>) -> !s32i>>
+// CIR:   cir.store{{.*}} [[TMP4]], [[TMP2]] : !cir.ptr<!cir.func<(!cir.ptr<!rec_Data>) -> !s32i>>, !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Data>) -> !s32i>>>
+// CIR:   [[TMP5:%.*]] = cir.load{{.*}} [[TMP2]] : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Data>) -> !s32i>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_Data>) -> !s32i>>
+// CIR:   [[TMP6:%.*]] = cir.load{{.*}} [[TMP0]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
+// CIR:   [[TMP7:%.*]] = cir.call [[TMP5]]([[TMP6]]) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_Data>) -> !s32i>>, !cir.ptr<!rec_Data>) -> !s32i
+// CIR:   cir.store{{.*}} [[TMP7]], [[TMP1]] : !s32i, !cir.ptr<!s32i>
+
+// LLVM: define dso_local i32 {{@.*foo.*}}(ptr %0)
+// LLVM:   [[TMP1:%.*]] = alloca ptr, i64 1
+// LLVM:   [[TMP2:%.*]] = alloca i32, i64 1
+// LLVM:   [[TMP3:%.*]] = alloca ptr, i64 1
+// LLVM:   store ptr %0, ptr [[TMP1]]
+// LLVM:   store ptr null, ptr [[TMP3]]
+// LLVM:   store ptr {{@.*extract_a.*}}, ptr [[TMP3]]
+// LLVM:   [[TMP4:%.*]] = load ptr, ptr [[TMP3]]
+// LLVM:   [[TMP5:%.*]] = load ptr, ptr [[TMP1]]
+// LLVM:   [[TMP6:%.*]] = call i32 [[TMP4]](ptr [[TMP5]])
+// LLVM:   store i32 [[TMP6]], ptr [[TMP2]]
+int foo(Data* d) {
+    fun_t f = 0;
+    f = extract_a;
+    return f(d);
+}
+
+// CIR:  cir.func private {{@.*test.*}}() -> !cir.ptr<!cir.func<()>>
+// CIR:  cir.func {{.*}} {{@.*bar.*}}()
+// CIR:    [[RET:%.*]] = cir.call {{@.*test.*}}() : () -> !cir.ptr<!cir.func<()>>
+// CIR:    cir.call [[RET]]() : (!cir.ptr<!cir.func<()>>) -> ()
+// CIR:    cir.return
+
+// LLVM: declare ptr {{@.*test.*}}()
+// LLVM: define dso_local void {{@.*bar.*}}()
+// LLVM:   [[RET:%.*]] = call ptr {{@.*test.*}}()
+// LLVM:   call void [[RET]]()
+// LLVM:   ret void
+void (*test(void))(void);
+void bar(void) {
+  test()();
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/func_dsolocal_pie.c b/clang/test/CIR/Incubator/CodeGen/func_dsolocal_pie.c
new file mode 100644
index 0000000000000..35ba7e5338948
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/func_dsolocal_pie.c
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -pic-is-pie -pic-level 1 %s -o %t1.cir
+// RUN: FileCheck --input-file=%t1.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm -pic-is-pie -pic-level 1 %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+void foo(int i) {
+
+}
+
+int main() {
+  foo(2);
+  return 0;
+}
+
+// CIR: cir.func {{.*}} @foo(%arg0: !s32i
+// CIR-NEXT:   [[TMP0:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+// CIR-NEXT:   cir.store %arg0, [[TMP0]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT:   cir.return
+
+// CIR: cir.func {{.*}}dso_local{{.*}} @main() -> !s32i
+// CIR: [[TMP1:%.*]] = cir.const #cir.int<2> : !s32i
+// CIR: cir.call @foo([[TMP1]]) : (!s32i) -> ()
+
+// LLVM: define dso_local void @foo(i32 [[TMP3:%.*]])
+// LLVM: [[ARG_STACK:%.*]] = alloca i32, i64 1, align 4
+// LLVM: store i32 [[TMP3]], ptr [[ARG_STACK]], align 4
+// LLVM: ret void
+
+// LLVM: define dso_local i32 @main()
+// LLVM: [[TMP4:%.*]] = alloca i32, i64 1, align 4
+// LLVM: call void @foo(i32 2)
+// LLVM: store i32 0, ptr [[TMP4]], align 4
+// LLVM: [[RET_VAL:%.*]] = load i32, ptr [[TMP4]], align 4
+// LLVM: ret i32 [[RET_VAL]]
diff --git a/clang/test/CIR/Incubator/CodeGen/function-attrs.cpp b/clang/test/CIR/Incubator/CodeGen/function-attrs.cpp
new file mode 100644
index 0000000000000..527acd215f90e
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/function-attrs.cpp
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -O2 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -O2 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+
+inline int s0(int a, int b) {
+  int x = a + b;
+  return x;
+}
+
+__attribute__((noinline))
+int s1(int a, int b) {
+  return s0(a,b);
+}
+
+__attribute__((always_inline))
+int s2(int a, int b) {
+  return s0(a,b);
+}
+
+int s3(int a, int b) {
+  int x = a + b;
+  return x;
+}
+
+// CIR: #fn_attr = #cir<extra({nothrow = #cir.nothrow})>
+
+// CIR:   cir.func inline_hint linkonce_odr @_Z2s0ii(%arg0:{{.*}}, %arg1:{{.*}} -> {{.*}} extra(#fn_attr)
+// CIR:   cir.func no_inline {{.*}} @_Z2s1ii(%arg0:{{.*}}, %arg1:{{.*}} -> {{.*}} extra(#fn_attr)
+// CIR:   cir.func always_inline {{.*}} @_Z2s2ii(%arg0:{{.*}}, %arg1:{{.*}} -> {{.*}} extra(#fn_attr)
+// CIR:   cir.func {{.*}} @_Z2s3ii(%arg0:{{.*}}, %arg1:{{.*}} -> {{.*}} {
+
+// LLVM: define dso_local i32 @_Z2s1ii(i32 %0, i32 %1) {{.*}} #[[#ATTR1:]]
+// LLVM: define dso_local i32 @_Z2s2ii(i32 %0, i32 %1) {{.*}} #[[#ATTR2:]]
+// LLVM: attributes #[[#ATTR1]] = {{.*}} noinline
+// LLVM: attributes #[[#ATTR2]] = {{.*}} alwaysinline
diff --git a/clang/test/CIR/Incubator/CodeGen/function-to-pointer-decay.c b/clang/test/CIR/Incubator/CodeGen/function-to-pointer-decay.c
new file mode 100644
index 0000000000000..5b691c3395ff0
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/function-to-pointer-decay.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s
+
+void f(void);
+
+void test_call_lvalue_cast() {
+  (*(void (*)(int))f)(42);
+}
+
+// CHECK: cir.func{{.*}} @test_call_lvalue_cast()
+// CHECK: [[F:%.+]] = cir.get_global @f
+// CHECK: [[CASTED:%.+]] = cir.cast bitcast [[F]]
+// CHECK: [[CONST:%.+]] = cir.const #cir.int<42>
+// CHECK: cir.call [[CASTED]]([[CONST]])
diff --git a/clang/test/CIR/Incubator/CodeGen/generic-selection.c b/clang/test/CIR/Incubator/CodeGen/generic-selection.c
new file mode 100644
index 0000000000000..76234431e6936
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/generic-selection.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+void foo() {
+  int a;
+  int r = _Generic(a, double: 1, float: 2, int: 3, default: 4);
+}
+  
+// CIR: %[[A:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a"]
+// CIR: %[[RES:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["r", init]
+// CIR: %[[RES_VAL:.*]] = cir.const #cir.int<3> : !s32i
+// CIR: cir.store{{.*}} %[[RES_VAL]], %[[RES]] : !s32i, !cir.ptr<!s32i>
+
+// LLVM: %[[A:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[RES:.*]] = alloca i32, i64 1, align 4
+// LLVM: store i32 3, ptr %[[RES]], align 4
diff --git a/clang/test/CIR/Incubator/CodeGen/global-const-record-crash.c b/clang/test/CIR/Incubator/CodeGen/global-const-record-crash.c
new file mode 100644
index 0000000000000..c72f52caae789
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/global-const-record-crash.c
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR %s --input-file=%t.cir
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM %s --input-file=%t.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t-og.ll
+// RUN: FileCheck --check-prefix=OGCG %s --input-file=%t-og.ll
+
+// Test for crash in replaceGlobal with ConstantOp containing GlobalViewAttr.
+// The crash occurred because getNewInitValue() can return GlobalViewAttr,
+// ConstRecordAttr, or ConstArrayAttr, but the code incorrectly assumed
+// it always returns ConstArrayAttr.
+
+char typedef e;
+typedef struct a *b;
+typedef struct { b a; } f;
+struct a { e a; f b; };
+static struct a d = {};
+const b a = &d;
+b c() { return a; }
+
+// CIR: cir.global "private" internal dso_local @d
+// CIR: cir.global constant external @a = #cir.global_view<@d>
+
+// LLVM: %struct.f = type { ptr }
+// LLVM: @d = internal global { i8, [7 x i8], %struct.f } zeroinitializer, align 8
+// LLVM: @a = constant ptr @d, align 8
+
+// OGCG: %struct.f = type { ptr }
+// OGCG: @a = constant ptr @d, align 8
+// OGCG: @d = internal global { i8, [7 x i8], %struct.f } zeroinitializer, align 8
diff --git a/clang/test/CIR/Incubator/CodeGen/global-constant.c b/clang/test/CIR/Incubator/CodeGen/global-constant.c
new file mode 100644
index 0000000000000..4301fcee7a7a0
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/global-constant.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+const int global_no_use = 12;
+// CIR: cir.global constant {{.*}}@global_no_use
+// LLVM: @global_no_use = constant
+
+const float global_used = 1.2f;
+// CIR: cir.global constant {{.*}}@global_used
+// LLVM: @global_used = constant
+
+float const * get_float_ptr() {
+  return &global_used;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/global-ctor-dtor.cpp b/clang/test/CIR/Incubator/CodeGen/global-ctor-dtor.cpp
new file mode 100644
index 0000000000000..dc8a55e3422e8
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/global-ctor-dtor.cpp
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=BEFORE --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t2.cir 2>&1
+// RUN: FileCheck --check-prefix=AFTER --input-file=%t2.cir %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+extern int bar();
+void foo(void) __attribute__((constructor));
+void foo(void) {
+  bar();
+}
+
+// BEFORE: cir.func {{.*}} @_Z3foov() global_ctor
+
+void foo2(void) __attribute__((constructor(777)));
+void foo2(void) {
+  bar();
+}
+
+// BEFORE: cir.func {{.*}} @_Z4foo2v() global_ctor(777)
+
+void foo3(void) __attribute__((destructor));
+void foo3(void) {
+  bar();
+}
+
+// BEFORE: cir.func {{.*}} @_Z4foo3v() global_dtor
+
+void foo4(void) __attribute__((destructor(789)));
+void foo4(void) {
+  bar();
+}
+
+// BEFORE: cir.func {{.*}} @_Z4foo4v() global_dtor(789)
+
+// AFTER: module @{{.*}} attributes {cir.global_ctors = [#cir.global_ctor<"_Z3foov", 65535>, #cir.global_ctor<"_Z4foo2v", 777>], cir.global_dtors = [#cir.global_dtor<"_Z4foo3v", 65535>, #cir.global_dtor<"_Z4foo4v", 789>]
+// LLVM: @llvm.global_ctors = appending constant [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_Z3foov, ptr null }, { i32, ptr, ptr } { i32 777, ptr @_Z4foo2v, ptr null }]
+// LLVM-NEXT: @llvm.global_dtors = appending constant [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_Z4foo3v, ptr null }, { i32, ptr, ptr } { i32 789, ptr @_Z4foo4v, ptr null }]
diff --git a/clang/test/CIR/Incubator/CodeGen/global-init.cpp b/clang/test/CIR/Incubator/CodeGen/global-init.cpp
new file mode 100644
index 0000000000000..897f71e4d05ef
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/global-init.cpp
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2> %t-before.cir
+// RUN: FileCheck --input-file=%t-before.cir %s --check-prefix=CIR-BEFORE-LPP
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+float num;
+float _Complex a = {num, num};
+
+// CIR-BEFORE-LPP: cir.global external @num = #cir.fp<0.000000e+00> : !cir.float
+// CIR-BEFORE-LPP: cir.global external @a = ctor : !cir.complex<!cir.float> {
+// CIR-BEFORE-LPP:  %[[THIS:.*]] = cir.get_global @a : !cir.ptr<!cir.complex<!cir.float>>
+// CIR-BEFORE-LPP:  %[[NUM:.*]] = cir.get_global @num : !cir.ptr<!cir.float>
+// CIR-BEFORE-LPP:  %[[REAL:.*]] = cir.load{{.*}} %[[NUM]] : !cir.ptr<!cir.float>, !cir.float
+// CIR-BEFORE-LPP:  %[[NUM:.*]] = cir.get_global @num : !cir.ptr<!cir.float>
+// CIR-BEFORE-LPP:  %[[IMAG:.*]] = cir.load{{.*}} %[[NUM]] : !cir.ptr<!cir.float>, !cir.float
+// CIR-BEFORE-LPP:  %[[COMPLEX_VAL:.*]] = cir.complex.create %[[REAL]], %[[IMAG]] : !cir.float -> !cir.complex<!cir.float>
+// CIR-BEFORE-LPP:  cir.store{{.*}} %[[COMPLEX_VAL:.*]], %[[THIS]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+// CIR-BEFORE-LPP: }
+
+// CIR:  cir.global external @num = #cir.fp<0.000000e+00> : !cir.float
+// CIR:  cir.global external @a = #cir.zero : !cir.complex<!cir.float>
+// CIR:  cir.func internal private @__cxx_global_var_init()
+// CIR:   %[[A_ADDR:.*]] = cir.get_global @a : !cir.ptr<!cir.complex<!cir.float>>
+// CIR:   %[[NUM:.*]] = cir.get_global @num : !cir.ptr<!cir.float>
+// CIR:   %[[REAL:.*]] = cir.load{{.*}} %[[NUM]] : !cir.ptr<!cir.float>, !cir.float
+// CIR:   %[[NUM:.*]] = cir.get_global @num : !cir.ptr<!cir.float>
+// CIR:   %[[IMAG:.*]] = cir.load{{.*}} %[[NUM]] : !cir.ptr<!cir.float>, !cir.float
+// CIR:   %[[COMPLEX_VAL:.*]] = cir.complex.create %[[REAL]], %[[IMAG]] : !cir.float -> !cir.complex<!cir.float>
+// CIR:   cir.store{{.*}} %[[COMPLEX_VAL]], %[[A_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// LLVM: define internal void @__cxx_global_var_init()
+// LLVM:   %[[REAL:.*]] = load float, ptr @num, align 4
+// LLVM:   %[[IMAG:.*]] = load float, ptr @num, align 4
+// LLVM:   %[[TMP_COMPLEX_VAL:.*]] = insertvalue { float, float } {{.*}}, float %[[REAL]], 0
+// LLVM:   %[[COMPLEX_VAL:.*]] = insertvalue { float, float } %[[TMP_COMPLEX_VAL]], float %[[IMAG]], 1
+// LLVM:   store { float, float } %[[COMPLEX_VAL]], ptr @a, align 4
+
+// OGCG: define internal void @__cxx_global_var_init() {{.*}} section ".text.startup"
+// OGCG:   %[[REAL:.*]] = load float, ptr @num, align 4
+// OGCG:   %[[IMAG:.*]] = load float, ptr @num, align 4
+// OGCG:   store float %[[REAL]], ptr @a, align 4
+// OGCG:   store float %[[IMAG]], ptr getelementptr inbounds nuw ({ float, float }, ptr @a, i32 0, i32 1), align 4
diff --git a/clang/test/CIR/Incubator/CodeGen/global-new.cpp b/clang/test/CIR/Incubator/CodeGen/global-new.cpp
new file mode 100644
index 0000000000000..9e2beb6560999
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/global-new.cpp
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=CIR_BEFORE
+// RUN: FileCheck %s -check-prefix=CIR_AFTER --input-file=%t.cir
+// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck %s -check-prefix=LLVM --input-file=%t.ll
+// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -fclangir -emit-cir -fexceptions -fcxx-exceptions %s -o %t.eh.cir
+// RUN: FileCheck %s -check-prefix=CIR_EH --input-file=%t.eh.cir
+// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -fclangir -fno-clangir-call-conv-lowering -emit-cir-flat -fno-clangir-call-conv-lowering -fexceptions -fcxx-exceptions %s -o %t.eh.flat.cir
+// RUN: FileCheck %s -check-prefix=CIR_FLAT_EH --input-file=%t.eh.flat.cir
+// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -fclangir -emit-llvm -fno-clangir-call-conv-lowering -fexceptions -fcxx-exceptions %s -o %t.eh.ll
+// RUN: FileCheck %s -check-prefix=LLVM_EH --input-file=%t.eh.ll
+
+struct e { e(int); };
+e *g = new e(0);
+
+// CIR_BEFORE: ![[ty:.*]] = !cir.record<struct "e" padded {!u8i}
+
+// CIR_BEFORE: cir.global external @g = ctor : !cir.ptr<![[ty]]> {
+// CIR_BEFORE:     %[[GlobalAddr:.*]] = cir.get_global @g : !cir.ptr<!cir.ptr<![[ty]]>>
+// CIR_BEFORE:     %[[Size:.*]] = cir.const #cir.int<1> : !u64i
+// CIR_BEFORE:     %[[NewAlloc:.*]] = cir.call @_Znwm(%[[Size]]) : (!u64i) -> !cir.ptr<!void>
+// CIR_BEFORE:     %[[NewCasted:.*]] = cir.cast bitcast %[[NewAlloc]] : !cir.ptr<!void> -> !cir.ptr<![[ty]]>
+// CIR_BEFORE:     %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
+// CIR_BEFORE:     cir.call @_ZN1eC1Ei(%[[NewCasted]], %[[ZERO]]) : (!cir.ptr<![[ty]]>, !s32i) -> ()
+// CIR_BEFORE:     cir.store{{.*}} %3, %[[GlobalAddr]] : !cir.ptr<![[ty]]>, !cir.ptr<!cir.ptr<![[ty]]>>
+// CIR_BEFORE: }
+
+// CIR_AFTER:  {{%.*}} = cir.const #cir.int<1> : !u64i
+// CIR_AFTER:  {{%.*}} = cir.call @_Znwm(%1) : (!u64i) -> !cir.ptr<!void>
+
+// CIR_EH: cir.try synthetic cleanup {
+// CIR_EH:   cir.call exception @_ZN1eC1Ei{{.*}} cleanup {
+// CIR_EH:     cir.call @_ZdlPvm
+// CIR_EH:     cir.yield
+// CIR_EH:   }
+// CIR_EH:   cir.yield
+// CIR_EH: } catch [#cir.unwind {
+// CIR_EH:   cir.resume
+// CIR_EH: }]
+
+// CIR_FLAT_EH: cir.func internal private @__cxx_global_var_init()
+// CIR_FLAT_EH: ^bb3:
+// CIR_FLAT_EH:   %exception_ptr, %type_id = cir.eh.inflight_exception
+// CIR_FLAT_EH:   cir.call @_ZdlPvm({{.*}}) : (!cir.ptr<!void>, !u64i) -> ()
+// CIR_FLAT_EH:   cir.br ^bb4(%exception_ptr, %type_id : !cir.ptr<!void>, !u32i)
+
+// LLVM_EH: define internal void @__cxx_global_var_init() personality ptr @__gxx_personality_v0
+// LLVM_EH:   call ptr @_Znwm(i64 1)
+// LLVM_EH:   br label %[[L2:.*]]
+
+// LLVM_EH: [[L2]]:
+// LLVM_EH:   invoke void @_ZN1eC1Ei
+// LLVM_EH:           to label %[[CONT:.*]] unwind label %[[PAD:.*]]
+
+// LLVM_EH: [[CONT]]:
+// LLVM_EH:   br label %[[END:.*]]
+
+// LLVM_EH: [[PAD]]:
+// LLVM_EH:   landingpad { ptr, i32 }
+// LLVM_EH:      cleanup
+// LLVM_EH:   call void @_ZdlPvm
+// LLVM_EH:   br label %[[RESUME:.*]]
+
+// LLVM_EH: [[RESUME]]:
+// LLVM_EH:   resume { ptr, i32 }
+
+// LLVM_EH: [[END]]:
+// LLVM_EH:   store ptr {{.*}}, ptr @g, align 8
+// LLVM_EH:   ret void
+// LLVM_EH: }
+
+// LLVM-DAG: @llvm.global_ctors = appending constant [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__cxx_global_var_init, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @__cxx_global_var_init.1, ptr null }]
+// LLVM: define internal void @__cxx_global_var_init()
+// LLVM: call ptr @_Znwm(i64 1)
+
+// LLVM: define internal void @__cxx_global_var_init.1()
+// LLVM:   call ptr @_Znwm(i64 1)
+
+// LLVM: define void @_GLOBAL__sub_I_global_new.cpp()
+// LLVM:   call void @__cxx_global_var_init()
+// LLVM:   call void @__cxx_global_var_init.1()
+
+struct PackedStruct {
+};
+PackedStruct*const packed_2 = new PackedStruct();
diff --git a/clang/test/CIR/Incubator/CodeGen/globals-neg-index-array.c b/clang/test/CIR/Incubator/CodeGen/globals-neg-index-array.c
new file mode 100644
index 0000000000000..4cfb045ebfff9
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/globals-neg-index-array.c
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c++ -triple x86_64-unknown-linux-gnu -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+struct __attribute__((packed)) PackedStruct {
+    char a1;
+    char a2;
+    char a3;
+};
+struct PackedStruct packed[10];
+char *packed_element = &(packed[-2].a3);
+// CHECK: cir.global external @packed = #cir.zero : !cir.array<!rec_PackedStruct x 10> {alignment = 16 : i64} loc(#loc5)
+// CHECK: cir.global external @packed_element = #cir.global_view<@packed, [-2 : i32, 2 : i32]>
+// LLVM: @packed = global [10 x %struct.PackedStruct] zeroinitializer
+// LLVM: @packed_element = global ptr getelementptr inbounds (i8, ptr @packed, i64 -4)
diff --git a/clang/test/CIR/Incubator/CodeGen/globals-ref-globals.c b/clang/test/CIR/Incubator/CodeGen/globals-ref-globals.c
new file mode 100644
index 0000000000000..a4957cc1afa69
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/globals-ref-globals.c
@@ -0,0 +1,116 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+typedef struct {
+   int f0 : 24;
+   int f1;
+   int f2;
+} S;
+
+static S g1 = {2799, 9, 123};
+static int *g2[4] = {&g1.f1, &g1.f1, &g1.f1, &g1.f1};
+static int **g3 = &g2[1];
+static int ***g4 = &g3;
+static int ****g5 = &g4;
+
+static S g6[2] = {{2799, 9, 123}, {2799, 9, 123}};
+static int *g7[2] = {&g6[0].f2, &g6[1].f2};
+static int **g8 = &g7[1];
+
+// CHECK-DAG: !rec_anon_struct = !cir.record<struct  {!u8i, !u8i, !u8i, !u8i, !s32i, !s32i}>
+// CHECK-DAG: !rec_anon_struct1 = !cir.record<struct  {!s8i, !cir.array<!u8i x 3>, !s32i}>
+// CHECK-DAG: !rec_anon_struct2 = !cir.record<struct  {!u8i, !u8i, !u8i, !u8i, !u8i, !u8i, !u8i, !u8i, !rec_S4}>
+// CHECK-DAG: !rec_anon_struct3 = !cir.record<struct  {!s16i, !cir.array<!u8i x 2>, !s32i, !s8i, !cir.array<!u8i x 3>}>
+
+// CHECK-DAG: g1 = #cir.const_record<{#cir.int<239> : !u8i, #cir.int<10> : !u8i, #cir.int<0> : !u8i, #cir.zero : !u8i, #cir.int<9> : !s32i, #cir.int<123> : !s32i}> : !rec_anon_struct
+// CHECK-DAG: g2 = #cir.const_array<[#cir.global_view<@g1, [4]> : !cir.ptr<!rec_anon_struct>, #cir.global_view<@g1, [4]> : !cir.ptr<!rec_anon_struct>, #cir.global_view<@g1, [4]> : !cir.ptr<!rec_anon_struct>, #cir.global_view<@g1, [4]> : !cir.ptr<!rec_anon_struct>]> : !cir.array<!cir.ptr<!s32i> x 4>
+// CHECK-DAG: g3 = #cir.global_view<@g2, [1 : i32]> : !cir.ptr<!cir.ptr<!s32i>>
+// CHECK-DAG: g4 = #cir.global_view<@g3> : !cir.ptr<!cir.ptr<!cir.ptr<!s32i>>>
+// CHECK-DAG: g5 = #cir.global_view<@g4> : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!s32i>>>>
+// CHECK-DAG: g6 = #cir.const_array<[#cir.const_record<{#cir.int<239> : !u8i, #cir.int<10> : !u8i, #cir.int<0> : !u8i, #cir.zero : !u8i, #cir.int<9> : !s32i, #cir.int<123> : !s32i}> : !rec_anon_struct, #cir.const_record<{#cir.int<239> : !u8i, #cir.int<10> : !u8i, #cir.int<0> : !u8i, #cir.zero : !u8i, #cir.int<9> : !s32i, #cir.int<123> : !s32i}> : !rec_anon_struct]> : !cir.array<!rec_anon_struct x 2> 
+// CHECK-DAG: g7 = #cir.const_array<[#cir.global_view<@g6, [0, 5]> : !cir.ptr<!s32i>, #cir.global_view<@g6, [1, 5]> : !cir.ptr<!s32i>]> : !cir.array<!cir.ptr<!s32i> x 2> 
+// CHECK-DAG: g8 = #cir.global_view<@g7, [1 : i32]> : !cir.ptr<!cir.ptr<!s32i>> 
+
+// LLVM-DAG: @g1 = internal global { i8, i8, i8, i8, i32, i32 } { i8 -17, i8 10, i8 0, i8 0, i32 9, i32 123 }, align 4
+// LLVM-DAG: @g2 = internal global [4 x ptr] [ptr getelementptr inbounds nuw (i8, ptr @g1, i64 4), ptr getelementptr inbounds nuw (i8, ptr @g1, i64 4), ptr getelementptr inbounds nuw (i8, ptr @g1, i64 4), ptr getelementptr inbounds nuw (i8, ptr @g1, i64 4)], align 16
+// LLVM-DAG: @g3 = internal global ptr getelementptr inbounds nuw (i8, ptr @g2, i64 8), align 8
+// LLVM-DAG: @g4 = internal global ptr @g3, align 8
+// LLVM-DAG: @g5 = internal global ptr @g4, align 8
+// LLVM-DAG: @g6 = internal global [2 x { i8, i8, i8, i8, i32, i32 }] [{ i8, i8, i8, i8, i32, i32 } { i8 -17, i8 10, i8 0, i8 0, i32 9, i32 123 }, { i8, i8, i8, i8, i32, i32 } { i8 -17, i8 10, i8 0, i8 0, i32 9, i32 123 }], align 16
+// LLVM-DAG: @g7 = internal global [2 x ptr] [ptr getelementptr inbounds nuw (i8, ptr @g6, i64 8), ptr getelementptr inbounds nuw (i8, ptr @g6, i64 20)], align 16
+// LLVM-DAG: @g8 = internal global ptr getelementptr inbounds nuw (i8, ptr @g7, i64 8), align 8
+
+typedef struct {
+   char f1;
+   int  f6;
+} S1;
+
+S1 g9 = {1, 42};
+int* g10 = &g9.f6;
+
+#pragma pack(push)
+#pragma pack(1)
+typedef struct {
+   char f1;
+   int  f6;
+} S2;
+#pragma pack(pop)
+
+S2 g11 = {1, 42};
+int* g12 = &g11.f6;
+
+// CHECK-DAG: g9 = #cir.const_record<{#cir.int<1> : !s8i, #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 3>, #cir.int<42> : !s32i}> : !rec_anon_struct1 {alignment = 4 : i64}
+// CHECK-DAG: g10 = #cir.global_view<@g9, [2 : i32]> : !cir.ptr<!s32i> {alignment = 8 : i64}
+// CHECK-DAG: g11 = #cir.const_record<{#cir.int<1> : !s8i, #cir.int<42> : !s32i}> : !rec_S2 {alignment = 1 : i64}
+// CHECK-DAG: g12 = #cir.global_view<@g11, [1 : i32]> : !cir.ptr<!s32i> {alignment = 8 : i64} 
+
+// LLVM-DAG: @g9 = global { i8, [3 x i8], i32 } { i8 1, [3 x i8] zeroinitializer, i32 42 }, align 4
+// LLVM-DAG: @g10 = global ptr getelementptr inbounds nuw (i8, ptr @g9, i64 4), align 8
+// LLVM-DAG: @g11 = global %struct.S2 <{ i8 1, i32 42 }>, align 1
+// LLVM-DAG: @g12 = global ptr getelementptr inbounds nuw (i8, ptr @g11, i64 1), align 8
+
+
+typedef struct {
+   short f0;
+   int   f1;
+   char  f2;
+} S3;
+
+static S3 g13 = {-1L,0L,1L};
+static S3* g14[2][2] = {{0, &g13}, {&g13, &g13}};
+
+// CHECK-DAG: g13 = #cir.const_record<{#cir.int<-1> : !s16i, #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 2>, #cir.int<0> : !s32i, #cir.int<1> : !s8i, #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 3>}> : !rec_anon_struct3
+// CHECK-DAG: g14 = #cir.const_array<[#cir.const_array<[#cir.ptr<null> : !cir.ptr<!rec_S3>, #cir.global_view<@g13> : !cir.ptr<!rec_S3>]> : !cir.array<!cir.ptr<!rec_S3> x 2>, #cir.const_array<[#cir.global_view<@g13> : !cir.ptr<!rec_S3>, #cir.global_view<@g13> : !cir.ptr<!rec_S3>]> : !cir.array<!cir.ptr<!rec_S3> x 2>]> : !cir.array<!cir.array<!cir.ptr<!rec_S3> x 2> x 2>
+
+typedef struct {
+   int  f0;
+   int  f1;
+} S4;
+
+typedef struct {
+   int f0 : 17;
+   int f1 : 5;
+   int f2 : 19;
+   S4 f3;   
+} S5;
+
+static S5 g15 = {187,1,442,{123,321}};
+
+int* g16 = &g15.f3.f1;
+
+// CHECK-DAG: g15 = #cir.const_record<{#cir.int<187> : !u8i, #cir.int<0> : !u8i, #cir.int<2> : !u8i, #cir.zero : !u8i, #cir.int<186> : !u8i, #cir.int<1> : !u8i, #cir.int<0> : !u8i, #cir.zero : !u8i, #cir.const_record<{#cir.int<123> : !s32i, #cir.int<321> : !s32i}> : !rec_S4}> : !rec_anon_struct2 {alignment = 4 : i64}
+// CHECK-DAG: g16 = #cir.global_view<@g15, [8, 1]> : !cir.ptr<!rec_anon_struct2> {alignment = 8 : i64}
+
+// LLVM-DAG: @g15 = internal global { i8, i8, i8, i8, i8, i8, i8, i8, %struct.S4 } { i8 -69, i8 0, i8 2, i8 0, i8 -70, i8 1, i8 0, i8 0, %struct.S4 { i32 123, i32 321 } }, align 4
+// LLVM-DAG: @g16 = global ptr getelementptr inbounds nuw (i8, ptr @g15, i64 12), align 8
+
+void use() {
+    int a = **g3;
+    int b = ***g4; 
+    int c = ****g5; 
+    int d = **g8;
+    S3 s = *g14[1][1];
+    int f = *g16;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/globals.c b/clang/test/CIR/Incubator/CodeGen/globals.c
new file mode 100644
index 0000000000000..76e2f01a85e68
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/globals.c
@@ -0,0 +1,115 @@
+// There seems to be some differences in how constant expressions are evaluated
+// in C vs C++. This causes the code gen for C initialized globals to be a
+// bit different from the C++ version. This test ensures that these differences
+// are accounted for.
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+char string[] = "whatnow";
+// CHECK: cir.global external @string = #cir.const_array<"whatnow\00" : !cir.array<!s8i x 8>> : !cir.array<!s8i x 8>
+char big_string[100000] = "123";
+// CHECK: cir.global external @big_string = #cir.const_array<"123" : !cir.array<!s8i x 3>, trailing_zeros> : !cir.array<!s8i x 100000>
+int sint[] = {123, 456, 789};
+// CHECK: cir.global external @sint = #cir.const_array<[#cir.int<123> : !s32i, #cir.int<456> : !s32i, #cir.int<789> : !s32i]> : !cir.array<!s32i x 3>
+int filler_sint[4] = {1, 2}; // Ensure missing elements are zero-initialized.
+// CHECK: cir.global external @filler_sint = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.array<!s32i x 4>
+int excess_sint[2] = {1, 2, 3, 4}; // Ensure excess elements are ignored.
+// CHECK: cir.global external @excess_sint = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i]> : !cir.array<!s32i x 2>
+float flt[] = {1.0, 2.0};
+// CHECK: cir.global external @flt = #cir.const_array<[#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00> : !cir.float]> : !cir.array<!cir.float x 2>
+
+// Tentative definition is just a declaration.
+int tentativeB;
+int tentativeB = 1;
+// CHECK: cir.global external @tentativeB = #cir.int<1> : !s32i
+
+// Tentative incomplete definition is just a declaration.
+int tentativeE[];
+int tentativeE[2] = {1, 2};
+// CHECK: cir.global external @tentativeE = #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i]> : !cir.array<!s32i x 2>
+
+int twoDim[2][2] = {{1, 2}, {3, 4}};
+// CHECK: cir.global external @twoDim = #cir.const_array<[#cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i]> : !cir.array<!s32i x 2>, #cir.const_array<[#cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.array<!s32i x 2>]> : !cir.array<!cir.array<!s32i x 2> x 2>
+
+struct {
+  int x;
+  int y[2][2];
+} nestedTwoDim = {1, {{2, 3}, {4, 5}}};
+// CHECK: cir.global external @nestedTwoDim = #cir.const_record<{#cir.int<1> : !s32i, #cir.const_array<[#cir.const_array<[#cir.int<2> : !s32i, #cir.int<3> : !s32i]> : !cir.array<!s32i x 2>, #cir.const_array<[#cir.int<4> : !s32i, #cir.int<5> : !s32i]> : !cir.array<!s32i x 2>]> : !cir.array<!cir.array<!s32i x 2> x 2>}>
+
+struct {
+  char x[3];
+  char y[3];
+  char z[3];
+} nestedString = {"1", "", "\0"};
+// CHECK: cir.global external @nestedString = #cir.const_record<{#cir.const_array<"1" : !cir.array<!s8i x 1>, trailing_zeros> : !cir.array<!s8i x 3>, #cir.zero : !cir.array<!s8i x 3>, #cir.zero : !cir.array<!s8i x 3>}>
+
+struct {
+  char *name;
+} nestedStringPtr = {"1"};
+// CHECK: cir.global external @nestedStringPtr = #cir.const_record<{#cir.global_view<@".str"> : !cir.ptr<!s8i>}>
+
+char *globalPtr = &nestedString.y[1];
+// CHECK: cir.global external @globalPtr = #cir.global_view<@nestedString, [1 : i32, 1 : i32]> : !cir.ptr<!s8i>
+
+const int i = 12;
+int i2 = i;
+struct { int i; } i3 = {i};
+// CHECK: cir.global external @i2 = #cir.int<12> : !s32i
+// CHECK: cir.global external @i3 = #cir.const_record<{#cir.int<12> : !s32i}> : !rec_anon2E3
+
+int a[10][10][10];
+int *a2 = &a[3][0][8];
+struct { int *p; } a3 = {&a[3][0][8]};
+// CHECK: cir.global external @a2 = #cir.global_view<@a, [3 : i32, 0 : i32, 8 : i32]> : !cir.ptr<!s32i>
+// CHECK: cir.global external @a3 = #cir.const_record<{#cir.global_view<@a, [3 : i32, 0 : i32, 8 : i32]> : !cir.ptr<!s32i>}> : !rec_anon2E4
+
+int p[10];
+int *p1 = &p[0];
+struct { int *x; } p2 = {&p[0]};
+// CHECK: cir.global external @p1 = #cir.global_view<@p> : !cir.ptr<!s32i>
+// CHECK: cir.global external @p2 = #cir.const_record<{#cir.global_view<@p> : !cir.ptr<!s32i>}> : !rec_anon2E5
+
+int q[10];
+int *q1 = q;
+struct { int *x; } q2 = {q};
+// CHECK: cir.global external @q1 = #cir.global_view<@q> : !cir.ptr<!s32i>
+// CHECK: cir.global external @q2 = #cir.const_record<{#cir.global_view<@q> : !cir.ptr<!s32i>}> : !rec_anon2E6
+
+int foo() {
+    extern int optind;
+    return optind;
+}
+// CHECK: cir.global "private" external @optind : !s32i
+// CHECK: cir.func {{.*@foo}}
+// CHECK:   {{.*}} = cir.get_global @optind : !cir.ptr<!s32i>
+
+struct Glob {
+  double a[42];
+  int pad1[3];
+  double b[42];
+} glob;
+
+double *const glob_ptr = &glob.b[1];
+// CHECK: cir.global constant external @glob_ptr = #cir.global_view<@glob, [2 : i32, 1 : i32]> : !cir.ptr<!cir.double>
+
+// TODO: test tentatives with internal linkage.
+
+// Use a tentative definition in an initializer.
+struct A {
+  struct A *x;
+} tentativeF[];
+struct A useTentative = {tentativeF};
+// CHECK: cir.global external @tentativeF = #cir.zero
+// CHECK: cir.global external @useTentative = #cir.const_record<{#cir.global_view<@tentativeF> : !cir.ptr<!rec_A>}>
+
+// Tentative definition is THE definition. Should be zero-initialized.
+int tentativeA;
+float tentativeC;
+int tentativeD[];
+float zeroInitFlt[2];
+// CHECK: cir.global external @tentativeA = #cir.int<0> : !s32i
+// CHECK: cir.global external @tentativeC = #cir.fp<0.000000e+00> : !cir.float
+// CHECK: cir.global external @tentativeD = #cir.zero : !cir.array<!s32i x 1>
+// CHECK: cir.global external @zeroInitFlt = #cir.zero : !cir.array<!cir.float x 2>
diff --git a/clang/test/CIR/Incubator/CodeGen/globals.cpp b/clang/test/CIR/Incubator/CodeGen/globals.cpp
new file mode 100644
index 0000000000000..4a9912136e478
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/globals.cpp
@@ -0,0 +1,137 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+int a = 3;
+const int b = 4; // unless used, won't be generated
+
+unsigned long int c = 2;
+int d = a;
+bool e;
+float y = 3.4;
+double w = 4.3;
+char x = '3';
+unsigned char rgb[3] = {0, 233, 33};
+char alpha[4] = "abc";
+const char *s = "example";
+const char *s1 = "example1";
+const char *s2 = "example";
+
+void use_global() {
+  int li = a;
+}
+
+bool bool_global() {
+  return e;
+}
+
+void use_global_string() {
+  unsigned char c = s2[0];
+}
+
+template <typename T>
+T func() {
+  return T();
+}
+
+int use_func() { return func<int>(); }
+
+// CHECK: module {{.*}} {
+// CHECK-NEXT: cir.global external @a = #cir.int<3> : !s32i
+// CHECK-NEXT: cir.global external @c = #cir.int<2> : !u64i
+// CHECK-NEXT: cir.global external @d = #cir.int<0> : !s32i
+
+// CHECK-NEXT: cir.func internal private @__cxx_global_var_init()
+// CHECK-NEXT:   [[TMP0:%.*]] = cir.get_global @d : !cir.ptr<!s32i>
+// CHECK-NEXT:   [[TMP1:%.*]] = cir.get_global @a : !cir.ptr<!s32i>
+// CHECK-NEXT:   [[TMP2:%.*]] = cir.load{{.*}} [[TMP1]] : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:   cir.store{{.*}} [[TMP2]], [[TMP0]] : !s32i, !cir.ptr<!s32i>
+
+// CHECK: cir.global external @e = #false
+// CHECK-NEXT: cir.global external @y = #cir.fp<3.400000e+00> : !cir.float
+// CHECK-NEXT: cir.global external @w = #cir.fp<4.300000e+00> : !cir.double
+// CHECK-NEXT: cir.global external @x = #cir.int<51> : !s8i
+// CHECK-NEXT: cir.global external @rgb = #cir.const_array<[#cir.int<0> : !u8i, #cir.int<233> : !u8i, #cir.int<33> : !u8i]> : !cir.array<!u8i x 3>
+// CHECK-NEXT: cir.global external @alpha = #cir.const_array<"abc\00" : !cir.array<!s8i x 4>> : !cir.array<!s8i x 4>
+
+// CHECK-NEXT: cir.global "private" constant cir_private dso_local @".str" = #cir.const_array<"example\00" : !cir.array<!s8i x 8>> : !cir.array<!s8i x 8> {alignment = 1 : i64}
+// CHECK-NEXT: cir.global external @s = #cir.global_view<@".str"> : !cir.ptr<!s8i>
+
+// CHECK-NEXT: cir.global "private" constant cir_private dso_local @".str.1" = #cir.const_array<"example1\00" : !cir.array<!s8i x 9>> : !cir.array<!s8i x 9> {alignment = 1 : i64}
+// CHECK-NEXT: cir.global external @s1 = #cir.global_view<@".str.1"> : !cir.ptr<!s8i>
+
+// CHECK-NEXT: cir.global external @s2 = #cir.global_view<@".str"> : !cir.ptr<!s8i>
+
+//      CHECK: cir.func {{.*}} @_Z10use_globalv()
+// CHECK-NEXT:     %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["li", init] {alignment = 4 : i64}
+// CHECK-NEXT:     %1 = cir.get_global @a : !cir.ptr<!s32i>
+// CHECK-NEXT:     %2 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:     cir.store{{.*}} %2, %0 : !s32i, !cir.ptr<!s32i>
+
+//      CHECK: cir.func {{.*}} @_Z17use_global_stringv()
+// CHECK-NEXT:   %0 = cir.alloca !u8i, !cir.ptr<!u8i>, ["c", init] {alignment = 1 : i64}
+// CHECK-NEXT:   %1 = cir.get_global @s2 : !cir.ptr<!cir.ptr<!s8i>>
+// CHECK-NEXT:   %2 = cir.load{{.*}} %1 : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
+// CHECK-NEXT:   %3 = cir.const #cir.int<0> : !s32i
+// CHECK-NEXT:   %4 = cir.ptr_stride %2, %3 : (!cir.ptr<!s8i>, !s32i) -> !cir.ptr<!s8i>
+// CHECK-NEXT:   %5 = cir.load{{.*}} %4 : !cir.ptr<!s8i>, !s8i
+// CHECK-NEXT:   %6 = cir.cast integral %5 : !s8i -> !u8i
+// CHECK-NEXT:   cir.store{{.*}} %6, %0 : !u8i, !cir.ptr<!u8i>
+// CHECK-NEXT:   cir.return
+
+//      CHECK:  cir.func {{.*}} @_Z4funcIiET_v() -> !s32i
+// CHECK-NEXT:    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CHECK-NEXT:    %1 = cir.const #cir.int<0> : !s32i
+// CHECK-NEXT:    cir.store{{.*}} %1, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:    %2 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:    cir.return %2 : !s32i
+// CHECK-NEXT:  }
+// CHECK-NEXT:  cir.func {{.*}} @_Z8use_funcv() -> !s32i
+// CHECK-NEXT:    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CHECK-NEXT:    %1 = cir.call @_Z4funcIiET_v() : () -> !s32i
+// CHECK-NEXT:    cir.store{{.*}} %1, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:    %2 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:    cir.return %2 : !s32i
+// CHECK-NEXT:  }
+
+
+char string[] = "whatnow";
+// CHECK: cir.global external @string = #cir.const_array<"whatnow\00" : !cir.array<!s8i x 8>> : !cir.array<!s8i x 8>
+unsigned uint[] = {255};
+// CHECK: cir.global external @uint = #cir.const_array<[#cir.int<255> : !u32i]> : !cir.array<!u32i x 1>
+short sshort[] = {11111, 22222};
+// CHECK: cir.global external @sshort = #cir.const_array<[#cir.int<11111> : !s16i, #cir.int<22222> : !s16i]> : !cir.array<!s16i x 2>
+int sint[] = {123, 456, 789};
+// CHECK: cir.global external @sint = #cir.const_array<[#cir.int<123> : !s32i, #cir.int<456> : !s32i, #cir.int<789> : !s32i]> : !cir.array<!s32i x 3>
+long long ll[] = {999999999, 0, 0, 0};
+// CHECK: cir.global external @ll = #cir.const_array<[#cir.int<999999999> : !s64i, #cir.int<0> : !s64i, #cir.int<0> : !s64i, #cir.int<0> : !s64i]> : !cir.array<!s64i x 4>
+
+void get_globals() {
+  // CHECK: cir.func {{.*}} @_Z11get_globalsv()
+  char *s = string;
+  // CHECK: %[[RES:[0-9]+]] = cir.get_global @string : !cir.ptr<!cir.array<!s8i x 8>>
+  // CHECK: %{{[0-9]+}} = cir.cast array_to_ptrdecay %[[RES]] : !cir.ptr<!cir.array<!s8i x 8>> -> !cir.ptr<!s8i>
+  unsigned *u = uint;
+  // CHECK: %[[RES:[0-9]+]] = cir.get_global @uint : !cir.ptr<!cir.array<!u32i x 1>>
+  // CHECK: %{{[0-9]+}} = cir.cast array_to_ptrdecay %[[RES]] : !cir.ptr<!cir.array<!u32i x 1>> -> !cir.ptr<!u32i>
+  short *ss = sshort;
+  // CHECK: %[[RES:[0-9]+]] = cir.get_global @sshort : !cir.ptr<!cir.array<!s16i x 2>>
+  // CHECK: %{{[0-9]+}} = cir.cast array_to_ptrdecay %[[RES]] : !cir.ptr<!cir.array<!s16i x 2>> -> !cir.ptr<!s16i>
+  int *si = sint;
+  // CHECK: %[[RES:[0-9]+]] = cir.get_global @sint : !cir.ptr<!cir.array<!s32i x 3>>
+  // CHECK: %{{[0-9]+}} = cir.cast array_to_ptrdecay %[[RES]] : !cir.ptr<!cir.array<!s32i x 3>> -> !cir.ptr<!s32i>
+  long long *l = ll;
+  // CHECK: %[[RES:[0-9]+]] = cir.get_global @ll : !cir.ptr<!cir.array<!s64i x 4>>
+  // CHECK: %{{[0-9]+}} = cir.cast array_to_ptrdecay %[[RES]] : !cir.ptr<!cir.array<!s64i x 4>> -> !cir.ptr<!s64i>
+}
+
+// Should generate extern global variables.
+extern int externVar;
+int testExternVar(void) { return externVar; }
+// CHECK: cir.global "private" external @externVar : !s32i
+// CHECK: cir.func {{.*}} @{{.+}}testExternVar
+// CHECK:   cir.get_global @externVar : !cir.ptr<!s32i>
+
+// Should constant initialize global with constant address.
+int var = 1;
+int *constAddr = &var;
+// CHECK-DAG: cir.global external @constAddr = #cir.global_view<@var> : !cir.ptr<!s32i>
diff --git a/clang/test/CIR/Incubator/CodeGen/gnu-extension.c b/clang/test/CIR/Incubator/CodeGen/gnu-extension.c
new file mode 100644
index 0000000000000..ef6aed0a0f76c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/gnu-extension.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+int foo(void) { return __extension__ 0b101010; }
+
+//CHECK: cir.func {{.*}} @foo()
+//CHECK-NEXT:    [[ADDR:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+//CHECK-NEXT:    [[VAL:%.*]] = cir.const #cir.int<42> : !s32i
+//CHECK-NEXT:    cir.store{{.*}} [[VAL]], [[ADDR]] : !s32i, !cir.ptr<!s32i>
+//CHECK-NEXT:    [[LOAD_VAL:%.*]] = cir.load{{.*}} [[ADDR]] : !cir.ptr<!s32i>, !s32i
+//CHECK-NEXT:    cir.return [[LOAD_VAL]] : !s32i
+
+void bar(void) {
+  __extension__ bar;
+}
+
+//CHECK:  cir.func {{.*}} @bar()
+//CHECK:    {{.*}} = cir.get_global @bar : !cir.ptr<!cir.func<()>>
+//CHECK:    cir.return
diff --git a/clang/test/CIR/Incubator/CodeGen/gnu-null.cpp b/clang/test/CIR/Incubator/CodeGen/gnu-null.cpp
new file mode 100644
index 0000000000000..d1d15f2007621
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/gnu-null.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+void gnu_null_expr() {
+  long a = __null;
+  int *b = __null;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["a", init]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["b", init]
+// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s64i
+// CIR: cir.store {{.*}} %[[CONST_0]], %[[A_ADDR]] : !s64i, !cir.ptr<!s64i>
+// CIR: %[[CONST_NULL:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!s32i>
+// CIR: cir.store {{.*}} %[[CONST_NULL]], %[[B_ADDR]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca i64, i64 1, align 8
+// LLVM: %[[B_ADDR:.*]] = alloca ptr, i64 1, align 8
+// LLVM: store i64 0, ptr %[[A_ADDR]], align 8
+// LLVM: store ptr null, ptr %[[B_ADDR]], align 8
+
+// OGCG: %[[A_ADDR:.*]] = alloca i64, align 8
+// OGCG: %[[B_ADDR:.*]] = alloca ptr, align 8
+// OGCG: store i64 0, ptr %[[A_ADDR]], align 8
+// OGCG: store ptr null, ptr %[[B_ADDR]], align 8
diff --git a/clang/test/CIR/Incubator/CodeGen/gnu89.c b/clang/test/CIR/Incubator/CodeGen/gnu89.c
new file mode 100644
index 0000000000000..5254576779aa1
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/gnu89.c
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -std=gnu89 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+void foo() {}
+//CHECK: cir.func {{.*@foo}}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/goto.cpp b/clang/test/CIR/Incubator/CodeGen/goto.cpp
new file mode 100644
index 0000000000000..1ed592b2d5f74
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/goto.cpp
@@ -0,0 +1,380 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir-flat -fno-clangir-call-conv-lowering %s -o %t1.cir
+// RUN: FileCheck --input-file=%t1.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t2.cir
+// RUN: FileCheck --input-file=%t2.cir %s -check-prefix=NOFLAT
+
+
+void g0(int a) {
+  int b = a;
+  goto end;
+  b = b + 1;
+end:
+  b = b + 2;
+}
+
+// CHECK:   cir.func {{.*}} @_Z2g0i
+// CHECK-NEXT  %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+// CHECK-NEXT  %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] {alignment = 4 : i64}
+// CHECK-NEXT  cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT  %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT  cir.store %2, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT  cir.br ^bb2
+// CHECK-NEXT ^bb1:  // no predecessors
+// CHECK-NEXT   %3 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT   %4 = cir.const 1 : !s32i
+// CHECK-NEXT   %5 = cir.binop(add, %3, %4) : !s32i
+// CHECK-NEXT   cir.store %5, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT   cir.br ^bb2
+// CHECK-NEXT ^bb2:  // 2 preds: ^bb0, ^bb1
+// CHECK-NEXT   %6 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT   %7 = cir.const 2 : !s32i
+// CHECK-NEXT   %8 = cir.binop(add, %6, %7) : !s32i
+// CHECK-NEXT   cir.store %8, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT   cir.return
+
+void g1(int a) {
+  int x = 0;
+  goto end;
+end:
+  int y = a + 2;
+}
+
+// Make sure alloca for "y" shows up in the entry block
+// CHECK: cir.func {{.*}} @_Z2g1i(%arg0: !s32i
+// CHECK-NEXT: %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+// CHECK-NEXT: %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+// CHECK-NEXT: %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {alignment = 4 : i64}
+// CHECK-NEXT: cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+
+int g2() {
+  int b = 1;
+  goto end;
+  b = b + 1;
+end:
+  b = b + 2;
+  return 1;
+}
+
+// Make sure (1) we don't get dangling unused cleanup blocks
+//           (2) generated returns consider the function type
+
+// CHECK: cir.func {{.*}} @_Z2g2v() -> !s32i
+
+// CHECK:     cir.br ^bb2
+// CHECK-NEXT:   ^bb1:  // no predecessors
+// CHECK:   ^bb2:  // 2 preds: ^bb0, ^bb1
+
+// CHECK:     [[R:%[0-9]+]] = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:     [[R]] : !s32i
+// CHECK-NEXT:   }
+
+void g3() {
+label:
+  goto label;
+}
+
+// CHECK:  cir.func {{.*}} @_Z2g3v
+// CHECK:    cir.br ^bb1
+// CHECK:  ^bb1:
+// CHECK:    cir.br ^bb1
+
+int shouldNotGenBranchRet(int x) {
+  if (x > 5)
+    goto err;
+  return 0;
+err:
+  return -1;
+}
+// NOFLAT:  cir.func {{.*}} @_Z21shouldNotGenBranchReti
+// NOFLAT:    cir.if %8 {
+// NOFLAT:      cir.goto "err"
+// NOFLAT:    }
+// NOFLAT:  ^bb1:
+// NOFLAT:    %3 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// NOFLAT:    cir.return %3 : !s32i
+// NOFLAT:  ^bb2:  // no predecessors
+// NOFLAT:    cir.label "err"
+
+int shouldGenBranch(int x) {
+  if (x > 5)
+    goto err;
+  x++;
+err:
+  return -1;
+}
+// NOFLAT:  cir.func {{.*}} @_Z15shouldGenBranchi
+// NOFLAT:    cir.if %9 {
+// NOFLAT:      cir.goto "err"
+// NOFLAT:    }
+// NOFLAT:    cir.br ^bb1
+// NOFLAT:  ^bb1:
+// NOFLAT:    cir.label "err"
+
+void severalLabelsInARow(int a) {
+  int b = a;
+  goto end1;
+  b = b + 1;
+  goto end2;
+end1:
+end2:
+  b = b + 2;
+}
+// NOFLAT:  cir.func {{.*}} @_Z19severalLabelsInARowi
+// NOFLAT:  ^bb[[#BLK1:]]:
+// NOFLAT:    cir.label "end1"
+// NOFLAT:    cir.br ^bb[[#BLK2:]]
+// NOFLAT:  ^bb[[#BLK2]]:
+// NOFLAT:    cir.label "end2"
+
+void severalGotosInARow(int a) {
+  int b = a;
+  goto end;
+  goto end;
+end:
+  b = b + 2;
+}
+// NOFLAT:  cir.func {{.*}} @_Z18severalGotosInARowi
+// NOFLAT:    cir.goto "end"
+// NOFLAT:  ^bb[[#BLK1:]]:
+// NOFLAT:    cir.goto "end"
+// NOFLAT:  ^bb[[#BLK2:]]:
+// NOFLAT:    cir.label "end"
+
+
+void labelWithoutMatch() {
+end:
+  return;
+}
+// NOFLAT:  cir.func {{.*}} @_Z17labelWithoutMatchv()
+// NOFLAT:    cir.label "end"
+// NOFLAT:    cir.return
+// NOFLAT:  }
+
+
+int jumpIntoLoop(int* ar) {
+
+  if (ar)
+    goto label;
+  return -1;
+
+  while (ar) {
+  label:
+    ++ar;
+  }
+
+  return 0;
+}
+
+// CHECK:  cir.func {{.*}} @_Z12jumpIntoLoopPi
+// CHECK:    cir.brcond {{.*}} ^bb[[#BLK2:]], ^bb[[#BLK3:]]
+// CHECK:  ^bb[[#BLK2]]:
+// CHECK:    cir.br ^bb[[#BODY:]]
+// CHECK:  ^bb[[#BLK3]]:
+// CHECK:    cir.br ^bb[[#BLK4:]]
+// CHECK:  ^bb[[#BLK4]]:
+// CHECK:    cir.br ^bb[[#RETURN:]]
+// CHECK:  ^bb[[#RETURN]]:
+// CHECK:    cir.return
+// CHECK:  ^bb[[#BLK5:]]:
+// CHECK:    cir.br ^bb[[#BLK6:]]
+// CHECK:  ^bb[[#BLK6]]:
+// CHECK:    cir.br ^bb[[#COND:]]
+// CHECK:  ^bb[[#COND]]:
+// CHECK:    cir.brcond {{.*}} ^bb[[#BLK8:]], ^bb[[#EXIT:]]
+// CHECK:  ^bb[[#BLK8]]:
+// CHECK:    cir.br ^bb[[#BODY]]
+// CHECK:  ^bb[[#BODY]]:
+// CHECK:    cir.br ^bb[[#COND]]
+// CHECK:  ^bb[[#EXIT]]:
+// CHECK:    cir.br ^bb[[#BLK7:]]
+// CHECK:  ^bb[[#BLK7]]:
+// CHECK:    cir.br ^bb[[#RETURN]]
+// CHECK: }
+
+
+
+int jumpFromLoop(int* ar) {
+
+  if (!ar) {
+err:
+    return -1;
+  }
+
+  while (ar) {
+    if (*ar == 42)
+      goto err;
+    ++ar;
+  }
+
+  return 0;
+}
+// CHECK:  cir.func {{.*}} @_Z12jumpFromLoopPi
+// CHECK:    cir.brcond {{.*}} ^bb[[#LABELERR:]], ^bb[[#BLK4:]]
+// CHECK:  ^bb[[#LABELERR]]:
+// CHECK:    cir.br ^bb[[#RETURN1:]]
+// CHECK:  ^bb[[#RETURN1]]:
+// CHECK:    cir.return
+// CHECK:  ^bb[[#BLK4]]:
+// CHECK:    cir.br ^bb[[#BLK5:]]
+// CHECK:  ^bb[[#BLK5]]:
+// CHECK:    cir.br ^bb[[#BLK6:]]
+// CHECK:  ^bb[[#BLK6]]:
+// CHECK:    cir.br ^bb[[#COND:]]
+// CHECK:  ^bb[[#COND]]:
+// CHECK:    cir.brcond {{.*}} ^bb[[#BODY:]], ^bb[[#EXIT:]]
+// CHECK:  ^bb[[#BODY]]:
+// CHECK:    cir.br ^bb[[#IF42:]]
+// CHECK:  ^bb[[#IF42]]:
+// CHECK:    cir.brcond {{.*}} ^bb[[#IF42TRUE:]], ^bb[[#IF42FALSE:]]
+// CHECK:  ^bb[[#IF42TRUE]]:
+// CHECK:    cir.br ^bb[[#RETURN1]]
+// CHECK:  ^bb[[#IF42FALSE]]:
+// CHECK:    cir.br ^bb[[#BLK11:]]
+// CHECK:  ^bb[[#BLK11]]:
+// CHECK:    cir.br ^bb[[#COND]]
+// CHECK:  ^bb[[#EXIT]]:
+// CHECK:    cir.br ^bb[[#RETURN2:]]
+// CHECK:  ^bb[[#RETURN2]]:
+// CHECK:    cir.return
+// CHECK:  }
+
+
+void flatLoopWithNoTerminatorInFront(int* ptr) {
+
+  if (ptr)
+    goto loop;
+
+  do {
+    if (!ptr)
+      goto end;
+  loop:
+      ptr++;
+  } while(ptr);
+
+  end:
+  ;
+}
+
+// CHECK:  cir.func {{.*}} @_Z31flatLoopWithNoTerminatorInFrontPi
+// CHECK:    cir.brcond {{.*}} ^bb[[#BLK2:]], ^bb[[#BLK3:]]
+// CHECK:  ^bb[[#BLK2]]:
+// CHECK:    cir.br ^bb[[#LABEL_LOOP:]]
+// CHECK:  ^bb[[#BLK3]]:
+// CHECK:    cir.br ^bb[[#BLK4:]]
+// CHECK:  ^bb[[#BLK4]]:
+// CHECK:    cir.br ^bb[[#BLK5:]]
+// CHECK:  ^bb[[#BLK5]]:
+// CHECK:    cir.br ^bb[[#BODY:]]
+// CHECK:  ^bb[[#COND:]]:
+// CHECK:    cir.brcond {{.*}} ^bb[[#BODY]], ^bb[[#EXIT:]]
+// CHECK:  ^bb[[#BODY]]:
+// CHECK:    cir.br ^bb[[#BLK8:]]
+// CHECK:  ^bb[[#BLK8]]:
+// CHECK:    cir.br ^bb[[#BLK9:]]
+// CHECK:  ^bb[[#BLK9]]:
+// CHECK:    cir.brcond {{.*}} ^bb[[#BLK10:]], ^bb[[#BLK11:]]
+// CHECK:  ^bb[[#BLK10]]:
+// CHECK:    cir.br ^bb[[#RETURN:]]
+// CHECK:  ^bb[[#BLK11]]:
+// CHECK:    cir.br ^bb[[#BLK12:]]
+// CHECK:  ^bb[[#BLK12]]:
+// CHECK:    cir.br ^bb[[#LABEL_LOOP]]
+// CHECK:  ^bb[[#LABEL_LOOP]]:
+// CHECK:    cir.br ^bb[[#BLK14:]]
+// CHECK:  ^bb[[#BLK14]]:
+// CHECK:    cir.br ^bb[[#COND]]
+// CHECK:  ^bb[[#EXIT]]:
+// CHECK:    cir.br ^bb[[#BLK16:]]
+// CHECK:  ^bb[[#BLK16]]:
+// CHECK:    cir.br ^bb[[#RETURN]]
+// CHECK:  ^bb[[#RETURN]]:
+// CHECK:    cir.return
+// CHECK:  }
+// CHECK:}
+
+struct S {};
+struct S get();
+void bar(struct S);
+
+void foo() {
+  {
+    label:
+      bar(get());
+  }
+}
+
+// NOFLAT: cir.func {{.*}} @_Z3foov()
+// NOFLAT:   cir.scope {
+// NOFLAT:     %0 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["agg.tmp0"]
+// NOFLAT:     cir.br ^bb1
+// NOFLAT:    ^bb1:
+// NOFLAT:     cir.label "label"
+
+extern "C" void action1();
+extern "C" void action2();
+extern "C" void multiple_non_case(int v) {
+  switch (v) {
+    default:
+        action1();
+      l2:
+        action2();
+        break;
+  }
+}
+
+// NOFLAT: cir.func {{.*}} @multiple_non_case
+// NOFLAT: cir.switch
+// NOFLAT: cir.case(default, []) {
+// NOFLAT: cir.call @action1()
+// NOFLAT: cir.br ^[[BB1:[a-zA-Z0-9]+]]
+// NOFLAT: ^[[BB1]]:
+// NOFLAT: cir.label
+// NOFLAT: cir.call @action2()
+// NOFLAT: cir.break
+
+extern "C" void case_follow_label(int v) {
+  switch (v) {
+    case 1:
+    label:
+    case 2:
+      action1();
+      break;
+    default:
+      action2();
+      goto label;
+  }
+}
+
+// NOFLAT: cir.func {{.*}} @case_follow_label
+// NOFLAT: cir.switch
+// NOFLAT: cir.case(equal, [#cir.int<1> : !s32i]) {
+// NOFLAT: cir.label "label"
+// NOFLAT: cir.case(equal, [#cir.int<2> : !s32i]) {
+// NOFLAT: cir.call @action1()
+// NOFLAT: cir.break
+// NOFLAT: cir.case(default, []) {
+// NOFLAT: cir.call @action2()
+// NOFLAT: cir.goto "label"
+
+extern "C" void default_follow_label(int v) {
+  switch (v) {
+    case 1:
+    case 2:
+      action1();
+      break;
+    label:
+    default:
+      action2();
+      goto label;
+  }
+}
+
+// NOFLAT: cir.func {{.*}} @default_follow_label
+// NOFLAT: cir.switch
+// NOFLAT: cir.case(anyof, [#cir.int<1> : !s32i, #cir.int<2> : !s32i]) {
+// NOFLAT: cir.call @action1()
+// NOFLAT: cir.break
+// NOFLAT: cir.label "label"
+// NOFLAT: cir.case(default, []) {
+// NOFLAT: cir.call @action2()
+// NOFLAT: cir.goto "label"
diff --git a/clang/test/CIR/Incubator/CodeGen/hello.c b/clang/test/CIR/Incubator/CodeGen/hello.c
new file mode 100644
index 0000000000000..9d6d8f2c05d5b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/hello.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+int printf(const char *restrict, ...);
+
+int main (void) {
+    printf ("Hello, world!\n");
+    return 0;
+}
+
+// CHECK: cir.func private @printf(!cir.ptr<!s8i>, ...) -> !s32i
+// CHECK: cir.global "private" constant cir_private dso_local @".str" = #cir.const_array<"Hello, world!\0A\00" : !cir.array<!s8i x 15>> : !cir.array<!s8i x 15> {alignment = 1 : i64}
+// CHECK: cir.func {{.*}} @main() -> !s32i
+// CHECK:   %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CHECK:   %1 = cir.get_global @printf : !cir.ptr<!cir.func<(!cir.ptr<!s8i>, ...) -> !s32i>>
+// CHECK:   %2 = cir.get_global @".str" : !cir.ptr<!cir.array<!s8i x 15>>
+// CHECK:   %3 = cir.cast array_to_ptrdecay %2 : !cir.ptr<!cir.array<!s8i x 15>> -> !cir.ptr<!s8i>
+// CHECK:   %4 = cir.call @printf(%3) : (!cir.ptr<!s8i>) -> !s32i
+// CHECK:   %5 = cir.const #cir.int<0> : !s32i
+// CHECK:   cir.store{{.*}} %5, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK:   %6 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CHECK:   cir.return %6 : !s32i
+// CHECK: }
diff --git a/clang/test/CIR/Incubator/CodeGen/hot-attr.cpp b/clang/test/CIR/Incubator/CodeGen/hot-attr.cpp
new file mode 100644
index 0000000000000..639021cf99cf6
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/hot-attr.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -O2 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -O2 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+__attribute__((hot)) int s0(int a, int b) {
+  int x = a + b;
+  return x;
+}
+
+// CIR:      #[[ATTR0:.+]] = #cir<extra({{{.*}}hot = #cir.hot
+// CIR:      cir.func {{.*}} @_Z2s0ii(
+// CIR-SAME:     -> !s32i extra(#[[ATTR0]])
+
+// LLVM: define dso_local i32 @_Z2s0ii({{.*}} #[[#ATTR1:]] {
+// LLVM: attributes #[[#ATTR1]] = {{.*}} hot
diff --git a/clang/test/CIR/Incubator/CodeGen/if-consteval.cpp b/clang/test/CIR/Incubator/CodeGen/if-consteval.cpp
new file mode 100644
index 0000000000000..97468beb0ac5c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/if-consteval.cpp
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -std=c++23 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+void should_be_used_1();
+void should_be_used_2();
+void should_be_used_3();
+constexpr void should_not_be_used() {}
+
+constexpr void f() {
+  if consteval {
+    should_not_be_used(); // CHECK-NOT: call {{.*}}should_not_be_used
+  } else {
+    should_be_used_1(); // CHECK: call {{.*}}should_be_used_1
+  }
+
+  if !consteval {
+    should_be_used_2(); // CHECK: call {{.*}}should_be_used_2
+  } else {
+    should_not_be_used(); // CHECK-NOT: call {{.*}}should_not_be_used
+  }
+
+  if consteval {
+    should_not_be_used(); // CHECK-NOT: call {{.*}}should_not_be_used
+  }
+
+  if !consteval {
+    should_be_used_3(); // CHECK: call {{.*}}should_be_used_3
+  }
+}
+
+void g() {
+  f();
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/if-constexpr.cpp b/clang/test/CIR/Incubator/CodeGen/if-constexpr.cpp
new file mode 100644
index 0000000000000..5c54f0be893f1
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/if-constexpr.cpp
@@ -0,0 +1,92 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+void if0() {
+  int x = 0;
+  if constexpr (0 == 0) {
+    // Declare a variable with same name to be sure we handle the
+    // scopes correctly
+    int x = 2;
+  } else {
+    int x = 3;
+  }
+  if constexpr (0 == 1) {
+    int x = 4;
+  } else {
+    int x = 5;
+  }
+  if constexpr (int x = 7; 8 == 8) {
+    int y = x;
+  } else {
+    int y = 2*x;
+  }
+  if constexpr (int x = 9; 8 == 10) {
+    int y = x;
+  } else {
+    int y = 3*x;
+  }
+  if constexpr (10 == 10) {
+    int x = 20;
+  }
+  if constexpr (10 == 11) {
+    int x = 30;
+  }
+  if constexpr (int x = 70; 80 == 80) {
+    int y = 10*x;
+  }
+  if constexpr (int x = 90; 80 == 100) {
+    int y = 11*x;
+  }
+}
+
+// CHECK: cir.func {{.*}} @_Z3if0v() {{.*}}
+// CHECK: cir.store{{.*}} %1, %0 : !s32i, !cir.ptr<!s32i> loc({{.*}})
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT:   %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {{.*}}
+// CHECK-NEXT:   %3 = cir.const #cir.int<2> : !s32i loc({{.*}})
+// CHECK-NEXT:   cir.store{{.*}} %3, %2 : !s32i, !cir.ptr<!s32i> loc({{.*}})
+// CHECK-NEXT: } loc({{.*}})
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT:   %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {{.*}}
+// CHECK-NEXT:   %3 = cir.const #cir.int<5> : !s32i loc({{.*}})
+// CHECK-NEXT:   cir.store{{.*}} %3, %2 : !s32i, !cir.ptr<!s32i> loc({{.*}})
+// CHECK-NEXT: } loc({{.*}})
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT:   %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {{.*}}
+// CHECK-NEXT:   %3 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {{.*}}
+// CHECK-NEXT:   %4 = cir.const #cir.int<7> : !s32i loc({{.*}})
+// CHECK-NEXT:   cir.store{{.*}} %4, %2 : !s32i, !cir.ptr<!s32i> loc({{.*}})
+// CHECK-NEXT:   %5 = cir.load{{.*}} %2 : !cir.ptr<!s32i>, !s32i loc({{.*}})
+// CHECK-NEXT:   cir.store{{.*}} %5, %3 : !s32i, !cir.ptr<!s32i> loc({{.*}})
+// CHECK-NEXT: } loc({{.*}})
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT:   %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {{.*}}
+// CHECK-NEXT:   %3 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {{.*}}
+// CHECK-NEXT:   %4 = cir.const #cir.int<9> : !s32i loc({{.*}})
+// CHECK-NEXT:   cir.store{{.*}} %4, %2 : !s32i, !cir.ptr<!s32i> loc({{.*}})
+// CHECK-NEXT:   %5 = cir.const #cir.int<3> : !s32i loc({{.*}})
+// CHECK-NEXT:   %6 = cir.load{{.*}} %2 : !cir.ptr<!s32i>, !s32i loc({{.*}})
+// CHECK-NEXT:   %7 = cir.binop(mul, %5, %6) nsw : !s32i loc({{.*}})
+// CHECK-NEXT:   cir.store{{.*}} %7, %3 : !s32i, !cir.ptr<!s32i> loc({{.*}})
+// CHECK-NEXT: } loc({{.*}})
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT:   %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {{.*}}
+// CHECK-NEXT:   %3 = cir.const #cir.int<20> : !s32i loc({{.*}})
+// CHECK-NEXT:   cir.store{{.*}} %3, %2 : !s32i, !cir.ptr<!s32i> loc({{.*}})
+// CHECK-NEXT: } loc({{.*}})
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT:   %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {{.*}}
+// CHECK-NEXT:   %3 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {{.*}}
+// CHECK-NEXT:   %4 = cir.const #cir.int<70> : !s32i loc({{.*}})
+// CHECK-NEXT:   cir.store{{.*}} %4, %2 : !s32i, !cir.ptr<!s32i> loc({{.*}})
+// CHECK-NEXT:   %5 = cir.const #cir.int<10> : !s32i loc({{.*}})
+// CHECK-NEXT:   %6 = cir.load{{.*}} %2 : !cir.ptr<!s32i>, !s32i loc({{.*}})
+// CHECK-NEXT:   %7 = cir.binop(mul, %5, %6) nsw : !s32i loc({{.*}})
+// CHECK-NEXT:   cir.store{{.*}} %7, %3 : !s32i, !cir.ptr<!s32i> loc({{.*}})
+// CHECK-NEXT: } loc({{.*}})
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT:   %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {{.*}}
+// CHECK-NEXT:   %3 = cir.const #cir.int<90> : !s32i loc({{.*}})
+// CHECK-NEXT:   cir.store{{.*}} %3, %2 : !s32i, !cir.ptr<!s32i> loc({{.*}})
+// CHECK-NEXT: } loc({{.*}})
+// CHECK-NEXT: cir.return loc({{.*}})
diff --git a/clang/test/CIR/Incubator/CodeGen/implicit-return.cpp b/clang/test/CIR/Incubator/CodeGen/implicit-return.cpp
new file mode 100644
index 0000000000000..18eb6bad9f991
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/implicit-return.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -O0 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CHECK-O0
+// RUN: %clang_cc1 -O2 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CHECK-O2
+
+void ret_void() {}
+
+//      CHECK-O0: cir.func {{.*}} @_Z8ret_voidv()
+// CHECK-O0-NEXT:   cir.return
+// CHECK-O0-NEXT: }
+
+//      CHECK-O2: cir.func {{.*}} @_Z8ret_voidv()
+// CHECK-O2-NEXT:   cir.return
+// CHECK-O2-NEXT: }
+
+int ret_non_void() {}
+
+//      CHECK-O0: cir.func {{.*}} @_Z12ret_non_voidv() -> !s32i
+// CHECK-O0-NEXT:   %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+// CHECK-O0-NEXT:   cir.trap
+// CHECK-O0-NEXT: }
+
+//      CHECK-O2: cir.func {{.*}} @_Z12ret_non_voidv() -> !s32i
+// CHECK-O2-NEXT:   %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+// CHECK-O2-NEXT:   cir.unreachable
+// CHECK-O2-NEXT: }
diff --git a/clang/test/CIR/Incubator/CodeGen/inc-bool.cpp b/clang/test/CIR/Incubator/CodeGen/inc-bool.cpp
new file mode 100644
index 0000000000000..5b451bc7ff6ef
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/inc-bool.cpp
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++14 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+void foo(bool x) {
+  x++;
+}
+
+// CHECK:  cir.func {{.*}} @_Z3foob(%arg0: !cir.bool loc({{.*}}))
+// CHECK:    [[ALLOC_X:%.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["x", init] {alignment = 1 : i64}
+// CHECK:    cir.store{{.*}} %arg0, [[ALLOC_X]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:    {{.*}} = cir.load{{.*}} [[ALLOC_X]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK:    [[TRUE:%.*]] = cir.const #true
+// CHECK:    cir.store{{.*}} [[TRUE]], [[ALLOC_X]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:    cir.return
diff --git a/clang/test/CIR/Incubator/CodeGen/inc-dec.cpp b/clang/test/CIR/Incubator/CodeGen/inc-dec.cpp
new file mode 100644
index 0000000000000..40bebc69162e3
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/inc-dec.cpp
@@ -0,0 +1,55 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -Wno-unused-value -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+unsigned id0() {
+  unsigned a = 1;
+  return ++a;
+}
+
+// CHECK: cir.func {{.*}} @_Z3id0v() -> !u32i
+// CHECK: %[[#RET:]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["__retval"]
+// CHECK: %[[#A:]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["a", init]
+// CHECK: %[[#BEFORE_A:]] = cir.load{{.*}} %[[#A]]
+// CHECK: %[[#AFTER_A:]] = cir.unary(inc, %[[#BEFORE_A]])
+// CHECK: cir.store{{.*}} %[[#AFTER_A]], %[[#A]]
+// CHECK: cir.store{{.*}} %[[#AFTER_A]], %[[#RET]]
+
+
+unsigned id1() {
+  unsigned a = 1;
+  return --a;
+}
+
+// CHECK: cir.func {{.*}} @_Z3id1v() -> !u32i
+// CHECK: %[[#RET:]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["__retval"]
+// CHECK: %[[#A:]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["a", init]
+// CHECK: %[[#BEFORE_A:]] = cir.load{{.*}} %[[#A]]
+// CHECK: %[[#AFTER_A:]] = cir.unary(dec, %[[#BEFORE_A]])
+// CHECK: cir.store{{.*}} %[[#AFTER_A]], %[[#A]]
+// CHECK: cir.store{{.*}} %[[#AFTER_A]], %[[#RET]]
+
+unsigned id2() {
+  unsigned a = 1;
+  return a++;
+}
+
+// CHECK: cir.func {{.*}} @_Z3id2v() -> !u32i
+// CHECK: %[[#RET:]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["__retval"]
+// CHECK: %[[#A:]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["a", init]
+// CHECK: %[[#BEFORE_A:]] = cir.load{{.*}} %[[#A]]
+// CHECK: %[[#AFTER_A:]] = cir.unary(inc, %[[#BEFORE_A]])
+// CHECK: cir.store{{.*}} %[[#AFTER_A]], %[[#A]]
+// CHECK: cir.store{{.*}} %[[#BEFORE_A]], %[[#RET]]
+
+unsigned id3() {
+  unsigned a = 1;
+  return a--;
+}
+
+// CHECK: cir.func {{.*}} @_Z3id3v() -> !u32i
+// CHECK: %[[#RET:]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["__retval"]
+// CHECK: %[[#A:]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["a", init]
+// CHECK: %[[#BEFORE_A:]] = cir.load{{.*}} %[[#A]]
+// CHECK: %[[#AFTER_A:]] = cir.unary(dec, %[[#BEFORE_A]])
+// CHECK: cir.store{{.*}} %[[#AFTER_A]], %[[#A]]
+// CHECK: cir.store{{.*}} %[[#BEFORE_A]], %[[#RET]]
diff --git a/clang/test/CIR/Incubator/CodeGen/inheriting-constructor.cpp b/clang/test/CIR/Incubator/CodeGen/inheriting-constructor.cpp
new file mode 100644
index 0000000000000..9d3d673e6f654
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/inheriting-constructor.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR_ITANIUM --input-file=%t.cir %s
+
+struct A { A(int); virtual ~A(); };
+struct B : A { using A::A; ~B(); };
+B::~B() {}
+
+B b(123);
+
+// CIR_ITANIUM-LABEL: @_ZN1BD2Ev
+// CIR_ITANIUM-LABEL: @_ZN1BD1Ev
+// CIR_ITANIUM-LABEL: @_ZN1BD0Ev
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/init_priority.cpp b/clang/test/CIR/Incubator/CodeGen/init_priority.cpp
new file mode 100644
index 0000000000000..a6780a996f09c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/init_priority.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+// CIR: attributes {cir.global_ctors = [#cir.global_ctor<"__cxx_global_var_init", 101>]
+// LLVM: @llvm.global_ctors = appending constant{{.*}}{ i32 101, ptr @__cxx_global_var_init, ptr null }
+// OGCG: @llvm.global_ctors = appending global{{.*}}{ i32 101, ptr @_GLOBAL__I_000101, ptr null }
+class A {
+public:
+  A(int, int);
+} A __attribute((init_priority(101)))(0, 0);
+
+// CIR-LABEL: cir.func internal private @__cxx_global_var_init() global_ctor(101)  {
+// LLVM-LABEL: define internal void @__cxx_global_var_init() {
+// OGCG-LABEL: define internal void @_GLOBAL__I_000101() {{.*}} section ".text.startup" {
diff --git a/clang/test/CIR/Incubator/CodeGen/initlist-ptr-ptr.cpp b/clang/test/CIR/Incubator/CodeGen/initlist-ptr-ptr.cpp
new file mode 100644
index 0000000000000..87241c5e27dc9
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/initlist-ptr-ptr.cpp
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+namespace std {
+template <class b> class initializer_list {
+  const b *array_start;
+  const b *array_end;
+};
+template <class b>
+void f(initializer_list<b>) {;}
+void test() {
+  f({"xy","uv"});
+}
+} // namespace std
+
+// CIR: [[INITLIST_TYPE:!.*]] = !cir.record<class "std::initializer_list<const char *>" {!cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!cir.ptr<!s8i>>}>
+// CIR: cir.func {{.*}} @_ZSt1fIPKcEvSt16initializer_listIT_E(%arg0: [[INITLIST_TYPE]]
+// CIR: [[LOCAL:%.*]] = cir.alloca [[INITLIST_TYPE]], !cir.ptr<[[INITLIST_TYPE]]>,
+// CIR: cir.store{{.*}} %arg0, [[LOCAL]] : [[INITLIST_TYPE]], !cir.ptr<[[INITLIST_TYPE]]>
+// CIR: cir.return
+
+// CIR: cir.global "private" constant cir_private dso_local [[STR_XY:@.*]] = #cir.const_array<"xy\00" : !cir.array<!s8i x 3>> : !cir.array<!s8i x 3>
+// CIR: cir.global "private" constant cir_private dso_local [[STR_UV:@.*]] = #cir.const_array<"uv\00" : !cir.array<!s8i x 3>> : !cir.array<!s8i x 3>
+
+// CIR: cir.func {{.*}} @_ZSt4testv()
+// CIR: cir.scope {
+// CIR: [[INITLIST_LOCAL:%.*]] = cir.alloca [[INITLIST_TYPE]], !cir.ptr<[[INITLIST_TYPE]]>,
+// CIR: [[LOCAL_ELEM_ARRAY:%.*]] = cir.alloca !cir.array<!cir.ptr<!s8i> x 2>, !cir.ptr<!cir.array<!cir.ptr<!s8i> x 2>>,
+// CIR: [[ZERO:%.*]] = cir.const #cir.int<0>
+// CIR: [[FIRST_ELEM_PTR:%.*]] = cir.get_element [[LOCAL_ELEM_ARRAY]][[[ZERO]]] : (!cir.ptr<!cir.array<!cir.ptr<!s8i> x 2>>, !s32i) -> !cir.ptr<!cir.ptr<!s8i>>
+// CIR: [[XY_CHAR_ARRAY:%.*]] = cir.get_global [[STR_XY]]  : !cir.ptr<!cir.array<!s8i x 3>>
+// CIR: [[STR_XY_PTR:%.*]] = cir.cast array_to_ptrdecay [[XY_CHAR_ARRAY]] : !cir.ptr<!cir.array<!s8i x 3>> -> !cir.ptr<!s8i>
+// CIR:  cir.store{{.*}} [[STR_XY_PTR]], [[FIRST_ELEM_PTR]] : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+// CIR: [[ONE:%.*]] = cir.const #cir.int<1>
+// CIR: [[NEXT_ELEM_PTR:%.*]] = cir.get_element [[LOCAL_ELEM_ARRAY]][[[ONE]]] : (!cir.ptr<!cir.array<!cir.ptr<!s8i> x 2>>, !s64i) -> !cir.ptr<!cir.ptr<!s8i>>
+// CIR: [[UV_CHAR_ARRAY:%.*]] = cir.get_global [[STR_UV]]  : !cir.ptr<!cir.array<!s8i x 3>>
+// CIR: [[STR_UV_PTR:%.*]] = cir.cast array_to_ptrdecay [[UV_CHAR_ARRAY]] : !cir.ptr<!cir.array<!s8i x 3>> -> !cir.ptr<!s8i>
+// CIR:  cir.store{{.*}} [[STR_UV_PTR]], [[NEXT_ELEM_PTR]] : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+// CIR: [[START_FLD_PTR:%.*]] = cir.get_member [[INITLIST_LOCAL]][0] {name = "array_start"} : !cir.ptr<[[INITLIST_TYPE]]> -> !cir.ptr<!cir.ptr<!cir.ptr<!s8i>>>
+// CIR: [[START_FLD_PTR_AS_PTR_2_CHAR_ARRAY:%.*]] = cir.cast bitcast [[START_FLD_PTR]] : !cir.ptr<!cir.ptr<!cir.ptr<!s8i>>> -> !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!s8i> x 2>>>
+// CIR: cir.store{{.*}} [[LOCAL_ELEM_ARRAY]], [[START_FLD_PTR_AS_PTR_2_CHAR_ARRAY]] : !cir.ptr<!cir.array<!cir.ptr<!s8i> x 2>>, !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!s8i> x 2>>>
+// CIR: [[ELEM_ARRAY_LEN:%.*]] = cir.const #cir.int<2>
+// CIR: [[END_FLD_PTR:%.*]] = cir.get_member [[INITLIST_LOCAL]][1] {name = "array_end"} : !cir.ptr<[[INITLIST_TYPE]]> -> !cir.ptr<!cir.ptr<!cir.ptr<!s8i>>>
+// CIR: [[LOCAL_ELEM_ARRAY_END:%.*]] = cir.ptr_stride [[LOCAL_ELEM_ARRAY]], [[ELEM_ARRAY_LEN]] : (!cir.ptr<!cir.array<!cir.ptr<!s8i> x 2>>, !u64i) -> !cir.ptr<!cir.array<!cir.ptr<!s8i> x 2>>
+// CIR: [[END_FLD_PTR_AS_PTR_2_CHAR_ARRAY:%.*]] = cir.cast bitcast [[END_FLD_PTR]] : !cir.ptr<!cir.ptr<!cir.ptr<!s8i>>> -> !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!s8i> x 2>>>
+// CIR: cir.store{{.*}} [[LOCAL_ELEM_ARRAY_END]], [[END_FLD_PTR_AS_PTR_2_CHAR_ARRAY]] : !cir.ptr<!cir.array<!cir.ptr<!s8i> x 2>>, !cir.ptr<!cir.ptr<!cir.array<!cir.ptr<!s8i> x 2>>>
+// CIR: [[ARG:%.*]] = cir.load{{.*}} [[INITLIST_LOCAL]] : !cir.ptr<[[INITLIST_TYPE]]>, [[INITLIST_TYPE]]
+// CIR: cir.call @_ZSt1fIPKcEvSt16initializer_listIT_E([[ARG]]) : ([[INITLIST_TYPE]]) -> ()
+// CIR: }
+// CIR: cir.return
+// CIR: }
+
+// LLVM: %"class.std::initializer_list<const char *>" = type { ptr, ptr }
+
+// LLVM: @.str = private constant [3 x i8] c"xy\00"
+// LLVM: @.str.1 = private constant [3 x i8] c"uv\00"
+
+// LLVM: define linkonce_odr void @_ZSt1fIPKcEvSt16initializer_listIT_E(%"class.std::initializer_list<const char *>" [[ARG0:%.*]])
+// LLVM: [[LOCAL_PTR:%.*]] = alloca %"class.std::initializer_list<const char *>", i64 1, align 8
+// LLVM: store %"class.std::initializer_list<const char *>" [[ARG0]], ptr [[LOCAL_PTR]], align 8
+// LLVM: ret void
+// LLVM: }
+
+// LLVM: define dso_local void @_ZSt4testv()
+// LLVM:  [[INIT_STRUCT:%.*]] = alloca %"class.std::initializer_list<const char *>", i64 1, align 8
+// LLVM:  [[ELEM_ARRAY_PTR:%.*]] = alloca [2 x ptr], i64 1, align 8
+// LLVM: br label %[[SCOPE_START:.*]]
+// LLVM: [[SCOPE_START]]: ; preds = %0
+// LLVM:  [[PTR_FIRST_ELEM:%.*]] = getelementptr [2 x ptr], ptr [[ELEM_ARRAY_PTR]], i32 0, i64 0
+// LLVM:  store ptr @.str, ptr [[PTR_FIRST_ELEM]], align 8
+// LLVM:  [[PTR_SECOND_ELEM:%.*]] = getelementptr [2 x ptr], ptr [[ELEM_ARRAY_PTR]], i32 0, i64 1
+// LLVM:  store ptr @.str.1, ptr [[PTR_SECOND_ELEM]], align 8
+// LLVM:  [[INIT_START_FLD_PTR:%.*]] = getelementptr %"class.std::initializer_list<const char *>", ptr [[INIT_STRUCT]], i32 0, i32 0
+// LLVM:  store ptr [[ELEM_ARRAY_PTR]], ptr [[INIT_START_FLD_PTR]], align 8
+// LLVM:  [[INIT_END_FLD_PTR:%.*]] = getelementptr %"class.std::initializer_list<const char *>", ptr [[INIT_STRUCT]], i32 0, i32 1
+// LLVM:  [[ELEM_ARRAY_END:%.*]] = getelementptr [2 x ptr], ptr [[ELEM_ARRAY_PTR]], i64 2
+// LLVM:  store ptr [[ELEM_ARRAY_END]], ptr [[INIT_END_FLD_PTR]], align 8
+// LLVM:  [[ARG2PASS:%.*]] = load %"class.std::initializer_list<const char *>", ptr [[INIT_STRUCT]], align 8
+// LLVM:  call void @_ZSt1fIPKcEvSt16initializer_listIT_E(%"class.std::initializer_list<const char *>" [[ARG2PASS]])
+// LLVM:  br label %[[SCOPE_END:.*]]
+// LLVM: [[SCOPE_END]]: ; preds = %[[SCOPE_START]]
+// LLVM:  ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/initlist-ptr-unsigned.cpp b/clang/test/CIR/Incubator/CodeGen/initlist-ptr-unsigned.cpp
new file mode 100644
index 0000000000000..6c94eb8a93e4d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/initlist-ptr-unsigned.cpp
@@ -0,0 +1,65 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+namespace std {
+template <class b> class initializer_list {
+  const b *c;
+  unsigned long len;
+};
+template <class b>
+void f(initializer_list<b>) {;}
+void test() {
+  f({7});
+}
+} // namespace std
+
+// CIR: [[INITLIST_TYPE:!.*]] = !cir.record<class "std::initializer_list<int>" {!cir.ptr<!s32i>, !u64i}>
+
+// CIR: cir.func {{.*}} @_ZSt1fIiEvSt16initializer_listIT_E(%arg0: [[INITLIST_TYPE]]
+// CIR: [[REG0:%.*]] = cir.alloca [[INITLIST_TYPE]], !cir.ptr<[[INITLIST_TYPE]]>,
+// CIR: cir.store{{.*}} %arg0, [[REG0]] : [[INITLIST_TYPE]], !cir.ptr<[[INITLIST_TYPE]]>
+// CIR: cir.return
+
+// CIR: cir.func {{.*}} @_ZSt4testv()
+// CIR: cir.scope {
+// CIR: [[LIST_PTR:%.*]] = cir.alloca [[INITLIST_TYPE]], !cir.ptr<[[INITLIST_TYPE]]>,
+// CIR: [[ARRAY:%.*]] = cir.alloca !cir.array<!s32i x 1>, !cir.ptr<!cir.array<!s32i x 1>>,
+// CIR: [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+// CIR: [[FIRST_ELEM:%.*]] = cir.get_element [[ARRAY]][[[ZERO]]] : (!cir.ptr<!cir.array<!s32i x 1>>, !s32i) -> !cir.ptr<!s32i>
+// CIR: [[SEVEN:%.*]] = cir.const #cir.int<7> : !s32i
+// CIR: cir.store{{.*}} [[SEVEN]], [[FIRST_ELEM]] : !s32i, !cir.ptr<!s32i>
+// CIR: [[FLD_C:%.*]] = cir.get_member [[LIST_PTR]][0] {name = "c"} : !cir.ptr<[[INITLIST_TYPE]]> -> !cir.ptr<!cir.ptr<!s32i>>
+// CIR: [[ARRAY_PTR:%.*]] = cir.cast bitcast [[FLD_C]] : !cir.ptr<!cir.ptr<!s32i>> -> !cir.ptr<!cir.ptr<!cir.array<!s32i x 1>>>
+// CIR: cir.store{{.*}} [[ARRAY]], [[ARRAY_PTR]] : !cir.ptr<!cir.array<!s32i x 1>>, !cir.ptr<!cir.ptr<!cir.array<!s32i x 1>>>
+// CIR: [[LENGTH_ONE:%.*]] = cir.const #cir.int<1>
+// CIR: [[FLD_LEN:%.*]] = cir.get_member [[LIST_PTR]][1] {name = "len"} : !cir.ptr<[[INITLIST_TYPE]]> -> !cir.ptr<!u64i>
+// CIR: cir.store{{.*}} [[LENGTH_ONE]], [[FLD_LEN]] : !u64i, !cir.ptr<!u64i>
+// CIR: [[ARG2PASS:%.*]] = cir.load{{.*}} [[LIST_PTR]] : !cir.ptr<[[INITLIST_TYPE]]>,  [[INITLIST_TYPE]]
+// CIR: cir.call @_ZSt1fIiEvSt16initializer_listIT_E([[ARG2PASS]]) : ([[INITLIST_TYPE]]) -> ()
+// CIR: }
+// CIR: cir.return
+// CIR: }
+
+// LLVM: %"class.std::initializer_list<int>" = type { ptr, i64 }
+// LLVM: define linkonce_odr void @_ZSt1fIiEvSt16initializer_listIT_E(%"class.std::initializer_list<int>" [[ARG:%.*]])
+// LLVM:  [[LOCAL:%.*]] = alloca %"class.std::initializer_list<int>", i64 1, align 8
+// LLVM:  store %"class.std::initializer_list<int>" [[ARG]], ptr [[LOCAL]], align 8
+
+// LLVM: define dso_local void @_ZSt4testv()
+// LLVM:  [[INIT_STRUCT:%.*]] = alloca %"class.std::initializer_list<int>", i64 1, align 8
+// LLVM:  [[ELEM_ARRAY:%.*]] = alloca [1 x i32], i64 1, align 4
+// LLVM: br label %[[SCOPE_START:.*]]
+// LLVM: [[SCOPE_START]]: ; preds = %0
+// LLVM:  [[PTR_FIRST_ELEM:%.*]] = getelementptr [1 x i32], ptr [[ELEM_ARRAY]], i32 0, i64 0
+// LLVM:  store i32 7, ptr [[PTR_FIRST_ELEM]], align 4
+// LLVM:  [[ELEM_ARRAY_PTR:%.*]] = getelementptr %"class.std::initializer_list<int>", ptr [[INIT_STRUCT]], i32 0, i32 0
+// LLVM:  store ptr [[ELEM_ARRAY]], ptr [[ELEM_ARRAY_PTR]], align 8
+// LLVM:  [[INIT_LEN_FLD:%.*]] = getelementptr %"class.std::initializer_list<int>", ptr [[INIT_STRUCT]], i32 0, i32 1
+// LLVM:  store i64 1, ptr [[INIT_LEN_FLD]], align 8
+// LLVM:  [[ARG2PASS:%.*]] = load %"class.std::initializer_list<int>", ptr [[INIT_STRUCT]], align 8
+// LLVM:  call void @_ZSt1fIiEvSt16initializer_listIT_E(%"class.std::initializer_list<int>" [[ARG2PASS]])
+// LLVM:  br label %[[SCOPE_END:.*]]
+// LLVM: [[SCOPE_END]]: ; preds = %[[SCOPE_START]]
+// LLVM:  ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/int-wrap.c b/clang/test/CIR/Incubator/CodeGen/int-wrap.c
new file mode 100644
index 0000000000000..f23e216143fca
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/int-wrap.c
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fwrapv -fclangir -emit-cir %s -o - 2>&1 | FileCheck %s --check-prefix=WRAP
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - 2>&1 | FileCheck %s --check-prefix=NOWRAP
+
+#define N 42
+
+typedef struct {
+  const char* ptr;
+} A;
+
+// WRAP:   cir.binop(sub, {{.*}}, {{.*}}) : !s32i
+// NOWRAP: cir.binop(sub, {{.*}}, {{.*}}) nsw : !s32i
+void foo(int* ar, int len) {
+  int x = ar[len - N];
+}
+
+// Check that the ptr_stride is generated in both cases (i.e., no NYI failures).
+
+// WRAP:    cir.ptr_stride
+// NOWRAP:  cir.ptr_stride
+void bar(A* a, unsigned n) {
+  a->ptr = a->ptr + n;
+}
+
+// WRAP:   cir.ptr_stride
+// NOWRAP: cir.ptr_stride
+void baz(A* a) {
+  a->ptr--;
+}
+
+
diff --git a/clang/test/CIR/Incubator/CodeGen/int128.cpp b/clang/test/CIR/Incubator/CodeGen/int128.cpp
new file mode 100644
index 0000000000000..515cff30bb5cf
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/int128.cpp
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll --check-prefix=LLVM %s
+
+// TODO: remove the -fno-clangir-call-conv-lowering flag when ABI lowering for
+//       int128 is supported.
+
+// CHECK-LABEL: @_Z5test1n
+// LLVM-LABEL: @_Z5test1n
+__int128 test1(__int128 x) {
+  return x;
+  // CHECK: cir.return %{{.+}} : !s128i
+  // LLVM: ret i128 %{{.+}}
+}
+
+// CHECK-LABEL: @_Z5test2o
+// LLVM-LABEL: @_Z5test2o
+unsigned __int128 test2(unsigned __int128 x) {
+  return x;
+  // CHECK: cir.return %{{.+}} : !u128i
+  // LLVM: ret i128 %{{.+}}
+}
+
+// CHECK-LABEL: @_Z11unary_arithn
+// LLVM-LABEL: @_Z11unary_arithn
+__int128 unary_arith(__int128 x) {
+  return ++x;
+  // CHECK: %{{.+}} = cir.unary(inc, %{{.+}}) nsw : !s128i, !s128i
+  // LLVM: %{{.+}} = add nsw i128 %{{.+}}, 1
+}
+
+// CHECK-LABEL: @_Z12binary_arithnn
+// LLVM-LABEL: @_Z12binary_arithnn
+__int128 binary_arith(__int128 x, __int128 y) {
+  return x + y;
+  // CHECK: %{{.+}} = cir.binop(add, %{{.+}}, %{{.+}}) nsw : !s128i
+  // LLVM: %{{.+}} = add nsw i128 %{{.+}}, %{{.+}}
+}
+
+volatile int int_var;
+volatile double double_var;
+
+// CHECK-LABEL: @_Z19integral_conversionn
+// LLVM-LABEL: @_Z19integral_conversionn
+__int128 integral_conversion(__int128 x) {
+  int_var = x;
+  // CHECK: %[[#VAL:]] = cir.cast integral %{{.+}} : !s128i -> !s32i
+  // LLVM: %{{.+}} = trunc i128 %{{.+}} to i32
+
+  return int_var;
+  // CHECK: %{{.+}} = cir.cast integral %{{.+}} : !s32i -> !s128i
+  // LLVM: %{{.+}} = sext i32 %{{.+}} to i128
+}
+
+// CHECK-LABEL: @_Z16float_conversionn
+// LLVM-LABEL: @_Z16float_conversionn
+__int128 float_conversion(__int128 x) {
+  double_var = x;
+  // CHECK: %[[#VAL:]] = cir.cast int_to_float %{{.+}} : !s128i -> !cir.double
+  // LLVM: %{{.+}} = sitofp i128 %{{.+}} to double
+
+  return double_var;
+  // CHECK: %{{.+}} = cir.cast float_to_int %{{.+}} : !cir.double -> !s128i
+  // LLVM: %{{.+}} = fptosi double %{{.+}} to i128
+}
+
+// CHECK-LABEL: @_Z18boolean_conversionn
+// LLVM-LABEL: @_Z18boolean_conversionn
+bool boolean_conversion(__int128 x) {
+  return x;
+  // CHECK: %{{.+}} = cir.cast int_to_bool %{{.+}} : !s128i -> !cir.bool
+  // LLVM: %{{.+}} = icmp ne i128 %{{.+}}, 0
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/kr-func-promote.c b/clang/test/CIR/Incubator/CodeGen/kr-func-promote.c
new file mode 100644
index 0000000000000..45d92b34bd836
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/kr-func-promote.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s
+
+// CHECK: cir.func{{.*}} @foo(%arg0: !s32i
+// CHECK:   %0 = cir.alloca !s16i, !cir.ptr<!s16i>, ["x", init]
+// CHECK:   %1 = cir.cast integral %arg0 : !s32i -> !s16i
+// CHECK:   cir.store %1, %0 : !s16i, !cir.ptr<!s16i>
+void foo(x) short x; {}
+
+// CHECK: cir.func {{.*}} @bar(%arg0: !cir.double
+// CHECK:   %0 = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["f", init]
+// CHECK:   %1 = cir.cast floating %arg0 : !cir.double -> !cir.float
+// CHECK:   cir.store %1, %0 : !cir.float, !cir.ptr<!cir.float>
+void bar(f) float f; {}
diff --git a/clang/test/CIR/Incubator/CodeGen/label-values.c b/clang/test/CIR/Incubator/CodeGen/label-values.c
new file mode 100644
index 0000000000000..bf9bc0b603d90
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/label-values.c
@@ -0,0 +1,275 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir  %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm  %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm  %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+void A(void) {
+  void *ptr = &&A;
+  goto *ptr;
+A:
+  return;
+}
+// CIR:  cir.func {{.*}} @A
+// CIR:    [[PTR:%.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["ptr", init] {alignment = 8 : i64}
+// CIR:    [[BLOCK:%.*]] = cir.blockaddress <@A, "A"> -> !cir.ptr<!void>
+// CIR:    cir.store align(8) [[BLOCK]], [[PTR]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// CIR:    [[BLOCKADD:%.*]] = cir.load align(8) [[PTR]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR:    cir.br ^bb1([[BLOCKADD]] : !cir.ptr<!void>)
+// CIR:  ^bb1([[PHI:%.*]]: !cir.ptr<!void> {{.*}}):  // pred: ^bb0
+// CIR:    cir.indirectbr [[PHI]] : <!void>, [
+// CIR:    ^bb2
+// CIR:    ]
+// CIR:  ^bb2:  // pred: ^bb1
+// CIR:    cir.label "A"
+// CIR:    cir.return
+//
+// LLVM: define dso_local void @A()
+// LLVM:   [[PTR:%.*]] = alloca ptr, i64 1, align 8
+// LLVM:   store ptr blockaddress(@A, %[[A:.*]]), ptr [[PTR]], align 8
+// LLVM:   [[BLOCKADD:%.*]] = load ptr, ptr [[PTR]], align 8
+// LLVM:   br label %[[indirectgoto:.*]]
+// LLVM: [[indirectgoto]]:                                                ; preds = %[[ENTRY:.*]]
+// LLVM:  [[PHI:%.*]] = phi ptr [ [[BLOCKADD]], %[[ENTRY]] ]
+// LLVM:  indirectbr ptr [[PHI]], [label %[[A]]]
+// LLVM: [[A]]:                                                ; preds = %[[indirectgoto]]
+// LLVM:   ret void
+
+// OGCG: define dso_local void @A()
+// OGCG:   [[PTR:%.*]] = alloca ptr, align 8
+// OGCG:   store ptr blockaddress(@A, %A), ptr [[PTR]], align 8
+// OGCG:   [[BLOCKADD:%.*]] = load ptr, ptr [[PTR]], align 8
+// OGCG:   br label %indirectgoto
+// OGCG: A:                                                ; preds = %indirectgoto
+// OGCG:   ret void
+// OGCG: indirectgoto:                                     ; preds = %entry
+// OGCG:   %indirect.goto.dest = phi ptr [ [[BLOCKADD]], %entry ]
+// OGCG:   indirectbr ptr %indirect.goto.dest, [label %A]
+
+void B(void) {
+B:
+  void *ptr = &&B;
+  goto *ptr;
+}
+
+// CIR:  cir.func {{.*}} @B()
+// CIR:    [[PTR:%.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["ptr", init] {alignment = 8 : i64}
+// CIR:    cir.br ^bb1
+// CIR:   ^bb1: // 2 preds: ^bb0, ^bb2
+// CIR:    cir.label "B"
+// CIR:    [[BLOCK:%.*]] = cir.blockaddress <@B, "B"> -> !cir.ptr<!void>
+// CIR:    cir.store align(8) [[BLOCK]], [[PTR]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// CIR:    [[BLOCKADD:%.*]] = cir.load align(8) [[PTR]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR:    cir.br ^bb2([[BLOCKADD]] : !cir.ptr<!void>)
+// CIR:  ^bb2([[PHI:%.*]]: !cir.ptr<!void> {{.*}}):  // pred: ^bb1
+// CIR:    cir.indirectbr [[PHI]] : <!void>, [
+// CIR-NEXT:    ^bb1
+// CIR:    ]
+
+// LLVM: define dso_local void @B
+// LLVM:   %[[PTR:.*]] = alloca ptr, i64 1, align 8
+// LLVM:   br label %[[B:.*]]
+// LLVM: [[B]]:
+// LLVM:   store ptr blockaddress(@B, %[[B]]), ptr %[[PTR]], align 8
+// LLVM:   [[BLOCKADD:%.*]] = load ptr, ptr %[[PTR]], align 8
+// LLVM:   br label %[[indirectgoto:.*]]
+// LLVM: [[indirectgoto]]:
+// LLVM:   [[PHI:%.*]] = phi ptr [ [[BLOCKADD]], %[[B]] ]
+// LLVM:   indirectbr ptr [[PHI]], [label %[[B]]]
+
+// OGCG: define dso_local void @B
+// OGCG:   [[PTR:%.*]] = alloca ptr, align 8
+// OGCG:   br label %B
+// OGCG: B:                                                ; preds = %indirectgoto, %entry
+// OGCG:   store ptr blockaddress(@B, %B), ptr [[PTR]], align 8
+// OGCG:   [[BLOCKADD:%.*]] = load ptr, ptr [[PTR]], align 8
+// OGCG:   br label %indirectgoto
+// OGCG: indirectgoto:                                     ; preds = %B
+// OGCG:   %indirect.goto.dest = phi ptr [ [[BLOCKADD]], %B ]
+// OGCG:   indirectbr ptr %indirect.goto.dest, [label %B]
+
+void C(int x) {
+  void *ptr = (x == 0) ? &&A : &&B;
+  goto *ptr;
+A:
+    return;
+B:
+    return;
+}
+
+// CIR:  cir.func {{.*}} @C
+// CIR:    [[BLOCK1:%.*]] = cir.blockaddress <@C, "A"> -> !cir.ptr<!void>
+// CIR:    [[BLOCK2:%.*]] = cir.blockaddress <@C, "B"> -> !cir.ptr<!void>
+// CIR:    [[COND:%.*]] = cir.select if [[CMP:%.*]] then [[BLOCK1]] else [[BLOCK2]] : (!cir.bool, !cir.ptr<!void>, !cir.ptr<!void>) -> !cir.ptr<!void>
+// CIR:    cir.store align(8) [[COND]], [[PTR:%.*]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// CIR:    [[BLOCKADD:%.*]] = cir.load align(8) [[PTR]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR:    cir.br ^bb2([[BLOCKADD]] : !cir.ptr<!void>)
+// CIR:  ^bb1:  // 2 preds: ^bb3, ^bb4
+// CIR:    cir.return
+// CIR:  ^bb2([[PHI:%.*]]: !cir.ptr<!void> {{.*}}):  // pred: ^bb0
+// CIR:    cir.indirectbr [[PHI]] : <!void>, [
+// CIR-NEXT:    ^bb3,
+// CIR-NEXT:    ^bb4
+// CIR:    ]
+// CIR:  ^bb3:  // pred: ^bb2
+// CIR:    cir.label "A"
+// CIR:    cir.br ^bb1
+// CIR:  ^bb4:  // pred: ^bb2
+// CIR:    cir.label "B"
+// CIR:    cir.br ^bb1
+
+// LLVM: define dso_local void @C(i32 %0)
+// LLVM:   [[COND:%.*]] = select i1 [[CMP:%.*]], ptr blockaddress(@C, %[[A:.*]]), ptr blockaddress(@C, %[[B:.*]])
+// LLVM:   store ptr [[COND]], ptr [[PTR:%.*]], align 8
+// LLVM:   [[BLOCKADD:%.*]] = load ptr, ptr [[PTR]], align 8
+// LLVM:   br label %[[indirectgoto:.*]]
+// LLVM: [[RET:.*]]:
+// LLVM:   ret void
+// LLVM: [[indirectgoto]]:
+// LLVM:   [[PHI:%.*]] = phi ptr [ [[BLOCKADD]], %[[ENTRY:.*]] ]
+// LLVM:   indirectbr ptr [[PHI]], [label %[[A]], label %[[B]]]
+// LLVM: [[A]]:
+// LLVM:   br label %[[RET]]
+// LLVM: [[B]]:
+// LLVM:   br label %[[RET]]
+
+// OGCG: define dso_local void @C
+// OGCG:   [[COND:%.*]] = select i1 [[CMP:%.*]], ptr blockaddress(@C, %A), ptr blockaddress(@C, %B)
+// OGCG:   store ptr [[COND]], ptr [[PTR:%.*]], align 8
+// OGCG:   [[BLOCKADD:%.*]] = load ptr, ptr [[PTR]], align 8
+// OGCG:   br label %indirectgoto
+// OGCG: A:                                                ; preds = %indirectgoto
+// OGCG:   br label %return
+// OGCG: B:                                                ; preds = %indirectgoto
+// OGCG:   br label %return
+// OGCG: return:                                           ; preds = %B, %A
+// OGCG:   ret void
+// OGCG: indirectgoto:                                     ; preds = %entry
+// OGCG:   %indirect.goto.dest = phi ptr [ [[BLOCKADD]], %entry ]
+// OGCG:   indirectbr ptr %indirect.goto.dest, [label %A, label %B]
+
+void D(void) {
+  void *ptr = &&A;
+  void *ptr2 = &&A;
+  goto *ptr2;
+A:
+  void *ptr3 = &&A;
+  return;
+}
+
+// CIR:  cir.func {{.*}} @D
+// CIR:    %[[PTR:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["ptr", init]
+// CIR:    %[[PTR2:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["ptr2", init]
+// CIR:    %[[PTR3:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["ptr3", init]
+// CIR:    %[[BLK1:.*]] = cir.blockaddress <@D, "A"> -> !cir.ptr<!void>
+// CIR:    cir.store align(8) %[[BLK1]], %[[PTR]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// CIR:    %[[BLK2:.*]] = cir.blockaddress <@D, "A"> -> !cir.ptr<!void>
+// CIR:    cir.store align(8) %[[BLK2]], %[[PTR2]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// CIR:    %[[BLOCKADD:.*]] = cir.load align(8) %[[PTR2]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR:    cir.br ^bb1(%[[BLOCKADD]] : !cir.ptr<!void>)
+// CIR:  ^bb1([[PHI:%.*]]: !cir.ptr<!void> {{.*}}):  // pred: ^bb0
+// CIR:    cir.indirectbr [[PHI]] : <!void>, [
+// CIR-DAG:    ^bb2,
+// CIR-DAG:    ^bb2,
+// CIR-DAG:    ^bb2
+// CIR:    ]
+// CIR:  ^bb2:  // 3 preds: ^bb1, ^bb1, ^bb1
+// CIR:    cir.label "A"
+// CIR:    %[[BLK3:.*]] = cir.blockaddress <@D, "A"> -> !cir.ptr<!void>
+// CIR:    cir.store align(8) %[[BLK3]], %[[PTR3]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// CIR:    cir.return
+
+// LLVM: define dso_local void @D
+// LLVM:   %[[PTR:.*]] = alloca ptr, i64 1, align 8
+// LLVM:   %[[PTR2:.*]] = alloca ptr, i64 1, align 8
+// LLVM:   %[[PTR3:.*]] = alloca ptr, i64 1, align 8
+// LLVM:   store ptr blockaddress(@D, %[[A:.*]]), ptr %[[PTR]], align 8
+// LLVM:   store ptr blockaddress(@D, %[[A]]), ptr %[[PTR2]], align 8
+// LLVM:   %[[BLOCKADD:.*]] = load ptr, ptr %[[PTR2]], align 8
+// LLVM:   br label %[[indirectgoto:.*]]
+// LLVM: [[indirectgoto]]:
+// LLVM:   [[PHI:%.*]] = phi ptr [ %[[BLOCKADD]], %[[ENTRY:.*]] ]
+// LLVM:   indirectbr ptr [[PHI]], [label %[[A]], label %[[A]], label %[[A]]]
+// LLVM: [[A]]:
+// LLVM:   store ptr blockaddress(@D, %[[A]]), ptr %[[PTR3]], align 8
+// LLVM:   ret void
+
+// OGCG: define dso_local void @D
+// OGCG:   %[[PTR:.*]] = alloca ptr, align 8
+// OGCG:   %[[PTR2:.*]] = alloca ptr, align 8
+// OGCG:   %[[PTR3:.*]] = alloca ptr, align 8
+// OGCG:   store ptr blockaddress(@D, %A), ptr %[[PTR]], align 8
+// OGCG:   store ptr blockaddress(@D, %A), ptr %[[PTR2]], align 8
+// OGCG:   %[[BLOCKADD:.*]] = load ptr, ptr %[[PTR2]], align 8
+// OGCG:   br label %indirectgoto
+// OGCG: A:                                                ; preds = %indirectgoto, %indirectgoto, %indirectgoto
+// OGCG:   store ptr blockaddress(@D, %A), ptr %[[PTR3]], align 8
+// OGCG:   ret void
+// OGCG: indirectgoto:                                     ; preds = %entry
+// OGCG:   %indirect.goto.dest = phi ptr [ %[[BLOCKADD]], %entry ]
+// OGCG:   indirectbr ptr %indirect.goto.dest, [label %A, label %A, label %A]
+
+// This test checks that CIR preserves insertion order of blockaddresses
+// for indirectbr, even if some were resolved immediately and others later.
+void E(void) {
+  void *ptr = &&D;
+  void *ptr2 = &&C;
+A:
+B:
+  void *ptr3 = &&B;
+  void *ptr4 = &&A;
+C:
+D:
+  return;
+}
+
+//CIR:  cir.func {{.*}} @E()
+//CIR:  ^bb1({{.*}}: !cir.ptr<!void> {{.*}}):  // no predecessors
+//CIR:    cir.indirectbr {{.*}} poison : <!void>, [
+//CIR-NEXT:    ^bb5,
+//CIR-NEXT:    ^bb4,
+//CIR-NEXT:    ^bb3,
+//CIR-NEXT:    ^bb2
+//CIR:    ]
+//CIR:  ^bb2:  // 2 preds: ^bb0, ^bb1
+//CIR:    cir.label "A"
+//CIR:  ^bb3:  // 2 preds: ^bb1, ^bb2
+//CIR:    cir.label "B"
+//CIR:  ^bb4:  // 2 preds: ^bb1, ^bb3
+//CIR:    cir.label "C"
+//CIR:  ^bb5:  // 2 preds: ^bb1, ^bb4
+//CIR:    cir.label "D"
+
+// LLVM: define dso_local void @E()
+// LLVM:   store ptr blockaddress(@E, %[[D:.*]])
+// LLVM:   store ptr blockaddress(@E, %[[C:.*]])
+// LLVM:   br label %[[A:.*]]
+// LLVM: [[indirectgoto:.*]]:                                                ; No predecessors!
+// LLVM:   indirectbr ptr poison, [label %[[D]], label %[[C]], label %[[B:.*]], label %[[A]]]
+// LLVM: [[A]]:
+// LLVM:   br label %[[B]]
+// LLVM: [[B]]:
+// LLVM:   store ptr blockaddress(@E, %[[B]]), ptr %{{.*}}, align 8
+// LLVM:   store ptr blockaddress(@E, %[[A]]), ptr %{{.*}}, align 8
+// LLVM:   br label %{{.*}}
+// LLVM: [[C]]:
+// LLVM:   br label %{{.*}}
+// LLVM: [[D]]:
+
+// OGCG: define dso_local void @E() #0 {
+// OGCG:   store ptr blockaddress(@E, %D), ptr %ptr, align 8
+// OGCG:   store ptr blockaddress(@E, %C), ptr %ptr2, align 8
+// OGCG:   br label %A
+// OGCG: A:                                                ; preds = %indirectgoto, %entry
+// OGCG:   br label %B
+// OGCG: B:                                                ; preds = %indirectgoto, %A
+// OGCG:   store ptr blockaddress(@E, %B), ptr %ptr3, align 8
+// OGCG:   store ptr blockaddress(@E, %A), ptr %ptr4, align 8
+// OGCG:   br label %C
+// OGCG: C:                                                ; preds = %B, %indirectgoto
+// OGCG:   br label %D
+// OGCG: D:                                                ; preds = %C, %indirectgoto
+// OGCG:   ret void
+// OGCG: indirectgoto:                                     ; No predecessors!
+// OGCG:   indirectbr ptr poison, [label %D, label %C, label %B, label %A]
diff --git a/clang/test/CIR/Incubator/CodeGen/lalg.c b/clang/test/CIR/Incubator/CodeGen/lalg.c
new file mode 100644
index 0000000000000..14ebd807d002c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/lalg.c
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o - | FileCheck %s
+
+double dot() {
+  double x = 0.0;
+  double y = 0.0f;
+  double result = x * y;
+  return result;
+}
+
+//      CHECK: %1 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["x", init]
+// CHECK-NEXT: %2 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["y", init]
+// CHECK-NEXT: %3 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["result", init]
+// CHECK-NEXT: %4 = cir.const #cir.fp<0.000000e+00> : !cir.double
+// CHECK-NEXT: cir.store{{.*}} %4, %1 : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %5 = cir.const #cir.fp<0.000000e+00> : !cir.float
+// CHECK-NEXT: %6 = cir.cast floating %5 : !cir.float -> !cir.double
+// CHECK-NEXT: cir.store{{.*}} %6, %2 : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %7 = cir.load{{.*}} %1 : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %8 = cir.load{{.*}} %2 : !cir.ptr<!cir.double>, !cir.double
+// CHECK-NEXT: %9 = cir.binop(mul, %7, %8) : !cir.double
diff --git a/clang/test/CIR/Incubator/CodeGen/lambda.cpp b/clang/test/CIR/Incubator/CodeGen/lambda.cpp
new file mode 100644
index 0000000000000..35cfd054db34f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/lambda.cpp
@@ -0,0 +1,392 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -Wno-return-stack-address -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir  -emit-llvm -o - %s \
+// RUN: | opt -S -passes=instcombine,mem2reg,simplifycfg -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+void fn() {
+  auto a = [](){};
+  a();
+}
+
+//      CHECK-DAG: !rec_A = !cir.record<struct "A" {!s32i}>
+//      CHECK: !rec_anon2E0 = !cir.record<class "anon.0" padded {!u8i}>
+//      CHECK-DAG: !rec_anon2E7 = !cir.record<class "anon.7" {!rec_A}>
+//      CHECK-DAG: !rec_anon2E8 = !cir.record<class "anon.8" {!cir.ptr<!rec_A>}>
+//  CHECK-DAG: module
+
+//      CHECK: cir.func no_inline lambda optnone internal private dso_local @_ZZ2fnvENK3$_0clEv{{.*}})
+
+//      CHECK:   cir.func {{.*}} @_Z2fnv()
+// CHECK-NEXT:     %0 = cir.alloca !rec_anon2E0, !cir.ptr<!rec_anon2E0>, ["a"]
+//      CHECK:   cir.call @_ZZ2fnvENK3$_0clEv
+
+// LLVM-LABEL:  _ZZ2fnvENK3$_0clEv
+// LLVM-SAME: (ptr [[THIS:%.*]])
+// LLVM: [[THIS_ADDR:%.*]] = alloca ptr, i64 1, align 8
+// LLVM: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// LLVM: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// LLVM: ret void
+
+// LLVM-LABEL: _Z2fnv
+// LLVM:  [[a:%.*]] = alloca %class.anon.0, i64 1, align 1
+// FIXME: parameter attributes should be emitted
+// LLVM:  call void @"_ZZ2fnvENK3$_0clEv"(ptr [[a]])
+// COM: LLVM:  call void @"_ZZ2fnvENK3$_0clEv"(ptr noundef nonnull align 1 dereferenceable(1) [[a]])
+// LLVM:  ret void
+
+void l0() {
+  int i;
+  auto a = [&](){ i = i + 1; };
+  a();
+}
+
+// CHECK: cir.func no_inline lambda optnone internal private dso_local @_ZZ2l0vENK3$_0clEv({{.*}})
+
+// CHECK: %0 = cir.alloca !cir.ptr<!rec_anon2E2>, !cir.ptr<!cir.ptr<!rec_anon2E2>>, ["this", init] {alignment = 8 : i64}
+// CHECK: cir.store{{.*}} %arg0, %0 : !cir.ptr<!rec_anon2E2>, !cir.ptr<!cir.ptr<!rec_anon2E2>>
+// CHECK: %1 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!rec_anon2E2>>, !cir.ptr<!rec_anon2E2>
+// CHECK: %2 = cir.get_member %1[0] {name = "i"} : !cir.ptr<!rec_anon2E2> -> !cir.ptr<!cir.ptr<!s32i>>
+// CHECK: %3 = cir.load{{.*}} %2 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK: %4 = cir.load{{.*}} %3 : !cir.ptr<!s32i>, !s32i
+// CHECK: %5 = cir.const #cir.int<1> : !s32i
+// CHECK: %6 = cir.binop(add, %4, %5) nsw : !s32i
+// CHECK: %7 = cir.get_member %1[0] {name = "i"} : !cir.ptr<!rec_anon2E2> -> !cir.ptr<!cir.ptr<!s32i>>
+// CHECK: %8 = cir.load{{.*}} %7 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK: cir.store{{.*}} %6, %8 : !s32i, !cir.ptr<!s32i>
+
+// CHECK-LABEL: _Z2l0v
+
+// LLVM-LABEL: _ZZ2l0vENK3$_0clEv
+// LLVM-SAME: (ptr [[THIS:%.*]])
+// LLVM: [[THIS_ADDR:%.*]] = alloca ptr, i64 1, align 8
+// LLVM: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// LLVM: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// LLVM: [[I:%.*]] = getelementptr %class.anon.2, ptr [[THIS1]], i32 0, i32 0
+// FIXME: getelementptr argument attributes should be emitted
+// COM: LLVM: [[I:%.*]] = getelementptr inbounds nuw %class.anon.0, ptr [[THIS1]], i32 0, i32 0
+// LLVM: [[TMP0:%.*]] = load ptr, ptr [[I]], align 8
+// LLVM: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// LLVM: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
+// LLVM: [[I:%.*]] = getelementptr %class.anon.2, ptr [[THIS1]], i32 0, i32
+// COM: LLVM: [[I:%.*]] = getelementptr inbounds nuw %class.anon.0, ptr [[THIS1]], i32 0, i32 0
+// LLVM: [[TMP4:%.*]] = load ptr, ptr [[I]], align 8
+// LLVM: store i32 [[ADD]], ptr [[TMP4]], align 4
+// LLVM: ret void
+
+// LLVM-LABEL: _Z2l0v
+// LLVM:  [[i:%.*]] = alloca i32, i64 1, align 4
+// LLVM:  [[a:%.*]] = alloca %class.anon.2, i64 1, align 8
+// FIXME: getelementptr argument attributes should be emitted
+// COM: LLVM:  [[TMP0:%.*]] = getelementptr inbounds %class.anon.2, ptr [[a]], i32 0, i32 0
+// LLVM:  [[TMP0:%.*]] = getelementptr %class.anon.2, ptr [[a]], i32 0, i32 0
+// LLVM:  store ptr [[i]], ptr [[TMP0]], align 8
+// FIXME: parameter attributes should be emitted
+// COM: LLVM:  call void @"_ZZ2l0vENK3$_0clEv"(ptr noundef nonnull align 1 dereferenceable(1) [[a]])
+// LLVM:  call void @"_ZZ2l0vENK3$_0clEv"(ptr [[a]])
+// LLVM:  ret void
+
+auto g() {
+  int i = 12;
+  return [&] {
+    i += 100;
+    return i;
+  };
+}
+
+// CHECK-LABEL: @_Z1gv()
+// CHECK: %0 = cir.alloca !rec_anon2E3, !cir.ptr<!rec_anon2E3>, ["__retval"] {alignment = 8 : i64}
+// CHECK: %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+// CHECK: %2 = cir.const #cir.int<12> : !s32i
+// CHECK: cir.store{{.*}} %2, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK: %3 = cir.get_member %0[0] {name = "i"} : !cir.ptr<!rec_anon2E3> -> !cir.ptr<!cir.ptr<!s32i>>
+// CHECK: cir.store{{.*}} %1, %3 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CHECK: %4 = cir.load{{.*}} %0 : !cir.ptr<!rec_anon2E3>, !rec_anon2E3
+// CHECK: cir.return %4 : !rec_anon2E3
+
+// LLVM-LABEL: @_Z1gv()
+// LLVM: [[retval:%.*]] = alloca %class.anon.3, i64 1, align 8
+// LLVM: [[i:%.*]] = alloca i32, i64 1, align 4
+// LLVM: store i32 12, ptr [[i]], align 4
+// LLVM: [[i_addr:%.*]] = getelementptr %class.anon.3, ptr [[retval]], i32 0, i32 0
+// LLVM: store ptr [[i]], ptr [[i_addr]], align 8
+// LLVM: [[tmp:%.*]] = load %class.anon.3, ptr [[retval]], align 8
+// LLVM: ret %class.anon.3 [[tmp]]
+
+auto g2() {
+  int i = 12;
+  auto lam = [&] {
+    i += 100;
+    return i;
+  };
+  return lam;
+}
+
+// Should be same as above because of NRVO
+// CHECK-LABEL: @_Z2g2v()
+// CHECK-NEXT: %0 = cir.alloca !rec_anon2E4, !cir.ptr<!rec_anon2E4>, ["__retval", init] {alignment = 8 : i64}
+// CHECK-NEXT: %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+// CHECK-NEXT: %2 = cir.const #cir.int<12> : !s32i
+// CHECK-NEXT: cir.store{{.*}} %2, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %3 = cir.get_member %0[0] {name = "i"} : !cir.ptr<!rec_anon2E4> -> !cir.ptr<!cir.ptr<!s32i>>
+// CHECK-NEXT: cir.store{{.*}} %1, %3 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CHECK-NEXT: %4 = cir.load{{.*}} %0 : !cir.ptr<!rec_anon2E4>, !rec_anon2E4
+// CHECK-NEXT: cir.return %4 : !rec_anon2E4
+
+// LLVM-LABEL: @_Z2g2v()
+// LLVM: [[retval:%.*]] = alloca %class.anon.4, i64 1, align 8
+// LLVM: [[i:%.*]] = alloca i32, i64 1, align 4
+// LLVM: store i32 12, ptr [[i]], align 4
+// LLVM: [[i_addr:%.*]] = getelementptr %class.anon.4, ptr [[retval]], i32 0, i32 0
+// LLVM: store ptr [[i]], ptr [[i_addr]], align 8
+// LLVM: [[tmp:%.*]] = load %class.anon.4, ptr [[retval]], align 8
+// LLVM: ret %class.anon.4 [[tmp]]
+
+int f() {
+  return g2()();
+}
+
+// CHECK-LABEL: @_Z1fv()
+// CHECK-NEXT:   %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CHECK-NEXT:   cir.scope {
+// CHECK-NEXT:     %2 = cir.alloca !rec_anon2E4, !cir.ptr<!rec_anon2E4>, ["ref.tmp0"] {alignment = 8 : i64}
+// CHECK-NEXT:     %3 = cir.call @_Z2g2v() : () -> !rec_anon2E4
+// CHECK-NEXT:     cir.store{{.*}} %3, %2 : !rec_anon2E4, !cir.ptr<!rec_anon2E4>
+// CHECK-NEXT:     %4 = cir.call @_ZZ2g2vENK3$_0clEv(%2) : (!cir.ptr<!rec_anon2E4>) -> !s32i
+// CHECK-NEXT:     cir.store{{.*}} %4, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:   }
+// CHECK-NEXT:   %1 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:   cir.return %1 : !s32i
+// CHECK-NEXT: }
+
+// LLVM-LABEL: _ZZ2g2vENK3$_0clEv
+// LLVM-SAME: (ptr [[THIS:%.*]])
+// LLVM: [[THIS_ADDR:%.*]] = alloca ptr, i64 1, align 8
+// LLVM: [[I_SAVE:%.*]] = alloca i32, i64 1, align 4
+// LLVM: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// LLVM: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// LLVM: [[I:%.*]] = getelementptr %class.anon.4, ptr [[THIS1]], i32 0, i32 0
+// LLVM: [[TMP0:%.*]] = load ptr, ptr [[I]], align 8
+// LLVM: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// LLVM: [[ADD:%.*]] = add nsw i32 [[TMP1]], 100
+// LLVM: [[I:%.*]] = getelementptr %class.anon.4, ptr [[THIS1]], i32 0, i32 0
+// LLVM: [[TMP4:%.*]] = load ptr, ptr [[I]], align 8
+// LLVM: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// LLVM: store i32 [[TMP5]], ptr [[I_SAVE]], align 4
+// LLVM: [[TMP6:%.*]] = load i32, ptr [[I_SAVE]], align 4
+// LLVM: ret i32 [[TMP6]]
+
+// LLVM-LABEL: _Z1fv
+// LLVM: [[ref_tmp0:%.*]] = alloca %class.anon.4, i64 1, align 8
+// LLVM: [[ret_val:%.*]] = alloca i32, i64 1, align 4
+// LLVM: br label %[[scope_bb:[0-9]+]]
+// LLVM: [[scope_bb]]:
+// LLVM: [[tmp0:%.*]] = call %class.anon.4 @_Z2g2v()
+// LLVM: store %class.anon.4 [[tmp0]], ptr [[ref_tmp0]], align 8
+// LLVM: [[tmp1:%.*]] = call i32 @"_ZZ2g2vENK3$_0clEv"(ptr [[ref_tmp0]])
+// LLVM: store i32 [[tmp1]], ptr [[ret_val]], align 4
+// LLVM: br label %[[ret_bb:[0-9]+]]
+// LLVM: [[ret_bb]]:
+// LLVM: [[tmp2:%.*]] = load i32, ptr [[ret_val]], align 4
+// LLVM: ret i32 [[tmp2]]
+
+int g3() {
+  auto* fn = +[](int const& i) -> int { return i; };
+  auto task = fn(3);
+  return task;
+}
+
+// lambda operator()
+// CHECK: cir.func no_inline lambda optnone internal private dso_local @_ZZ2g3vENK3$_0clERKi{{.*}}!s32i extra
+
+// lambda __invoke()
+// CHECK:   cir.func no_inline optnone internal private dso_local @_ZZ2g3vEN3$_08__invokeERKi
+
+// lambda operator int (*)(int const&)()
+// CHECK:   cir.func no_inline optnone internal private dso_local @_ZZ2g3vENK3$_0cvPFiRKiEEv
+
+// CHECK-LABEL: @_Z2g3v()
+// CHECK:     %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CHECK:     %1 = cir.alloca !cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> !s32i>>, !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> !s32i>>>, ["fn", init] {alignment = 8 : i64}
+// CHECK:     %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["task", init] {alignment = 4 : i64}
+
+// 1. Use `operator int (*)(int const&)()` to retrieve the fnptr to `__invoke()`.
+// CHECK:     %3 = cir.scope {
+// CHECK:       %7 = cir.alloca !rec_anon2E5, !cir.ptr<!rec_anon2E5>, ["ref.tmp0"] {alignment = 1 : i64}
+// CHECK:       %8 = cir.call @_ZZ2g3vENK3$_0cvPFiRKiEEv(%7) : (!cir.ptr<!rec_anon2E5>) -> !cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> !s32i>>
+// CHECK:       %9 = cir.unary(plus, %8) : !cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> !s32i>>, !cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> !s32i>>
+// CHECK:       cir.yield %9 : !cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> !s32i>>
+// CHECK:     }
+
+// 2. Load ptr to `__invoke()`.
+// CHECK:     cir.store{{.*}} %3, %1 : !cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> !s32i>>, !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> !s32i>>>
+// CHECK:     %4 = cir.scope {
+// CHECK:       %7 = cir.alloca !s32i, !cir.ptr<!s32i>, ["ref.tmp1", init] {alignment = 4 : i64}
+// CHECK:       %8 = cir.load{{.*}} %1 : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> !s32i>>>, !cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> !s32i>>
+// CHECK:       %9 = cir.const #cir.int<3> : !s32i
+// CHECK:       cir.store{{.*}} %9, %7 : !s32i, !cir.ptr<!s32i>
+
+// 3. Call `__invoke()`, which effectively executes `operator()`.
+// CHECK:       %10 = cir.call %8(%7) : (!cir.ptr<!cir.func<(!cir.ptr<!s32i>) -> !s32i>>, !cir.ptr<!s32i>) -> !s32i
+// CHECK:       cir.yield %10 : !s32i
+// CHECK:     }
+
+// CHECK:   }
+
+// lambda operator()
+// LLVM-LABEL: _ZZ2g3vENK3$_0clERKi
+// FIXME: argument attributes should be emitted
+// COM: LLVM-SAME: (ptr noundef nonnull align 1 dereferenceable(1) {{%.*}},
+// COM: LLVM-SAME: ptr noundef nonnull align 4 dereferenceable(4){{%.*}}) #0 align 2
+
+// lambda __invoke()
+// LLVM-LABEL: _ZZ2g3vEN3$_08__invokeERKi
+// LLVM-SAME: (ptr [[i:%.*]])
+// LLVM: [[i_addr:%.*]] = alloca ptr, i64 1, align 8
+// LLVM: [[ret_val:%.*]] = alloca i32, i64 1, align 4
+// LLVM: [[unused_capture:%.*]] = alloca %class.anon.5, i64 1, align 1
+// LLVM: store ptr [[i]], ptr [[i_addr]], align 8
+// LLVM: [[TMP0:%.*]] = load ptr, ptr [[i_addr]], align 8
+// FIXME: call and argument attributes should be emitted
+// COM: LLVM: [[CALL:%.*]] =  call noundef i32 @"_ZZ2g3vENK3$_0clERKi"(ptr noundef nonnull align 1 dereferenceable(1) [[unused_capture]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]])
+// LLVM: [[CALL:%.*]] = call i32 @"_ZZ2g3vENK3$_0clERKi"(ptr [[unused_capture]], ptr [[TMP0]])
+// LLVM: store i32 [[CALL]], ptr [[ret_val]], align 4
+// LLVM: %[[ret:.*]] = load i32, ptr [[ret_val]], align 4
+// LLVM: ret i32 %[[ret]]
+
+// lambda operator int (*)(int const&)()
+// LLVM-LABEL: @"_ZZ2g3vENK3$_0cvPFiRKiEEv"
+// LLVM:  store ptr @"_ZZ2g3vEN3$_08__invokeERKi", ptr [[ret_val:%.*]], align 8
+// LLVM:  [[TMP0:%.*]] = load ptr, ptr [[ret_val]], align 8
+// LLVM:  ret ptr [[TMP0]]
+
+// LLVM-LABEL: _Z2g3v
+// LLVM-DAG: [[ref_tmp0:%.*]] = alloca %class.anon.5, i64 1, align 1
+// LLVM-DAG: [[ref_tmp1:%.*]] = alloca i32, i64 1, align 4
+// LLVM-DAG: [[ret_val:%.*]] = alloca i32, i64 1, align 4
+// LLVM-DAG: [[fn_ptr:%.*]] = alloca ptr, i64 1, align 8
+// LLVM-DAG: [[task:%.*]] = alloca i32, i64 1, align 4
+// LLVM: br label %[[scope0_bb:[0-9]+]]
+
+// LLVM: [[scope0_bb]]: {{.*}}; preds = %0
+// LLVM: [[call:%.*]] = call ptr @"_ZZ2g3vENK3$_0cvPFiRKiEEv"(ptr [[ref_tmp0]])
+// LLVM: br label %[[scope1_before:[0-9]+]]
+
+// LLVM: [[scope1_before]]: {{.*}}; preds = %[[scope0_bb]]
+// LLVM: [[tmp0:%.*]] = phi ptr [ [[call]], %[[scope0_bb]] ]
+// LLVM: br label %[[scope1_bb:[0-9]+]]
+
+// LLVM: [[scope1_bb]]: {{.*}}; preds = %[[scope1_before]]
+// LLVM: [[fn:%.*]] = load ptr, ptr [[fn_ptr]], align 8
+// LLVM: store i32 3, ptr [[ref_tmp1]], align 4
+// LLVM: [[call1:%.*]] = call i32 [[fn]](ptr [[ref_tmp1]])
+// LLVM: br label %[[ret_bb:[0-9]+]]
+
+// LLVM: [[ret_bb]]: {{.*}}; preds = %[[scope1_bb]]
+// LLVM: [[tmp1:%.*]] = phi i32 [ [[call1]], %[[scope1_bb]] ]
+// LLVM: store i32 [[tmp1]], ptr [[task]], align 4
+// LLVM: [[tmp2:%.*]] = load i32, ptr [[task]], align 4
+// LLVM: store i32 [[tmp2]], ptr [[ret_val]], align 4
+// LLVM: [[tmp3:%.*]] = load i32, ptr [[ret_val]], align 4
+// LLVM: ret i32 [[tmp3]]
+
+struct A {
+  int a = 111;
+  int foo() { return [*this] { return a; }(); }
+  int bar() { return [this] { return a; }(); }
+};
+// A's default ctor
+// CHECK-LABEL: _ZN1AC1Ev
+
+// lambda operator() in foo()
+// CHECK-LABEL: _ZZN1A3fooEvENKUlvE_clEv
+// CHECK-SAME: ([[ARG:%.*]]: !cir.ptr<!rec_anon2E7>
+// CHECK: [[ARG_ADDR:%.*]] = cir.alloca !cir.ptr<!rec_anon2E7>, !cir.ptr<!cir.ptr<!rec_anon2E7>>, ["this", init] {alignment = 8 : i64}
+// CHECK: [[RETVAL_ADDR:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CHECK: cir.store{{.*}} [[ARG]], [[ARG_ADDR]] : !cir.ptr<!rec_anon2E7>, !cir.ptr<!cir.ptr<!rec_anon2E7>>
+// CHECK: [[CLS_ANNO7:%.*]] = cir.load{{.*}} [[ARG_ADDR]] : !cir.ptr<!cir.ptr<!rec_anon2E7>>, !cir.ptr<!rec_anon2E7>
+// CHECK: [[STRUCT_A:%.*]] = cir.get_member [[CLS_ANNO7]][0] {name = "this"} : !cir.ptr<!rec_anon2E7> -> !cir.ptr<!rec_A>
+// CHECK: [[a:%.*]] = cir.get_member [[STRUCT_A]][0] {name = "a"} : !cir.ptr<!rec_A> -> !cir.ptr<!s32i> loc(#loc70)
+// CHECK: [[TMP0:%.*]] = cir.load{{.*}} [[a]] : !cir.ptr<!s32i>, !s32i
+// CHECK: cir.store{{.*}} [[TMP0]], [[RETVAL_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CHECK: [[RET_VAL:%.*]] = cir.load{{.*}} [[RETVAL_ADDR]] : !cir.ptr<!s32i>,
+// CHECK: cir.return [[RET_VAL]] : !s32i
+
+// LLVM-LABEL: @_ZZN1A3fooEvENKUlvE_clEv
+// LLVM-SAME: (ptr [[ARG:%.*]])
+// LLVM: [[ARG_ADDR:%.*]]  = alloca ptr, i64 1, align 8
+// LLVM: [[RET:%.*]] = alloca i32, i64 1, align 4
+// LLVM: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8
+// LLVM: [[CLS_ANNO7:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8
+// LLVM: [[STRUCT_A:%.*]] = getelementptr %class.anon.7, ptr [[CLS_ANNO7]], i32 0, i32 0
+// LLVM: [[a:%.*]] = getelementptr %struct.A, ptr [[STRUCT_A]], i32 0, i32 0
+// LLVM: [[TMP0:%.*]] = load i32, ptr [[a]], align 4
+// LLVM: store i32 [[TMP0]], ptr [[RET]], align 4
+// LLVM: [[TMP1:%.*]] = load i32, ptr [[RET]], align 4
+// LLVM: ret i32 [[TMP1]]
+
+// A::foo()
+// CHECK-LABEL: @_ZN1A3fooEv
+// CHECK: [[THIS_ARG:%.*]] = cir.alloca !rec_anon2E7, !cir.ptr<!rec_anon2E7>, ["ref.tmp0"] {alignment = 4 : i64}
+// CHECK: cir.call @_ZZN1A3fooEvENKUlvE_clEv([[THIS_ARG]]) : (!cir.ptr<!rec_anon2E7>) -> !s32i
+
+// LLVM-LABEL: _ZN1A3fooEv
+// LLVM: [[this_in_foo:%.*]] =  alloca %class.anon.7, i64 1, align 4
+// LLVM: call i32 @_ZZN1A3fooEvENKUlvE_clEv(ptr [[this_in_foo]])
+
+// lambda operator() in bar()
+// CHECK-LABEL: _ZZN1A3barEvENKUlvE_clEv
+// CHECK-SAME: ([[ARG2:%.*]]: !cir.ptr<!rec_anon2E8>
+// CHECK: [[ARG2_ADDR:%.*]] = cir.alloca !cir.ptr<!rec_anon2E8>, !cir.ptr<!cir.ptr<!rec_anon2E8>>, ["this", init] {alignment = 8 : i64}
+// CHECK: [[RETVAL_ADDR:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CHECK: cir.store{{.*}} [[ARG2]], [[ARG2_ADDR]] : !cir.ptr<!rec_anon2E8>, !cir.ptr<!cir.ptr<!rec_anon2E8>>
+// CHECK: [[CLS_ANNO8:%.*]] = cir.load{{.*}} [[ARG2_ADDR]] : !cir.ptr<!cir.ptr<!rec_anon2E8>>, !cir.ptr<!rec_anon2E8>
+// CHECK: [[STRUCT_A_PTR:%.*]] = cir.get_member [[CLS_ANNO8]][0] {name = "this"} : !cir.ptr<!rec_anon2E8> -> !cir.ptr<!cir.ptr<!rec_A>>
+// CHECK: [[STRUCT_A:%.*]] = cir.load{{.*}} [[STRUCT_A_PTR]] : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A>
+// CHECK: [[a:%.*]] = cir.get_member [[STRUCT_A]][0] {name = "a"} : !cir.ptr<!rec_A> -> !cir.ptr<!s32i> loc(#loc70)
+// CHECK: [[TMP0:%.*]] = cir.load{{.*}} [[a]] : !cir.ptr<!s32i>, !s32i
+// CHECK: cir.store{{.*}} [[TMP0]], [[RETVAL_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CHECK: [[RET_VAL:%.*]] = cir.load{{.*}} [[RETVAL_ADDR]] : !cir.ptr<!s32i>
+// CHECK: cir.return [[RET_VAL]] : !s32i
+
+// LLVM-LABEL: _ZZN1A3barEvENKUlvE_clEv
+// LLVM-SAME: (ptr [[ARG2:%.*]])
+// LLVM: [[ARG2_ADDR:%.*]]  = alloca ptr, i64 1, align 8
+// LLVM: [[RET:%.*]] = alloca i32, i64 1, align 4
+// LLVM: store ptr [[ARG2]], ptr [[ARG2_ADDR]], align 8
+// LLVM: [[CLS_ANNO8:%.*]] = load ptr, ptr [[ARG2_ADDR]], align 8
+// LLVM: [[STRUCT_A_PTR:%.*]] = getelementptr %class.anon.8, ptr [[CLS_ANNO8]], i32 0, i32 0
+// LLVM: [[STRUCT_A:%.*]] = load ptr, ptr [[STRUCT_A_PTR]], align 8
+// LLVM: [[a:%.*]] = getelementptr %struct.A, ptr [[STRUCT_A]], i32
+// LLVM: [[TMP0:%.*]] = load i32, ptr [[a]], align 4
+// LLVM: store i32 [[TMP0]], ptr [[RET]], align 4
+// LLVM: [[TMP1:%.*]] = load i32, ptr [[RET]], align 4
+// LLVM: ret i32 [[TMP1]]
+
+// A::bar()
+// CHECK-LABEL: _ZN1A3barEv
+// CHECK: [[THIS_ARG:%.*]] = cir.alloca !rec_anon2E8, !cir.ptr<!rec_anon2E8>, ["ref.tmp0"] {alignment = 8 : i64}
+// CHECK: cir.call @_ZZN1A3barEvENKUlvE_clEv([[THIS_ARG]])
+
+// LLVM-LABEL: _ZN1A3barEv
+// LLVM: [[this_in_bar:%.*]] =  alloca %class.anon.8, i64 1, align 8
+// LLVM: call i32 @_ZZN1A3barEvENKUlvE_clEv(ptr [[this_in_bar]])
+
+int test_lambda_this1(){
+  struct A clsA;
+  int x = clsA.foo();
+  int y = clsA.bar();
+  return x+y;
+}
+
+// CHECK-LABEL: test_lambda_this1
+// Construct A
+// CHECK: cir.call @_ZN1AC1Ev([[A_THIS:%.*]]) : (!cir.ptr<!rec_A>) -> ()
+// CHECK: cir.call @_ZN1A3fooEv([[A_THIS]]) : (!cir.ptr<!rec_A>) -> !s32i
+// CHECK: cir.call @_ZN1A3barEv([[A_THIS]]) : (!cir.ptr<!rec_A>) -> !s32i
+
+// LLVM-LABEL: test_lambda_this1
+// LLVM: [[A_THIS:%.*]] = alloca %struct.A, i64 1, align 4
+// LLVM: call void @_ZN1AC1Ev(ptr [[A_THIS]])
+// LLVM: call i32 @_ZN1A3fooEv(ptr [[A_THIS]])
+// LLVM: call i32 @_ZN1A3barEv(ptr [[A_THIS]])
diff --git a/clang/test/CIR/Incubator/CodeGen/libc.c b/clang/test/CIR/Incubator/CodeGen/libc.c
new file mode 100644
index 0000000000000..fa0332261b725
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/libc.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -fwrapv
+// RUN: FileCheck --check-prefix=CIR_NO_POISON --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -fwrapv
+// RUN: FileCheck --check-prefix=LLVM_NO_POISON --input-file=%t.ll %s
+
+// Should generate CIR's builtin memcpy op.
+void *memcpy(void *, const void *, unsigned long);
+void testMemcpy(void *dst, const void *src, unsigned long size) {
+  memcpy(dst, src, size);
+  // CHECK: cir.libc.memcpy %{{.+}} bytes from %{{.+}} to %{{.+}} : !u64i, !cir.ptr<!void> -> !cir.ptr<!void>
+}
+
+// Should generate CIR's builtin memmove op.
+void *memmove(void *, const void *, unsigned long);
+void testMemmove(void *dst, const void *src, unsigned long size) {
+  memmove(dst, src, size);
+  // CHECK: cir.libc.memmove %{{.+}} bytes from %{{.+}} to %{{.+}} : !cir.ptr<!void>, !u64i
+  // LLVM: call void @llvm.memmove.{{.+}}.i64(ptr %{{.+}}, ptr %{{.+}}, i64 %{{.+}}, i1 false)
+}
+
+// Should generate CIR's builtin memset op.
+void *memset(void *, int, unsigned long);
+void testMemset(void *dst, int val, unsigned long size) {
+  memset(dst, val, size);
+  // CHECK: cir.libc.memset %{{.+}} bytes from %{{.+}} set to %{{.+}} : !cir.ptr<!void>, !s32i, !u64i
+  // LLVM: call void @llvm.memset.{{.+}}.i64(ptr %{{.+}}, i8 %{{.+}}, i64 %{{.+}}, i1 false)
+}
+
+double fabs(double);
+double testFabs(double x) {
+  return fabs(x);
+  // CHECK: cir.fabs %{{.+}} : !cir.double
+}
+
+float fabsf(float);
+float testFabsf(float x) {
+  return fabsf(x);
+  // CHECK: cir.fabs %{{.+}} : !cir.float
+}
+
+int abs(int);
+int testAbs(int x) {
+  return abs(x);
+  // CHECK: cir.abs %{{.+}} poison : !s32i
+  // LLVM: %{{.+}} = call i32 @llvm.abs.i32(i32 %{{.+}}, i1 true)
+  // CIR_NO_POISON: cir.abs %{{.+}} : !s32i
+  // LLVM_NO_POISON: %{{.+}} = call i32 @llvm.abs.i32(i32 %{{.+}}, i1 false)
+}
+
+long labs(long);
+long testLabs(long x) {
+  return labs(x);
+  // CHECK: cir.abs %{{.+}} poison : !s64i
+  // LLVM: %{{.+}} = call i64 @llvm.abs.i64(i64 %{{.+}}, i1 true)
+  // CIR_NO_POISON: cir.abs %{{.+}} : !s64i
+  // LLVM_NO_POISON: %{{.+}} = call i64 @llvm.abs.i64(i64 %{{.+}}, i1 false)
+}
+
+long long llabs(long long);
+long long testLlabs(long long x) {
+  return llabs(x);
+  // CHECK: cir.abs %{{.+}} poison : !s64i
+  // LLVM: %{{.+}} = call i64 @llvm.abs.i64(i64 %{{.+}}, i1 true)
+  // CIR_NO_POISON: cir.abs %{{.+}} : !s64i
+  // LLVM_NO_POISON: %{{.+}} = call i64 @llvm.abs.i64(i64 %{{.+}}, i1 false)
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/libcall.cpp b/clang/test/CIR/Incubator/CodeGen/libcall.cpp
new file mode 100644
index 0000000000000..8c54ca8d9644d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/libcall.cpp
@@ -0,0 +1,63 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+typedef __builtin_va_list va_list;
+
+static __inline__ __attribute__((__always_inline__)) __attribute__((__format__(printf, 3, 0)))
+int vsnprintf(char* const __attribute__((pass_object_size(1))) dest, int size, const char* format, va_list ap)
+        __attribute__((overloadable)) {
+    return __builtin___vsnprintf_chk(dest, size, 0, __builtin_object_size(((dest)), (1)), format, ap);
+}
+
+typedef long unsigned int size_t;
+
+size_t __strlen_chk(const char* __s, size_t __n) __attribute__((annotate("introduced_in=" "17")));
+size_t strlen(const char* __s) __attribute__((__pure__));
+static __inline__ __attribute__((__always_inline__))
+size_t strlen(const char* const s __attribute__((pass_object_size(0)))) __attribute__((overloadable)) {
+    size_t bos = __builtin_object_size(((s)), (0));
+
+    if (bos == ((size_t) -1)) {
+        return __builtin_strlen(s);
+    }
+
+    return __strlen_chk(s, bos);
+}
+
+void log(int, const char *, int);
+
+void consume_message(const char *m) {
+  log(3, m, strlen(m));
+}
+
+void t(const char* fmt, ...) {
+  va_list args;
+  __builtin_va_start(args, fmt);
+  const int size = 512;
+  char message[size];
+  vsnprintf(message, size, fmt, args);
+  consume_message(message);
+}
+
+// CHECK: cir.func {{.*}} @_Z15consume_messagePKc(%arg0: !cir.ptr<!s8i>
+// CHECK:   %0 = cir.alloca !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>, ["m", init] {alignment = 8 : i64}
+
+// CHECK:   %3 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
+// CHECK:   %4 = cir.objsize max %3 : !cir.ptr<!s8i> -> !u64i
+// CHECK:   %5 = cir.call @_ZL6strlenPKcU17pass_object_size0(%3, %4) : (!cir.ptr<!s8i>, !u64i) -> !u64i
+
+// CHECK: cir.func {{.*}} @__vsnprintf_chk
+// CHECK: cir.func {{.*}} @_ZL9vsnprintfPcU17pass_object_size1iPKcP13__va_list_tag
+
+// Implicit size parameter in arg %1
+//
+// FIXME: tag the param with an attribute to designate the size information.
+//
+// CHECK: %1 = cir.alloca !u64i, !cir.ptr<!u64i>, ["", init] {alignment = 8 : i64}
+
+// CHECK: cir.store %arg1, %1 : !u64i, !cir.ptr<!u64i>
+
+// CHECK: %10 = cir.load{{.*}} %1 : !cir.ptr<!u64i>, !u64i
+// CHECK: %11 = cir.load{{.*}} %3 : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
+// CHECK: %12 = cir.load{{.*}} %4 : !cir.ptr<!cir.ptr<!rec___va_list_tag>>, !cir.ptr<!rec___va_list_tag>
+// CHECK: %13 = cir.call @__vsnprintf_chk(%6, %8, %9, %10, %11, %12)
diff --git a/clang/test/CIR/Incubator/CodeGen/link-bitcode-file.c b/clang/test/CIR/Incubator/CodeGen/link-bitcode-file.c
new file mode 100644
index 0000000000000..f453c01d69330
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/link-bitcode-file.c
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -O1 -triple x86_64-unknown-linux-gnu -fclangir -DBITCODE -emit-llvm-bc -o %t.bc %s
+// RUN: %clang_cc1 -O1 -triple x86_64-unknown-linux-gnu -fclangir -DBITCODE2 -emit-llvm-bc -o %t-2.bc %s
+// RUN: %clang_cc1 -O1 -triple x86_64-unknown-linux-gnu -fclangir -mlink-bitcode-file %t.bc \
+// RUN:     -O3 -emit-llvm -o - %s | FileCheck -check-prefix=CHECK-NO-BC %s
+// RUN: %clang_cc1 -O1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -o - \
+// RUN:     -mlink-bitcode-file %t.bc -mlink-bitcode-file %t-2.bc %s \
+// RUN:     | FileCheck -check-prefix=CHECK-NO-BC -check-prefix=CHECK-NO-BC2 %s
+// RUN: not %clang_cc1 -O1 -triple x86_64-unknown-linux-gnu -fclangir -DBITCODE -O3 -emit-llvm -o - \
+// RUN:     -mlink-bitcode-file %t.bc %s 2>&1 | FileCheck -check-prefix=CHECK-BC %s
+// Make sure we deal with failure to load the file.
+// RUN: not %clang_cc1 -O1 -triple x86_64-unknown-linux-gnu -fclangir -mlink-bitcode-file no-such-file.bc \
+// RUN:    -emit-llvm -o - %s 2>&1 | FileCheck -check-prefix=CHECK-NO-FILE %s
+
+
+int f(void);
+
+#ifdef BITCODE
+
+extern int f2(void);
+// CHECK-BC: error: Linking globals named {{.*}}'f': symbol multiply defined
+int f(void) {
+  f2();
+  return 42;
+}
+
+#elif BITCODE2
+int f2(void) { return 43; }
+#else
+
+// CHECK-NO-BC-LABEL: define{{.*}} i32 @g
+// CHECK-NO-BC: ret i32 42
+int g(void) {
+  return f();
+}
+
+// CHECK-NO-BC-LABEL: define{{.*}} i32 @f
+// CHECK-NO-BC2-LABEL: define{{.*}} i32 @f2
+
+#endif
+
+// CHECK-NO-FILE: fatal error: cannot open file 'no-such-file.bc'
diff --git a/clang/test/CIR/Incubator/CodeGen/linkage.c b/clang/test/CIR/Incubator/CodeGen/linkage.c
new file mode 100644
index 0000000000000..2f2c4ce4af831
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/linkage.c
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t-O0.cir
+// RUN: FileCheck --input-file=%t-O0.cir %s -check-prefixes=CIR,CIR-O0
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -O1 -o %t-O1.cir
+// RUN: FileCheck --input-file=%t-O1.cir %s -check-prefixes=CIR,CIR-O1
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+
+static int bar(int i) {
+  return i;
+}
+
+int foo(void) {
+  return bar(5);
+}
+
+// CIR-O0:   cir.func no_inline optnone internal private {{.*}} @bar
+// CIR-O1:   cir.func internal private {{.*}} @bar
+// CIR:      cir.func {{.*}} @foo
+
+// LLVM: define internal i32 @bar
+// LLVM: define dso_local i32 @foo
+
+static int var = 0;
+// CIR: cir.global "private" internal dso_local @var = #cir.int<0> : !s32i
+int get_var(void) {
+  return var;
+}
+
+// Should generate available_externally linkage when optimizing.
+inline int availableExternallyMethod(void) { return 0; }
+void callAvailableExternallyMethod(void) { availableExternallyMethod(); }
+// CIR-O0-NOT: cir.func available_externally{{.*}} @availableExternallyMethod
+// CIR-O1:     cir.func inline_hint available_externally{{.*}} @availableExternallyMethod
diff --git a/clang/test/CIR/Incubator/CodeGen/literals.c b/clang/test/CIR/Incubator/CodeGen/literals.c
new file mode 100644
index 0000000000000..a2d8148fe19c7
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/literals.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s
+
+int literals(void) {
+    char a = 'a'; // char literals are int in C
+    // CHECK: %[[RES:[0-9]+]] = cir.const #cir.int<97> : !s32i
+    // CHECK: %{{[0-9]+}} = cir.cast integral %[[RES]] : !s32i -> !s8i
+
+    return 0;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/literals.cpp b/clang/test/CIR/Incubator/CodeGen/literals.cpp
new file mode 100644
index 0000000000000..87290b888185e
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/literals.cpp
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s
+
+int literals() {
+    char a = 'a'; // char literals have char type in C++
+    // CHECK:  %{{[0-9]+}} = cir.const #cir.int<97> : !s8i
+
+    return 0;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/loop-scope.cpp b/clang/test/CIR/Incubator/CodeGen/loop-scope.cpp
new file mode 100644
index 0000000000000..f0516b965b89d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/loop-scope.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cpp.cir
+// RUN: FileCheck --input-file=%t.cpp.cir %s --check-prefix=CPPSCOPE
+// RUN: %clang_cc1 -x c -std=c11 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.c.cir
+// RUN: FileCheck --input-file=%t.c.cir %s --check-prefix=CSCOPE
+
+void l0(void) {
+  for (int i = 0;;) {
+    int j = 0;
+  }
+}
+
+// CPPSCOPE: cir.func {{.*}} @_Z2l0v()
+// CPPSCOPE-NEXT:   cir.scope {
+// CPPSCOPE-NEXT:     %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+// CPPSCOPE-NEXT:     %1 = cir.const #cir.int<0> : !s32i
+// CPPSCOPE-NEXT:     cir.store{{.*}} %1, %0 : !s32i, !cir.ptr<!s32i>
+// CPPSCOPE-NEXT:     cir.for : cond {
+
+//      CPPSCOPE:     } body {
+// CPPSCOPE-NEXT:       cir.scope {
+// CPPSCOPE-NEXT:         %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["j", init] {alignment = 4 : i64}
+
+// CSCOPE: cir.func {{.*}} @l0()
+// CSCOPE-NEXT: cir.scope {
+// CSCOPE-NEXT:   %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+// CSCOPE-NEXT:   %1 = cir.const #cir.int<0> : !s32i
+// CSCOPE-NEXT:   cir.store{{.*}} %1, %0 : !s32i, !cir.ptr<!s32i>
+// CSCOPE-NEXT:   cir.for : cond {
+
+// CSCOPE:        } body {
+// CSCOPE-NEXT:     cir.scope {
+// CSCOPE-NEXT:       %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["j", init] {alignment = 4 : i64}
diff --git a/clang/test/CIR/Incubator/CodeGen/loop.cpp b/clang/test/CIR/Incubator/CodeGen/loop.cpp
new file mode 100644
index 0000000000000..b333eb515a6dd
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/loop.cpp
@@ -0,0 +1,283 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+void l0() {
+  for (;;) {
+  }
+}
+
+// CHECK: cir.func {{.*}} @_Z2l0v
+// CHECK: cir.for : cond {
+// CHECK:   %[[#TRUE:]] = cir.const #true
+// CHECK:   cir.condition(%[[#TRUE]])
+
+void l1() {
+  int x = 0;
+  for (int i = 0; i < 10; i = i + 1) {
+    x = x + 1;
+  }
+}
+
+// CHECK: cir.func {{.*}} @_Z2l1v
+// CHECK: cir.for : cond {
+// CHECK-NEXT:   %4 = cir.load{{.*}} %2 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:   %5 = cir.const #cir.int<10> : !s32i
+// CHECK-NEXT:   %6 = cir.cmp(lt, %4, %5) : !s32i, !cir.bool
+// CHECK-NEXT:   cir.condition(%6)
+// CHECK-NEXT: } body {
+// CHECK-NEXT:   cir.scope {
+// CHECK-NEXT:     %4 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:     %5 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:     %6 = cir.binop(add, %4, %5) nsw : !s32i
+// CHECK-NEXT:     cir.store{{.*}} %6, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:   }
+// CHECK-NEXT:   cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT:   %4 = cir.load{{.*}} %2 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:   %5 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:   %6 = cir.binop(add, %4, %5) nsw : !s32i
+// CHECK-NEXT:   cir.store{{.*}} %6, %2 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:   cir.yield
+// CHECK-NEXT: }
+
+void l2(bool cond) {
+  int i = 0;
+  while (cond) {
+    i = i + 1;
+  }
+  while (true) {
+    i = i + 1;
+  }
+  while (1) {
+    i = i + 1;
+  }
+}
+
+// CHECK: cir.func {{.*}} @_Z2l2b
+// CHECK:         cir.scope {
+// CHECK-NEXT:     cir.while {
+// CHECK-NEXT:       %3 = cir.load{{.*}} %0 : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT:       cir.condition(%3)
+// CHECK-NEXT:     } do {
+// CHECK-NEXT:       cir.scope {
+// CHECK-NEXT:         %3 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:         %4 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:         %5 = cir.binop(add, %3, %4) nsw : !s32i
+// CHECK-NEXT:         cir.store{{.*}} %5, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:       }
+// CHECK-NEXT:       cir.yield
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT:   cir.scope {
+// CHECK-NEXT:     cir.while {
+// CHECK-NEXT:       %[[#TRUE:]] = cir.const #true
+// CHECK-NEXT:       cir.condition(%[[#TRUE]])
+// CHECK-NEXT:     } do {
+// CHECK-NEXT:       cir.scope {
+// CHECK-NEXT:         %3 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:         %4 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:         %5 = cir.binop(add, %3, %4) nsw : !s32i
+// CHECK-NEXT:         cir.store{{.*}} %5, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:       }
+// CHECK-NEXT:       cir.yield
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT:   cir.scope {
+// CHECK-NEXT:     cir.while {
+// CHECK-NEXT:       %3 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:       %4 = cir.cast int_to_bool %3 : !s32i -> !cir.bool
+// CHECK-NEXT:       cir.condition(%4)
+// CHECK-NEXT:     } do {
+// CHECK-NEXT:       cir.scope {
+// CHECK-NEXT:         %3 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:         %4 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:         %5 = cir.binop(add, %3, %4) nsw : !s32i
+// CHECK-NEXT:         cir.store{{.*}} %5, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:       }
+// CHECK-NEXT:       cir.yield
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+
+void l3(bool cond) {
+  int i = 0;
+  do {
+    i = i + 1;
+  } while (cond);
+  do {
+    i = i + 1;
+  } while (true);
+  do {
+    i = i + 1;
+  } while (1);
+}
+
+// CHECK: cir.func {{.*}} @_Z2l3b
+// CHECK: cir.scope {
+// CHECK-NEXT:   cir.do {
+// CHECK-NEXT:     cir.scope {
+// CHECK-NEXT:       %3 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:       %4 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:       %5 = cir.binop(add, %3, %4) nsw : !s32i
+// CHECK-NEXT:       cir.store{{.*}} %5, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:     }
+// CHECK-NEXT:     cir.yield
+// CHECK-NEXT:   } while {
+// CHECK-NEXT:     %[[#TRUE:]] = cir.load{{.*}} %0 : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK-NEXT:     cir.condition(%[[#TRUE]])
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT:   cir.do {
+// CHECK-NEXT:     cir.scope {
+// CHECK-NEXT:       %3 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:       %4 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:       %5 = cir.binop(add, %3, %4) nsw : !s32i
+// CHECK-NEXT:       cir.store{{.*}} %5, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:     }
+// CHECK-NEXT:     cir.yield
+// CHECK-NEXT:   } while {
+// CHECK-NEXT:     %[[#TRUE:]] = cir.const #true
+// CHECK-NEXT:     cir.condition(%[[#TRUE]])
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT:   cir.do {
+// CHECK-NEXT:     cir.scope {
+// CHECK-NEXT:       %3 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:       %4 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:       %5 = cir.binop(add, %3, %4) nsw : !s32i
+// CHECK-NEXT:       cir.store{{.*}} %5, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:     }
+// CHECK-NEXT:     cir.yield
+// CHECK-NEXT:   } while {
+// CHECK-NEXT:     %3 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:     %4 = cir.cast int_to_bool %3 : !s32i -> !cir.bool
+// CHECK-NEXT:     cir.condition(%4)
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
+
+void l4() {
+  int i = 0, y = 100;
+  while (true) {
+    i = i + 1;
+    if (i < 10)
+      continue;
+    y = y - 20;
+  }
+}
+
+// CHECK: cir.func {{.*}} @_Z2l4v
+// CHECK: cir.while {
+// CHECK-NEXT:   %[[#TRUE:]] = cir.const #true
+// CHECK-NEXT:   cir.condition(%[[#TRUE]])
+// CHECK-NEXT: } do {
+// CHECK-NEXT:   cir.scope {
+// CHECK-NEXT:     %4 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:     %5 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:     %6 = cir.binop(add, %4, %5) nsw  : !s32i
+// CHECK-NEXT:     cir.store{{.*}} %6, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:     cir.scope {
+// CHECK-NEXT:       %10 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:       %11 = cir.const #cir.int<10> : !s32i
+// CHECK-NEXT:       %12 = cir.cmp(lt, %10, %11) : !s32i, !cir.bool
+// CHECK-NEXT:       cir.if %12 {
+// CHECK-NEXT:         cir.continue
+// CHECK-NEXT:       }
+// CHECK-NEXT:     }
+
+void l5() {
+  do {
+  } while (0);
+}
+
+// CHECK: cir.func {{.*}} @_Z2l5v()
+// CHECK-NEXT:   cir.scope {
+// CHECK-NEXT:     cir.do {
+// CHECK-NEXT:       cir.yield
+// CHECK-NEXT:     } while {
+// CHECK-NEXT:       %0 = cir.const #cir.int<0> : !s32i
+// CHECK-NEXT:       %1 = cir.cast int_to_bool %0 : !s32i -> !cir.bool
+// CHECK-NEXT:       cir.condition(%1)
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT:   cir.return
+// CHECK-NEXT: }
+
+void l6() {
+  while (true) {
+    return;
+  }
+}
+
+// CHECK: cir.func {{.*}} @_Z2l6v()
+// CHECK-NEXT:   cir.scope {
+// CHECK-NEXT:     cir.while {
+// CHECK-NEXT:       %[[#TRUE:]] = cir.const #true
+// CHECK-NEXT:       cir.condition(%[[#TRUE]])
+// CHECK-NEXT:     } do {
+// CHECK-NEXT:       cir.scope {
+// CHECK-NEXT:         cir.return
+// CHECK-NEXT:       }
+// CHECK-NEXT:       cir.yield
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT:   cir.return
+// CHECK-NEXT: }
+
+void unreachable_after_break() {
+  for (;;) {
+    break;
+    int x = 1;
+  }
+}
+
+// CHECK-NEXT: cir.func {{.*}} @_Z23unreachable_after_breakv()
+// CHECK-NEXT:   cir.scope {
+// CHECK-NEXT:     cir.for : cond {
+// CHECK-NEXT:       %0 = cir.const #true
+// CHECK-NEXT:       cir.condition(%0)
+// CHECK-NEXT:     } body {
+// CHECK-NEXT:       cir.scope {
+// CHECK-NEXT:         %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+// CHECK-NEXT:         cir.break
+// CHECK-NEXT:       ^bb1:  // no predecessors
+// CHECK-NEXT:         %1 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:         cir.store{{.*}} %1, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:         cir.yield
+// CHECK-NEXT:       }
+// CHECK-NEXT:       cir.yield
+// CHECK-NEXT:     } step {
+// CHECK-NEXT:       cir.yield
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT:   cir.return
+// CHECK-NEXT: }
+
+void unreachable_after_continue() {
+  for (;;) {
+    continue;
+    int x = 1;
+  }
+}
+
+// CHECK-NEXT: cir.func {{.*}} @_Z26unreachable_after_continuev()
+// CHECK-NEXT:   cir.scope {
+// CHECK-NEXT:     cir.for : cond {
+// CHECK-NEXT:       %0 = cir.const #true
+// CHECK-NEXT:       cir.condition(%0)
+// CHECK-NEXT:     } body {
+// CHECK-NEXT:       cir.scope {
+// CHECK-NEXT:         %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+// CHECK-NEXT:         cir.continue
+// CHECK-NEXT:       ^bb1:  // no predecessors
+// CHECK-NEXT:         %1 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:         cir.store{{.*}} %1, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:         cir.yield
+// CHECK-NEXT:       }
+// CHECK-NEXT:       cir.yield
+// CHECK-NEXT:     } step {
+// CHECK-NEXT:       cir.yield
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT:   cir.return
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/CodeGen/lvalue-refs.cpp b/clang/test/CIR/Incubator/CodeGen/lvalue-refs.cpp
new file mode 100644
index 0000000000000..7225a09fa2193
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/lvalue-refs.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s
+
+struct String {
+  long size;
+};
+
+void split(String &S) {}
+
+// CHECK: cir.func {{.*}} @_Z5splitR6String(%arg0: !cir.ptr<!rec_String>
+// CHECK:     %0 = cir.alloca !cir.ptr<!rec_String>, !cir.ptr<!cir.ptr<!rec_String>>, ["S", init, const]
+
+void foo() {
+  String s;
+  split(s);
+}
+
+// CHECK: cir.func {{.*}} @_Z3foov()
+// CHECK:     %0 = cir.alloca !rec_String, !cir.ptr<!rec_String>, ["s"]
+// CHECK:     cir.call @_Z5splitR6String(%0) : (!cir.ptr<!rec_String>) -> ()
diff --git a/clang/test/CIR/Incubator/CodeGen/materialize-temporary.cpp b/clang/test/CIR/Incubator/CodeGen/materialize-temporary.cpp
new file mode 100644
index 0000000000000..5aac04c32a7e8
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/materialize-temporary.cpp
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Test MaterializeTemporaryExpr when binding a const reference to an rvalue
+int get_value() { return 42; }
+
+void test_const_ref_binding() {
+  // CHECK-LABEL: cir.func{{.*}} @{{.*}}test_const_ref_bindingv
+  const int &x = 5;
+  // CHECK: %{{.*}} = cir.alloca !s32i, !cir.ptr<!s32i>, ["ref.tmp0", init]
+  // CHECK: %{{.*}} = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["x", init, const]
+  // CHECK: cir.scope {
+  // CHECK: %{{.*}} = cir.const #cir.int<5> : !s32i
+  // CHECK: cir.store {{.*}} %{{.*}}, %{{.*}} : !s32i, !cir.ptr<!s32i>
+  // CHECK: }
+}
+
+void test_const_ref_expr() {
+  // CHECK-LABEL: cir.func{{.*}} @{{.*}}test_const_ref_exprv
+  const int &y = get_value();
+  // CHECK: %{{.*}} = cir.alloca !s32i, !cir.ptr<!s32i>, ["ref.tmp0", init]
+  // CHECK: %{{.*}} = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["y", init, const]
+  // CHECK: cir.scope {
+  // CHECK: %{{.*}} = cir.call @{{.*}}get_valuev()
+  // CHECK: }
+}
+
+void test_const_ref_arithmetic() {
+  // CHECK-LABEL: cir.func{{.*}} @{{.*}}test_const_ref_arithmeticv
+  int a = 10;
+  const int &z = a + 5;
+  // CHECK: %{{.*}} = cir.alloca !s32i, !cir.ptr<!s32i>, ["ref.tmp0", init]
+  // CHECK: %{{.*}} = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["z", init, const]
+  // CHECK: cir.scope {
+  // CHECK: %{{.*}} = cir.load {{.*}} %{{.*}}
+  // CHECK: %{{.*}} = cir.const #cir.int<5> : !s32i
+  // CHECK: %{{.*}} = cir.binop(add, %{{.*}}, %{{.*}})
+  // CHECK: }
+}
+
+struct S {
+  int val;
+  S(int v) : val(v) {}
+};
+
+S make_s() { return S(100); }
+
+void test_const_ref_struct() {
+  // CHECK-LABEL: cir.func{{.*}} @{{.*}}test_const_ref_structv
+  const S &s = make_s();
+  // Temporary S object should be materialized
+  // CHECK: %{{.*}} = cir.alloca {{.*}}, !cir.ptr<{{.*}}rec_S{{.*}}>, ["ref.tmp0"]
+  // CHECK: %{{.*}} = cir.alloca !cir.ptr<{{.*}}>, !cir.ptr<!cir.ptr<{{.*}}>>, ["s", init, const]
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/member-init-struct.cpp b/clang/test/CIR/Incubator/CodeGen/member-init-struct.cpp
new file mode 100644
index 0000000000000..54a4d0be95a1f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/member-init-struct.cpp
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s
+
+struct A {int a;};
+struct B {float a;};
+struct C {
+  union {
+    A a;
+    B b[10];
+  };
+  int c;
+  int d[10];
+  void (C::*e)();
+  C() : a(), c(), d(), e() {}
+  C(A x) : a(x) {}
+  C(void (C::*x)(), int y) : b(), c(y), e(x) {}
+};
+
+// CHECK-LABEL:   cir.global external @x = #cir.zero : !rec_A
+A x;
+C a, b(x), c(0, 2);
+
+// CHECK-LABEL: @_ZN1CC2Ev
+// CHECK:   %[[VAL_1:.*]] = cir.alloca !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>>, ["this", init] {alignment = 8 : i64}
+// CHECK:   cir.store{{.*}} %{{.*}}, %[[VAL_1]] : !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>>
+// CHECK:   %[[VAL_2:.*]] = cir.load %[[VAL_1]] : !cir.ptr<!cir.ptr<!rec_C>>, !cir.ptr<!rec_C>
+// CHECK:   %[[VAL_3:.*]] = cir.get_member %[[VAL_2]][0] {name = ""} : !cir.ptr<!rec_C> -> !cir.ptr<!rec_anon2E0>
+// CHECK:   %[[VAL_4:.*]] = cir.get_member %[[VAL_3]][0] {name = "a"} : !cir.ptr<!rec_anon2E0> -> !cir.ptr<!rec_A>
+// CHECK:   %[[VAL_5:.*]] = cir.const {{.*}} : !rec_A
+// CHECK:   cir.store{{.*}} %[[VAL_5]], %[[VAL_4]] : !rec_A, !cir.ptr<!rec_A>
+// Trivial default constructor call is lowered away.
+// CHECK:   %[[VAL_6:.*]] = cir.get_member %[[VAL_2]][1] {name = "c"} : !cir.ptr<!rec_C> -> !cir.ptr<!s32i>
+// CHECK:   %[[VAL_7:.*]] = cir.const {{.*}}<0> : !s32i
+// CHECK:   cir.store{{.*}} %[[VAL_7]], %[[VAL_6]] : !s32i, !cir.ptr<!s32i>
+// CHECK:   %[[VAL_8:.*]] = cir.get_member %[[VAL_2]][2] {name = "d"} : !cir.ptr<!rec_C> -> !cir.ptr<!cir.array<!s32i x 10>>
+// CHECK:   %[[VAL_9:.*]] = cir.const {{.*}} : !cir.array<!s32i x 10>
+// CHECK:   cir.store{{.*}} %[[VAL_9]], %[[VAL_8]] : !cir.array<!s32i x 10>, !cir.ptr<!cir.array<!s32i x 10>>
+// CHECK:   %[[VAL_10:.*]] = cir.get_member %[[VAL_2]][4] {name = "e"} : !cir.ptr<!rec_C> -> !cir.ptr<!cir.method<!cir.func<()> in !rec_C>>
+// CHECK:   %[[VAL_11:.*]] = cir.const #cir.method<null> : !cir.method<!cir.func<()> in !rec_C>
+// CHECK:   cir.store{{.*}} %[[VAL_11]], %[[VAL_10]] : !cir.method<!cir.func<()> in !rec_C>, !cir.ptr<!cir.method<!cir.func<()> in !rec_C>>
+// CHECK:   cir.return
diff --git a/clang/test/CIR/Incubator/CodeGen/mms-bitfields.c b/clang/test/CIR/Incubator/CodeGen/mms-bitfields.c
new file mode 100644
index 0000000000000..bf0f8f87666e0
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/mms-bitfields.c
@@ -0,0 +1,66 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -mms-bitfields -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -mms-bitfields -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -mms-bitfields -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+struct s1 {
+  int       f32 : 2;
+  long long f64 : 30;
+} s1;
+
+// CIR-DAG: !rec_s1 = !cir.record<struct "s1" {!s32i, !s64i} #cir.record.decl.ast>
+// LLVM-DAG: %struct.s1 = type { i32, i64 }
+// OGCG-DAG: %struct.s1 = type { i32, i64 }
+
+struct s2 {
+    int  a : 24;
+    char b;
+    int  c : 30;
+} Clip;
+
+// CIR-DAG: !rec_s2 = !cir.record<struct "s2" {!s32i, !s8i, !s32i} #cir.record.decl.ast>
+// LLVM-DAG: %struct.s2 = type { i32, i8, i32 }
+// OGCG-DAG: %struct.s2 = type { i32, i8, i32 }
+
+#pragma pack (push,1)
+
+struct Inner {
+  unsigned int    A    :  1;
+  unsigned int    B    :  1;
+  unsigned int    C    :  1;
+  unsigned int    D    : 30;
+} Inner;
+
+#pragma pack (pop)
+
+// CIR-DAG: !rec_Inner = !cir.record<struct "Inner" {!u32i, !u32i} #cir.record.decl.ast>
+// LLVM-DAG: %struct.Inner = type { i32, i32 }
+// OGCG-DAG: %struct.Inner = type { i32, i32 }
+
+#pragma pack(push, 1)
+
+union HEADER {
+  struct A {
+    int                                         :  3;  // Bits 2:0
+    int a                                       :  9;  // Bits 11:3
+    int                                         :  12;  // Bits 23:12
+    int b                                       :  17;  // Bits 40:24
+    int                                         :  7;  // Bits 47:41
+    int c                                       :  4;  // Bits 51:48
+    int                                         :  4;  // Bits 55:52
+    int d                                       :  3;  // Bits 58:56
+    int                                         :  5;  // Bits 63:59
+  } Bits;
+} HEADER;
+
+#pragma pack(pop)
+
+// CIR-DAG: !rec_A = !cir.record<struct "A" {!s32i, !s32i, !s32i} #cir.record.decl.ast>
+// CIR-DAG: !rec_HEADER = !cir.record<union "HEADER" {!rec_A} #cir.record.decl.ast>
+// LLVM-DAG: %struct.A = type { i32, i32, i32 }
+// LLVM-DAG: %union.HEADER = type { %struct.A }
+// OGCG-DAG: %struct.A = type { i32, i32, i32 }
+// OGCG-DAG: %union.HEADER = type { %struct.A }
+
diff --git a/clang/test/CIR/Incubator/CodeGen/module-asm.c b/clang/test/CIR/Incubator/CodeGen/module-asm.c
new file mode 100644
index 0000000000000..e6cec5e0ee948
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/module-asm.c
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o %t.cir 
+// RUN: FileCheck --input-file=%t.cir %s
+
+// CHECK:  cir.module_asm = [".globl bar", ".globl foo"]
+__asm (".globl bar");
+__asm (".globl foo");
diff --git a/clang/test/CIR/Incubator/CodeGen/move.cpp b/clang/test/CIR/Incubator/CodeGen/move.cpp
new file mode 100644
index 0000000000000..074a3fa54f006
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/move.cpp
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+namespace std {
+
+template<typename T> struct remove_reference       { typedef T type; };
+template<typename T> struct remove_reference<T &>  { typedef T type; };
+template<typename T> struct remove_reference<T &&> { typedef T type; };
+
+template<typename T>
+typename remove_reference<T>::type &&move(T &&t) noexcept;
+
+struct string {
+  string();
+};
+
+} // std namespace
+
+// CHECK: ![[StdString:rec_.*]] = !cir.record<struct "std::string" padded {!u8i}>
+
+std::string getstr();
+void emplace(std::string &&s);
+
+void t() {
+  emplace(std::move(getstr()));
+}
+
+// FIXME: we should explicitly model std::move here since it will
+// be useful at least for the lifetime checker.
+
+// CHECK: cir.func {{.*}} @_Z1tv()
+// CHECK:   %[[#Addr:]] = cir.alloca ![[StdString]], {{.*}} ["ref.tmp0"]
+// CHECK:   %[[#RValStr:]] = cir.call @_Z6getstrv() : () -> ![[StdString]]
+// CHECK:   cir.store{{.*}} %[[#RValStr]], %[[#Addr]]
+// CHECK:   cir.call @_Z7emplaceOSt6string(%[[#Addr]])
+// CHECK:   cir.return
+// CHECK: }
+
+struct S {
+  S() = default;
+  S(S&&) = default;
+
+  int val;
+};
+
+// CHECK-LABEL:   cir.func {{.*}} @_ZN1SC1EOS_
+// CHECK-SAME:      special_member<#cir.cxx_ctor<!rec_S, move>>
+
+void test_ctor() {
+// CHECK-LABEL:   cir.func {{.*}} @_Z9test_ctorv()
+// CHECK:           %[[VAR_A:.*]] = cir.alloca !rec_S, !cir.ptr<!rec_S>
+// CHECK:           %[[VAR_B:.*]] = cir.alloca !rec_S, !cir.ptr<!rec_S>
+// CHECK:           cir.call @_ZN1SC1EOS_(%[[VAR_B]], %[[VAR_A]]) : (!cir.ptr<!rec_S>, !cir.ptr<!rec_S>) -> ()
+// CHECK:           cir.return
+// CHECK:         }
+
+  S a;
+  S b(std::move(a));
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/ms-intrinsics-other.c b/clang/test/CIR/Incubator/CodeGen/ms-intrinsics-other.c
new file mode 100644
index 0000000000000..d5c9164c6a4ed
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/ms-intrinsics-other.c
@@ -0,0 +1,55 @@
+// RUN: %clang_cc1 -ffreestanding -fms-extensions -Wno-implicit-function-declaration -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck %s --check-prefix=CIR --input-file=%t.cir
+// RUN: %clang_cc1 -ffreestanding -fms-extensions -Wno-implicit-function-declaration -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck %s --check-prefix=LLVM --input-file=%t.ll
+
+// This test mimics clang/test/CodeGen/ms-intrinsics-other.c, which eventually
+// CIR shall be able to support fully.
+
+unsigned short test__lzcnt16(unsigned short x) {
+  return __lzcnt16(x);
+}
+// CIR-LABEL: test__lzcnt16
+// CIR: {{%.*}} = cir.clz {{%.*}} : !u16i
+// LLVM-LABEL: test__lzcnt16
+// LLVM: {{%.*}} = call i16 @llvm.ctlz.i16(i16 {{%.*}}, i1 false)
+
+unsigned int test__lzcnt(unsigned int x) {
+  return __lzcnt(x);
+}
+// CIR-LABEL: test__lzcnt
+// CIR: {{%.*}} = cir.clz {{%.*}} : !u32i
+// LLVM-LABEL: test__lzcnt
+// LLVM:  {{%.*}} = call i32 @llvm.ctlz.i32(i32 {{%.*}}, i1 false)
+
+unsigned __int64 test__lzcnt64(unsigned __int64 x) {
+  return __lzcnt64(x);
+}
+// CIR-LABEL: test__lzcnt64
+// CIR: {{%.*}} = cir.clz {{%.*}} : !u64i
+// LLVM-LABEL: test__lzcnt64
+// LLVM: {{%.*}} = call i64 @llvm.ctlz.i64(i64 {{%.*}}, i1 false)
+
+unsigned short test__popcnt16(unsigned short x) {
+  return __popcnt16(x);
+}
+// CIR-LABEL: test__popcnt16
+// CIR: {{%.*}} = cir.popcount {{%.*}} : !u16i
+// LLVM-LABEL: test__popcnt16
+// LLVM: {{%.*}} = call i16 @llvm.ctpop.i16(i16 {{%.*}})
+
+unsigned int test__popcnt(unsigned int x) {
+  return __popcnt(x);
+}
+// CIR-LABEL: test__popcnt
+// CIR: {{%.*}} = cir.popcount {{%.*}} : !u32i
+// LLVM-LABEL: test__popcnt
+// LLVM: {{%.*}} = call i32 @llvm.ctpop.i32(i32 {{%.*}})
+
+unsigned __int64 test__popcnt64(unsigned __int64 x) {
+  return __popcnt64(x);
+}
+// CIR-LABEL: test__popcnt64
+// CIR: {{%.*}} = cir.popcount {{%.*}} : !u64i
+// LLVM-LABEL: test__popcnt64
+// LLVM: {{%.*}} = call i64 @llvm.ctpop.i64(i64 {{%.*}})
diff --git a/clang/test/CIR/Incubator/CodeGen/multi-vtable.cpp b/clang/test/CIR/Incubator/CodeGen/multi-vtable.cpp
new file mode 100644
index 0000000000000..c55ed096be83c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/multi-vtable.cpp
@@ -0,0 +1,130 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+class Mother {
+public:
+ virtual void MotherFoo() {}
+ void simple() { }
+ virtual void MotherFoo2() {}
+};
+
+class Father {
+public:
+ virtual void FatherFoo() {}
+};
+
+class Child : public Mother, public Father {
+public:
+ void MotherFoo() override {}
+};
+
+int main() {
+    Mother *b = new Mother();
+    b->MotherFoo();
+    b->simple();
+    Child *c = new Child();
+    c->MotherFoo();
+    return 0;
+}
+
+// CIR-DAG: ![[VTypeInfoA:rec_.*]] = !cir.record<struct  {!cir.ptr<!u8i>, !cir.ptr<!u8i>}>
+// CIR-DAG: ![[VTypeInfoB:rec_.*]] = !cir.record<struct  {!cir.ptr<!u8i>, !cir.ptr<!u8i>, !u32i, !u32i, !cir.ptr<!u8i>, !s64i, !cir.ptr<!u8i>, !s64i}>
+// CIR-DAG: ![[VPtrTypeMother:rec_.*]] = !cir.record<struct  {!cir.array<!cir.ptr<!u8i> x 4>}>
+// CIR-DAG: ![[VPtrTypeFather:rec_.*]] = !cir.record<struct  {!cir.array<!cir.ptr<!u8i> x 3>}>
+// CIR-DAG: ![[VPtrTypeChild:rec_.*]] = !cir.record<struct  {!cir.array<!cir.ptr<!u8i> x 4>, !cir.array<!cir.ptr<!u8i> x 3>}>
+// CIR-DAG: !rec_Father = !cir.record<class "Father" {!cir.vptr} #cir.record.decl.ast>
+// CIR-DAG: !rec_Mother = !cir.record<class "Mother" {!cir.vptr} #cir.record.decl.ast>
+// CIR-DAG: !rec_Child = !cir.record<class "Child" {!rec_Mother, !rec_Father} #cir.record.decl.ast>
+
+// CIR: cir.func {{.*}} @_ZN6MotherC2Ev(%arg0: !cir.ptr<!rec_Mother>
+// CIR:   %{{[0-9]+}} = cir.vtable.address_point(@_ZTV6Mother, address_point = <index = 0, offset = 2>) : !cir.vptr
+// CIR:   %{{[0-9]+}} = cir.vtable.get_vptr %{{[0-9]+}} : !cir.ptr<!rec_Mother> -> !cir.ptr<!cir.vptr>
+// CIR:   cir.store{{.*}} %2, %{{[0-9]+}} : !cir.vptr, !cir.ptr<!cir.vptr>
+// CIR:   cir.return
+// CIR: }
+
+// Note: GEP emitted by cir might not be the same as LLVM, due to constant folding.
+// LLVM-DAG: define linkonce_odr void @_ZN6MotherC2Ev(ptr %0)
+// LLVM-DAG:   store ptr getelementptr inbounds nuw (i8, ptr @_ZTV6Mother, i64 16), ptr %{{[0-9]+}}, align 8
+// LLVM-DAG:   ret void
+// LLVM-DAG: }
+
+// CIR: cir.func {{.*}} @_ZN5ChildC2Ev(%arg0: !cir.ptr<!rec_Child>
+// CIR:   %{{[0-9]+}} = cir.vtable.address_point(@_ZTV5Child, address_point = <index = 0, offset = 2>) : !cir.vptr
+// CIR:   %{{[0-9]+}} = cir.vtable.get_vptr %1 : !cir.ptr<!rec_Child> -> !cir.ptr<!cir.vptr>
+// CIR:   cir.store{{.*}} %{{[0-9]+}}, %{{[0-9]+}} : !cir.vptr, !cir.ptr<!cir.vptr>
+// CIR:   %{{[0-9]+}} = cir.vtable.address_point(@_ZTV5Child, address_point = <index = 1, offset = 2>) : !cir.vptr
+// CIR:   %7 = cir.base_class_addr %1 : !cir.ptr<!rec_Child> nonnull [8] -> !cir.ptr<!rec_Father>
+// CIR:   %8 = cir.vtable.get_vptr %7 : !cir.ptr<!rec_Father> -> !cir.ptr<!cir.vptr>
+// CIR:   cir.store{{.*}} %{{[0-9]+}}, %{{[0-9]+}} : !cir.vptr, !cir.ptr<!cir.vptr>
+// CIR:   cir.return
+// CIR: }
+
+// LLVM-DAG: $_ZTS6Mother = comdat any
+// LLVM-DAG: $_ZTS5Child = comdat any
+// LLVM-DAG: $_ZTS6Father = comdat any
+
+// Note: GEP emitted by cir might not be the same as LLVM, due to constant folding.
+// LLVM-DAG: define linkonce_odr void @_ZN5ChildC2Ev(ptr %0)
+// LLVM-DAG:  store ptr getelementptr inbounds nuw (i8, ptr @_ZTV5Child, i64 16), ptr %{{[0-9]+}}, align 8
+// LLVM-DAG:  %{{[0-9]+}} = getelementptr i8, ptr {{.*}}, i32 8
+// LLVM-DAG:  store ptr getelementptr inbounds nuw (i8, ptr @_ZTV5Child, i64 48), ptr %{{[0-9]+}}, align 8
+// LLVM-DAG:  ret void
+// }
+
+// CIR: cir.func {{.*}} @main() -> !s32i extra(#fn_attr) {
+
+// CIR:   %{{[0-9]+}} = cir.vtable.get_virtual_fn_addr %{{[0-9]+}}[0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Mother>)>>>
+
+// CIR:   %{{[0-9]+}} = cir.vtable.get_virtual_fn_addr %{{[0-9]+}}[0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Child>)>>>
+
+// CIR: }
+
+//   vtable for Mother
+// CIR: cir.global constant linkonce_odr @_ZTV6Mother = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI6Mother> : !cir.ptr<!u8i>, #cir.global_view<@_ZN6Mother9MotherFooEv> : !cir.ptr<!u8i>, #cir.global_view<@_ZN6Mother10MotherFoo2Ev> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 4>}> : ![[VPtrTypeMother]] {alignment = 8 : i64}
+// LLVM-DAG: @_ZTV6Mother = linkonce_odr constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr @_ZTI6Mother, ptr @_ZN6Mother9MotherFooEv, ptr @_ZN6Mother10MotherFoo2Ev] }
+
+//   vtable for __cxxabiv1::__class_type_info
+// CIR: cir.global "private" external @_ZTVN10__cxxabiv117__class_type_infoE : !cir.ptr<!cir.ptr<!u8i>>
+// LLVM-DAG: @_ZTVN10__cxxabiv117__class_type_infoE = external global ptr
+
+//   typeinfo name for Mother
+// CIR: cir.global constant linkonce_odr comdat @_ZTS6Mother = #cir.const_array<"6Mother" : !cir.array<!s8i x 7>> : !cir.array<!s8i x 7> {alignment = 1 : i64}
+// LLVM-DAG: @_ZTS6Mother = linkonce_odr constant [7 x i8] c"6Mother", comdat
+
+//   typeinfo for Mother
+// Note: GEP emitted by cir might not be the same as LLVM, due to constant folding.
+// CIR: cir.global constant external @_ZTI6Mother = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv117__class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS6Mother> : !cir.ptr<!u8i>}> : ![[VTypeInfoA]] {alignment = 8 : i64}
+// LLVM-DAG: @_ZTI6Mother = constant { ptr, ptr } { ptr getelementptr inbounds nuw (i8, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 16), ptr @_ZTS6Mother }
+
+//   vtable for Father
+// CIR: cir.global constant linkonce_odr @_ZTV6Father = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI6Father> : !cir.ptr<!u8i>, #cir.global_view<@_ZN6Father9FatherFooEv> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 3>}> : ![[VPtrTypeFather]] {alignment = 8 : i64}
+// LLVM-DAG: @_ZTV6Father = linkonce_odr constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr @_ZTI6Father, ptr @_ZN6Father9FatherFooEv] }
+
+//   vtable for Child
+// CIR: cir.global constant linkonce_odr @_ZTV5Child = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI5Child> : !cir.ptr<!u8i>, #cir.global_view<@_ZN5Child9MotherFooEv> : !cir.ptr<!u8i>, #cir.global_view<@_ZN6Mother10MotherFoo2Ev> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 4>, #cir.const_array<[#cir.ptr<-8 : i64> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI5Child> : !cir.ptr<!u8i>, #cir.global_view<@_ZN6Father9FatherFooEv> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 3>}> : ![[VPtrTypeChild]] {alignment = 8 : i64}
+// LLVM-DAG: @_ZTV5Child = linkonce_odr constant { [4 x ptr], [3 x ptr] } { [4 x ptr] [ptr null, ptr @_ZTI5Child, ptr @_ZN5Child9MotherFooEv, ptr @_ZN6Mother10MotherFoo2Ev], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr @_ZTI5Child, ptr @_ZN6Father9FatherFooEv] }
+
+//   vtable for __cxxabiv1::__vmi_class_type_info
+// CIR: cir.global "private" external @_ZTVN10__cxxabiv121__vmi_class_type_infoE : !cir.ptr<!cir.ptr<!u8i>>
+// LLVM-DAG: @_ZTVN10__cxxabiv121__vmi_class_type_infoE = external global ptr
+
+//   typeinfo name for Child
+// CIR: cir.global constant linkonce_odr comdat @_ZTS5Child = #cir.const_array<"5Child" : !cir.array<!s8i x 6>> : !cir.array<!s8i x 6> {alignment = 1 : i64}
+// LLVM-DAG: @_ZTS5Child = linkonce_odr constant [6 x i8] c"5Child", comdat
+
+//   typeinfo name for Father
+// CIR: cir.global constant linkonce_odr comdat @_ZTS6Father = #cir.const_array<"6Father" : !cir.array<!s8i x 7>> : !cir.array<!s8i x 7> {alignment = 1 : i64}
+// LLVM-DAG: @_ZTS6Father = linkonce_odr constant [7 x i8] c"6Father", comdat
+
+//   typeinfo for Father
+// Note: GEP emitted by cir might not be the same as LLVM, due to constant folding.
+// CIR: cir.global constant external @_ZTI6Father = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv117__class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS6Father> : !cir.ptr<!u8i>}> : !rec_anon_struct {alignment = 8 : i64}
+// LLVM-DAG: @_ZTI6Father = constant { ptr, ptr } { ptr getelementptr inbounds nuw (i8, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 16), ptr @_ZTS6Father }
+
+//   typeinfo for Child
+// Note: GEP emitted by cir might not be the same as LLVM, due to constant folding.
+// CIR: cir.global constant external @_ZTI5Child = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv121__vmi_class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS5Child> : !cir.ptr<!u8i>, #cir.int<0> : !u32i, #cir.int<2> : !u32i, #cir.global_view<@_ZTI6Mother> : !cir.ptr<!u8i>, #cir.int<2> : !s64i, #cir.global_view<@_ZTI6Father> : !cir.ptr<!u8i>, #cir.int<2050> : !s64i}> : ![[VTypeInfoB]] {alignment = 8 : i64}
+// LLVM-DAG: @_ZTI5Child = constant { ptr, ptr, i32, i32, ptr, i64, ptr, i64 } { ptr getelementptr inbounds nuw (i8, ptr @_ZTVN10__cxxabiv121__vmi_class_type_infoE, i64 16), ptr @_ZTS5Child, i32 0, i32 2, ptr @_ZTI6Mother, i64 2, ptr @_ZTI6Father, i64 2050 }
diff --git a/clang/test/CIR/Incubator/CodeGen/new-null.cpp b/clang/test/CIR/Incubator/CodeGen/new-null.cpp
new file mode 100644
index 0000000000000..285d56a288600
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/new-null.cpp
@@ -0,0 +1,122 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu %s -fclangir -emit-cir -o %t.cir
+// RUN: FileCheck --input-file=%t.cir -check-prefix=CIR %s
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu %s -fclangir -emit-llvm -o %t.ll
+// RUN: FileCheck --input-file=%t.ll -check-prefix=LLVM %s
+
+// TODO: This file is inspired by clang/test/CodeGenCXX/new.cpp, add all tests from it.
+
+typedef __typeof__(sizeof(0)) size_t;
+
+// Declare an 'operator new' template to tickle a bug in __builtin_operator_new.
+template<typename T> void *operator new(size_t, int (*)(T));
+
+// Ensure that this declaration doesn't cause operator new to lose its
+// 'noalias' attribute.
+void *operator new[](size_t);
+
+namespace std {
+  struct nothrow_t {};
+  enum class align_val_t : size_t { __zero = 0,
+                                  __max = (size_t)-1 };
+}
+std::nothrow_t nothrow;
+
+// Declare the reserved placement operators.
+void *operator new(size_t, void*) throw();
+void operator delete(void*, void*) throw();
+void *operator new[](size_t, void*) throw();
+void operator delete[](void*, void*) throw();
+
+// Declare the replaceable global allocation operators.
+void *operator new(size_t, const std::nothrow_t &) throw();
+void *operator new[](size_t, const std::nothrow_t &) throw();
+void operator delete(void *, const std::nothrow_t &) throw();
+void operator delete[](void *, const std::nothrow_t &) throw();
+
+// Declare some other placement operators.
+void *operator new(size_t, void*, bool) throw();
+void *operator new[](size_t, void*, bool) throw();
+
+namespace test15 {
+  struct A { A(); ~A(); };
+  // CIR-DAG:   ![[TEST15A:.*]] = !cir.record<struct "test15::A" padded {!u8i}
+
+  void test0a(void *p) {
+    new (p) A();
+  }
+
+  // CIR-LABEL:   cir.func {{.*}} @_ZN6test156test0bEPv(
+  // CIR-SAME:                                   %[[VAL_0:.*]]: !cir.ptr<!void>
+  // CIR:           %[[VAL_1:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["p", init] {alignment = 8 : i64}
+  // CIR:           cir.store %[[VAL_0]], %[[VAL_1]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+  // CIR:           %[[VAL_2:.*]] = cir.const #cir.int<1> : !u64i
+  // CIR:           %[[VAL_3:.*]] = cir.load{{.*}} %[[VAL_1]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+  // CIR:           %[[VAL_4:.*]] = cir.const #true
+  // CIR:           %[[VAL_5:.*]] = cir.call @_ZnwmPvb(%[[VAL_2]], %[[VAL_3]], %[[VAL_4]])
+  // CIR:           %[[VAL_6:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!void>
+  // CIR:           %[[VAL_7:.*]] = cir.cmp(ne, %[[VAL_5]], %[[VAL_6]]) : !cir.ptr<!void>, !cir.bool
+  // CIR:           %[[VAL_8:.*]] = cir.cast bitcast %[[VAL_5]] : !cir.ptr<!void> -> !cir.ptr<![[TEST15A]]>
+  // CIR:           cir.if %[[VAL_7]] {
+  // CIR:             cir.call @_ZN6test151AC1Ev(%[[VAL_8]]) : (!cir.ptr<![[TEST15A]]>) -> ()
+  // CIR:           }
+  // CIR:           cir.return
+  // CIR:         }
+
+  // LLVM-LABEL: _ZN6test156test0bEPv
+  // LLVM:         %[[VAL_0:.*]] = alloca ptr, i64 1, align 8
+  // LLVM:         store ptr %[[VAL_1:.*]], ptr %[[VAL_0]], align 8
+  // LLVM:         %[[VAL_2:.*]] = load ptr, ptr %[[VAL_0]], align 8
+  // LLVM:         %[[VAL_3:.*]] = call ptr @_ZnwmPvb(i64 1, ptr %[[VAL_2]], i1 true)
+  // LLVM:         %[[VAL_4:.*]] = icmp ne ptr %[[VAL_3]], null
+  // LLVM:         br i1 %[[VAL_4]], label %[[VAL_5:.*]], label %[[VAL_6:.*]]
+  // LLVM:       [[VAL_5]]:                                                ; preds = %[[VAL_7:.*]]
+  // LLVM:         call void @_ZN6test151AC1Ev(ptr %[[VAL_3]])
+  // LLVM:         br label %[[VAL_6]]
+  // LLVM:       [[VAL_6]]:                                                ; preds = %[[VAL_5]], %[[VAL_7]]
+  // LLVM:         ret void
+
+  void test0b(void *p) {
+    new (p, true) A();
+  }
+}
+
+extern "C" void test_basic() {
+  __builtin_operator_delete(__builtin_operator_new(4));
+  // CIR-LABEL: cir.func {{.*}} @test_basic
+  // CIR: [[P:%.*]] = cir.call @_Znwm({{%.*}}) : (!u64i) -> !cir.ptr<!void>
+  // CIR: cir.call @_ZdlPv([[P]]) : (!cir.ptr<!void>) -> ()
+  // CIR: cir.return
+
+  // LLVM-LABEL: define{{.*}} void @test_basic()
+  // LLVM: [[P:%.*]] = call ptr @_Znwm(i64 4)
+  // LLVM: call void @_ZdlPv(ptr [[P]])
+  // LLVM: ret void
+}
+
+extern "C" void test_aligned_alloc() {
+  __builtin_operator_delete(__builtin_operator_new(4, std::align_val_t(4)), std::align_val_t(4));
+
+  // CIR-LABEL: cir.func {{.*}} @test_aligned_alloc
+  // CIR: [[P:%.*]] = cir.call @_ZnwmSt11align_val_t({{%.*}}, {{%.*}}) : (!u64i, !u64i) -> !cir.ptr<!void>
+  // CIR: cir.call @_ZdlPvSt11align_val_t([[P]], {{%.*}}) : (!cir.ptr<!void>, !u64i) -> ()
+  // CIR: cir.return
+
+  // LLVM-LABEL: define{{.*}} void @test_aligned_alloc()
+  // LLVM: [[P:%.*]] = call ptr @_ZnwmSt11align_val_t(i64 4, i64 4)
+  // LLVM: call void @_ZdlPvSt11align_val_t(ptr [[P]], i64 4)
+  // LLVM: ret void
+}
+
+extern "C" void test_sized_delete() {
+  __builtin_operator_delete(__builtin_operator_new(4), 4);
+
+  // CIR-LABEL: cir.func {{.*}} @test_sized_delete
+  // CIR: [[P:%.*]] = cir.call @_Znwm({{%.*}}) : (!u64i) -> !cir.ptr<!void>
+  // CIR: cir.call @_ZdlPvm([[P]], {{%.*}}) : (!cir.ptr<!void>, !u64i) -> ()
+  // CIR: cir.return
+
+  // LLVM-LABEL: define{{.*}} void @test_sized_delete()
+  // LLVM: [[P:%.*]] = call ptr @_Znwm(i64 4)
+  // LLVM: call void @_ZdlPvm(ptr [[P]], i64 4)
+  // LLVM: ret void
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/new.cpp b/clang/test/CIR/Incubator/CodeGen/new.cpp
new file mode 100644
index 0000000000000..6cdb1e4f43a6d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/new.cpp
@@ -0,0 +1,370 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -emit-llvm %s -o %t-og.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t-og.ll %s
+
+#include "std-cxx.h"
+
+struct S {
+  S(int, int);
+};
+
+void m(int a, int b) {
+  std::shared_ptr<S> l = std::make_shared<S>(a, b);
+}
+
+// CHECK: cir.func {{.*}} @_ZSt11make_sharedI1SJRiS1_EESt10shared_ptrIT_EDpOT0_(
+// CHECK:   %0 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["args", init, const] {alignment = 8 : i64}
+// CHECK:   %1 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["args", init, const] {alignment = 8 : i64}
+// CHECK:   %2 = cir.alloca !rec_std3A3Ashared_ptr3CS3E, !cir.ptr<!rec_std3A3Ashared_ptr3CS3E>, ["__retval"] {alignment = 1 : i64}
+// CHECK:   cir.store{{.*}} %arg0, %0 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CHECK:   cir.store{{.*}} %arg1, %1 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CHECK:   cir.scope {
+// CHECK:     %4 = cir.const #cir.int<1> : !u64i
+// CHECK:     %5 = cir.call @_Znwm(%4) : (!u64i) -> !cir.ptr<!void>
+// CHECK:     %6 = cir.cast bitcast %5 : !cir.ptr<!void> -> !cir.ptr<!rec_S>
+// CHECK:     %7 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK:     %8 = cir.load{{.*}} %7 : !cir.ptr<!s32i>, !s32i
+// CHECK:     %9 = cir.load{{.*}} %1 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK:     %10 = cir.load{{.*}} %9 : !cir.ptr<!s32i>, !s32i
+// CHECK:     cir.call @_ZN1SC1Eii(%6, %8, %10) : (!cir.ptr<!rec_S>, !s32i, !s32i) -> ()
+// CHECK:     cir.call @_ZNSt10shared_ptrI1SEC1EPS0_(%2, %6) : (!cir.ptr<!rec_std3A3Ashared_ptr3CS3E>, !cir.ptr<!rec_S>) -> ()
+// CHECK:   }
+
+class B {
+public:
+  void construct(B* __p) {
+      ::new ((void*)__p) B;
+  }
+};
+
+// CHECK: cir.func {{.*}} @_ZN1B9constructEPS_(%arg0: !cir.ptr<!rec_B>
+// CHECK:   %0 = cir.alloca !cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!rec_B>>, ["this", init] {alignment = 8 : i64}
+// CHECK:   %1 = cir.alloca !cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!rec_B>>, ["__p", init] {alignment = 8 : i64}
+// CHECK:   cir.store{{.*}} %arg0, %0 : !cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!rec_B>>
+// CHECK:   cir.store{{.*}} %arg1, %1 : !cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!rec_B>>
+// CHECK:   %2 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!rec_B>>, !cir.ptr<!rec_B>
+// CHECK:   %3 = cir.const #cir.int<1> : !u64i
+// CHECK:   %4 = cir.load{{.*}} %1 : !cir.ptr<!cir.ptr<!rec_B>>, !cir.ptr<!rec_B>
+// CHECK:   %5 = cir.cast bitcast %4 : !cir.ptr<!rec_B> -> !cir.ptr<!void>
+// CHECK:   %6 = cir.cast bitcast %5 : !cir.ptr<!void> -> !cir.ptr<!rec_B>
+
+// Trivial default constructor call is lowered away.
+// CHECK:   cir.return
+// CHECK: }
+
+// LLVM-LABEL: define {{.*}} @_ZN1B9constructEPS_
+// LLVM-NOT:     call {{.*}} @_ZN1BC1Ev
+// LLVM:         ret void
+
+// OGCG-LABEL: define {{.*}} @_ZN1B9constructEPS_
+// OGCG-NOT:     call {{.*}} @_ZN1BC1Ev
+// OGCG:         ret void
+
+void t() {
+  B b;
+  b.construct(&b);
+}
+
+
+void t_new_constant_size() {
+  auto p = new double[16];
+}
+
+// In this test, NUM_ELEMENTS isn't used because no cookie is needed and there
+//   are no constructor calls needed.
+
+// CHECK:   cir.func {{.*}} @_Z19t_new_constant_sizev()
+// CHECK:    %0 = cir.alloca !cir.ptr<!cir.double>, !cir.ptr<!cir.ptr<!cir.double>>, ["p", init] {alignment = 8 : i64}
+// CHECK:    %[[NUM_ELEMENTS:.*]] = cir.const #cir.int<16> : !u64i
+// CHECK:    %[[ALLOCATION_SIZE:.*]] = cir.const #cir.int<128> : !u64i
+// CHECK:    %3 = cir.call @_Znam(%[[ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr<!void>
+// CHECK:    %4 = cir.cast bitcast %3 : !cir.ptr<!void> -> !cir.ptr<!cir.double>
+// CHECK:    cir.store{{.*}} %4, %0 : !cir.ptr<!cir.double>, !cir.ptr<!cir.ptr<!cir.double>>
+// CHECK:    cir.return
+// CHECK:  }
+
+void t_new_multidim_constant_size() {
+  auto p = new double[2][3][4];
+}
+
+// As above, NUM_ELEMENTS isn't used.
+
+// CHECK:   cir.func {{.*}} @_Z28t_new_multidim_constant_sizev()
+// CHECK:    %0 = cir.alloca !cir.ptr<!cir.array<!cir.array<!cir.double x 4> x 3>>, !cir.ptr<!cir.ptr<!cir.array<!cir.array<!cir.double x 4> x 3>>>, ["p", init] {alignment = 8 : i64}
+// CHECK:    %[[NUM_ELEMENTS:.*]] = cir.const #cir.int<24> : !u64i
+// CHECK:    %[[ALLOCATION_SIZE:.*]] = cir.const #cir.int<192> : !u64i
+// CHECK:    %3 = cir.call @_Znam(%[[ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr<!void>
+// CHECK:    %4 = cir.cast bitcast %3 : !cir.ptr<!void> -> !cir.ptr<!cir.double>
+// CHECK:    %5 = cir.cast bitcast %0 : !cir.ptr<!cir.ptr<!cir.array<!cir.array<!cir.double x 4> x 3>>> -> !cir.ptr<!cir.ptr<!cir.double>>
+// CHECK:    cir.store{{.*}} %4, %5 : !cir.ptr<!cir.double>, !cir.ptr<!cir.ptr<!cir.double>>
+// CHECK:  }
+
+class C {
+  public:
+    ~C();
+};
+
+void t_constant_size_nontrivial() {
+  auto p = new C[3];
+}
+
+// CHECK:  cir.func {{.*}} @_Z26t_constant_size_nontrivialv()
+// CHECK:    %0 = cir.alloca !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>>, ["p", init] {alignment = 8 : i64}
+// CHECK:    %[[NUM_ELEMENTS:.*]] = cir.const #cir.int<3> : !u64i
+// CHECK:    %[[SIZE_WITHOUT_COOKIE:.*]] = cir.const #cir.int<3> : !u64i
+// CHECK:    %[[ALLOCATION_SIZE:.*]] = cir.const #cir.int<11> : !u64i
+// CHECK:    %4 = cir.call @_Znam(%[[ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr<!void>
+// CHECK:    %5 = cir.cast bitcast %4 : !cir.ptr<!void> -> !cir.ptr<!u64i>
+// CHECK:    cir.store{{.*}} %[[NUM_ELEMENTS]], %5 : !u64i, !cir.ptr<!u64i>
+// CHECK:    %6 = cir.cast bitcast %4 : !cir.ptr<!void> -> !cir.ptr<!u8i>
+// CHECK:    %[[COOKIE_SIZE:.*]] = cir.const #cir.int<8> : !s32i
+// CHECK:    %8 = cir.ptr_stride %6, %[[COOKIE_SIZE]] : (!cir.ptr<!u8i>, !s32i) -> !cir.ptr<!u8i>
+// CHECK:    %9 = cir.cast bitcast %8 : !cir.ptr<!u8i> -> !cir.ptr<!rec_C>
+// CHECK:    cir.store{{.*}} %9, %0 : !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>>
+// CHECK:    cir.return
+// CHECK:  }
+
+class D {
+  public:
+    int x;
+    ~D();
+};
+
+void t_constant_size_nontrivial2() {
+  auto p = new D[3];
+}
+
+// In this test SIZE_WITHOUT_COOKIE isn't used, but it would be if there were
+// an initializer.
+
+// CHECK:  cir.func {{.*}} @_Z27t_constant_size_nontrivial2v()
+// CHECK:    %0 = cir.alloca !cir.ptr<!rec_D>, !cir.ptr<!cir.ptr<!rec_D>>, ["p", init] {alignment = 8 : i64}
+// CHECK:    %[[NUM_ELEMENTS:.*]] = cir.const #cir.int<3> : !u64i
+// CHECK:    %[[SIZE_WITHOUT_COOKIE:.*]] = cir.const #cir.int<12> : !u64i
+// CHECK:    %[[ALLOCATION_SIZE:.*]] = cir.const #cir.int<20> : !u64i
+// CHECK:    %4 = cir.call @_Znam(%[[ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr<!void>
+// CHECK:    %5 = cir.cast bitcast %4 : !cir.ptr<!void> -> !cir.ptr<!u64i>
+// CHECK:    cir.store{{.*}} %[[NUM_ELEMENTS]], %5 : !u64i, !cir.ptr<!u64i>
+// CHECK:    %6 = cir.cast bitcast %4 : !cir.ptr<!void> -> !cir.ptr<!u8i>
+// CHECK:    %[[COOKIE_SIZE:.*]] = cir.const #cir.int<8> : !s32i
+// CHECK:    %8 = cir.ptr_stride %6, %[[COOKIE_SIZE]] : (!cir.ptr<!u8i>, !s32i) -> !cir.ptr<!u8i>
+// CHECK:    %9 = cir.cast bitcast %8 : !cir.ptr<!u8i> -> !cir.ptr<!rec_D>
+// CHECK:    cir.store{{.*}} %9, %0 : !cir.ptr<!rec_D>, !cir.ptr<!cir.ptr<!rec_D>>
+// CHECK:    cir.return
+// CHECK:  }
+
+void t_constant_size_memset_init() {
+  auto p = new int[16] {};
+}
+
+// In this test, NUM_ELEMENTS isn't used because no cookie is needed and there
+//   are no constructor calls needed.
+
+// CHECK:  cir.func {{.*}} @_Z27t_constant_size_memset_initv()
+// CHECK:    %[[NUM_ELEMENTS:.*]] = cir.const #cir.int<16> : !u64i
+// CHECK:    %[[ALLOCATION_SIZE:.*]] = cir.const #cir.int<64> : !u64i
+// CHECK:    %[[ALLOC_PTR:.*]] = cir.call @_Znam(%[[ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr<!void>
+// CHECK:    %[[ELEM_PTR:.*]] = cir.cast bitcast %[[ALLOC_PTR]] : !cir.ptr<!void> -> !cir.ptr<!s32i>
+// CHECK:    %[[VOID_PTR:.*]] = cir.cast bitcast %[[ELEM_PTR]] : !cir.ptr<!s32i> -> !cir.ptr<!void>
+// CHECK:    %[[ZERO:.*]] = cir.const #cir.int<0> : !u8i
+// CHECK:    %[[ZERO_I32:.*]] = cir.cast integral %[[ZERO]] : !u8i -> !s32i
+// CHECK:    cir.libc.memset %[[ALLOCATION_SIZE]] bytes from %[[VOID_PTR]] set to %[[ZERO_I32]] : !cir.ptr<!void>, !s32i, !u64i
+
+void t_constant_size_partial_init() {
+  auto p = new int[16] { 1, 2, 3 };
+}
+
+// CHECK:  cir.func {{.*}} @_Z28t_constant_size_partial_initv()
+// CHECK:    %[[NUM_ELEMENTS:.*]] = cir.const #cir.int<16> : !u64i
+// CHECK:    %[[ALLOCATION_SIZE:.*]] = cir.const #cir.int<64> : !u64i
+// CHECK:    %[[ALLOC_PTR:.*]] = cir.call @_Znam(%[[ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr<!void>
+// CHECK:    %[[ELEM_0_PTR:.*]] = cir.cast bitcast %[[ALLOC_PTR]] : !cir.ptr<!void> -> !cir.ptr<!s32i>
+// CHECK:    %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !s32i
+// CHECK:    cir.store{{.*}} %[[CONST_ONE]], %[[ELEM_0_PTR]] : !s32i, !cir.ptr<!s32i>
+// CHECK:    %[[OFFSET:.*]] = cir.const #cir.int<1> : !s32i
+// CHECK:    %[[ELEM_1_PTR:.*]] = cir.ptr_stride %[[ELEM_0_PTR]], %[[OFFSET]] : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+// CHECK:    %[[CONST_TWO:.*]] = cir.const #cir.int<2> : !s32i
+// CHECK:    cir.store{{.*}} %[[CONST_TWO]], %[[ELEM_1_PTR]] : !s32i, !cir.ptr<!s32i>
+// CHECK:    %[[OFFSET1:.*]] = cir.const #cir.int<1> : !s32i
+// CHECK:    %[[ELEM_2_PTR:.*]] = cir.ptr_stride %[[ELEM_1_PTR]], %[[OFFSET1]] : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+// CHECK:    %[[CONST_THREE:.*]] = cir.const #cir.int<3> : !s32i
+// CHECK:    cir.store{{.*}} %[[CONST_THREE]], %[[ELEM_2_PTR]] : !s32i, !cir.ptr<!s32i>
+// CHECK:    %[[OFFSET2:.*]] = cir.const #cir.int<1> : !s32i
+// CHECK:    %[[ELEM_3_PTR:.*]] = cir.ptr_stride %[[ELEM_2_PTR]], %[[OFFSET2]] : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+// CHECK:    %[[INIT_SIZE:.*]] = cir.const #cir.int<12> : !u64i
+// CHECK:    %[[REMAINING_SIZE:.*]] = cir.binop(sub, %[[ALLOCATION_SIZE]], %[[INIT_SIZE]]) : !u64i
+// CHECK:    %[[VOID_PTR:.*]] = cir.cast bitcast %[[ELEM_3_PTR]] : !cir.ptr<!s32i> -> !cir.ptr<!void>
+// CHECK:    %[[ZERO:.*]] = cir.const #cir.int<0> : !u8i
+// CHECK:    %[[ZERO_I32:.*]] = cir.cast integral %[[ZERO]] : !u8i -> !s32i
+// CHECK:    cir.libc.memset %[[REMAINING_SIZE]] bytes from %[[VOID_PTR]] set to %[[ZERO_I32]] : !cir.ptr<!void>, !s32i, !u64i
+
+void t_new_var_size(size_t n) {
+  auto p = new char[n];
+}
+
+// CHECK:  cir.func {{.*}} @_Z14t_new_var_sizem
+// CHECK:    %[[N:.*]] = cir.load{{.*}} %[[ARG_ALLOCA:.*]]
+// CHECK:    %[[PTR:.*]] = cir.call @_Znam(%[[N]]) : (!u64i)
+
+void t_new_var_size2(int n) {
+  auto p = new char[n];
+}
+
+// CHECK:  cir.func {{.*}} @_Z15t_new_var_size2i
+// CHECK:    %[[N:.*]] = cir.load{{.*}} %[[ARG_ALLOCA:.*]]
+// CHECK:    %[[N_SIZE_T:.*]] = cir.cast integral %[[N]] : !s32i -> !u64i
+// CHECK:    %[[PTR:.*]] = cir.call @_Znam(%[[N_SIZE_T]]) : (!u64i)
+
+void t_new_var_size3(size_t n) {
+  auto p = new double[n];
+}
+
+// CHECK:  cir.func {{.*}} @_Z15t_new_var_size3m
+// CHECK:    %[[N:.*]] = cir.load{{.*}} %[[ARG_ALLOCA:.*]]
+// CHECK:    %[[ELEMENT_SIZE:.*]] = cir.const #cir.int<8> : !u64i
+// CHECK:    %[[RESULT:.*]], %[[OVERFLOW:.*]] = cir.binop.overflow(mul, %[[N]], %[[ELEMENT_SIZE]]) : !u64i, (!u64i, !cir.bool)
+// CHECK:    %[[ALL_ONES:.*]] = cir.const #cir.int<18446744073709551615> : !u64i
+// CHECK:    %[[ALLOC_SIZE:.*]] = cir.select if %[[OVERFLOW]] then %[[ALL_ONES]] else %[[RESULT]] : (!cir.bool, !u64i, !u64i)
+// CHECK:    %[[PTR:.*]] = cir.call @_Znam(%[[ALLOC_SIZE]]) : (!u64i)
+
+void t_new_var_size4(int n) {
+  auto p = new double[n];
+}
+
+// CHECK:  cir.func {{.*}} @_Z15t_new_var_size4i
+// CHECK:    %[[N:.*]] = cir.load{{.*}} %[[ARG_ALLOCA:.*]]
+// CHECK:    %[[N_SIZE_T:.*]] = cir.cast integral %[[N]] : !s32i -> !u64i
+// CHECK:    %[[ELEMENT_SIZE:.*]] = cir.const #cir.int<8> : !u64i
+// CHECK:    %[[RESULT:.*]], %[[OVERFLOW:.*]] = cir.binop.overflow(mul, %[[N_SIZE_T]], %[[ELEMENT_SIZE]]) : !u64i, (!u64i, !cir.bool)
+// CHECK:    %[[ALL_ONES:.*]] = cir.const #cir.int<18446744073709551615> : !u64i
+// CHECK:    %[[ALLOC_SIZE:.*]] = cir.select if %[[OVERFLOW]] then %[[ALL_ONES]] else %[[RESULT]] : (!cir.bool, !u64i, !u64i)
+// CHECK:    %[[PTR:.*]] = cir.call @_Znam(%[[ALLOC_SIZE]]) : (!u64i)
+
+void t_new_var_size5(int n) {
+  auto p = new double[n][2][3];
+}
+
+// NUM_ELEMENTS isn't used in this case because there is no cookie.
+
+// CHECK:  cir.func {{.*}} @_Z15t_new_var_size5i
+// CHECK:    %[[N:.*]] = cir.load{{.*}} %[[ARG_ALLOCA:.*]]
+// CHECK:    %[[N_SIZE_T:.*]] = cir.cast integral %[[N]] : !s32i -> !u64i
+// CHECK:    %[[ELEMENT_SIZE:.*]] = cir.const #cir.int<48> : !u64i
+// CHECK:    %[[RESULT:.*]], %[[OVERFLOW:.*]] = cir.binop.overflow(mul, %[[N_SIZE_T]], %[[ELEMENT_SIZE]]) : !u64i, (!u64i, !cir.bool)
+// CHECK:    %[[NUM_ELEMENTS_MULTIPLIER:.*]] = cir.const #cir.int<6>
+// CHECK:    %[[NUM_ELEMENTS:.*]] = cir.binop(mul, %[[N_SIZE_T]], %[[NUM_ELEMENTS_MULTIPLIER]]) : !u64i
+// CHECK:    %[[ALL_ONES:.*]] = cir.const #cir.int<18446744073709551615> : !u64i
+// CHECK:    %[[ALLOC_SIZE:.*]] = cir.select if %[[OVERFLOW]] then %[[ALL_ONES]] else %[[RESULT]] : (!cir.bool, !u64i, !u64i)
+// CHECK:    %[[PTR:.*]] = cir.call @_Znam(%[[ALLOC_SIZE]]) : (!u64i)
+
+void t_new_var_size6(int n) {
+  auto p = new double[n] { 1, 2, 3 };
+}
+
+// CHECK:  cir.func {{.*}} @_Z15t_new_var_size6i
+// CHECK:    %[[N:.*]] = cir.load{{.*}} %[[ARG_ALLOCA:.*]]
+// CHECK:    %[[N_SIZE_T:.*]] = cir.cast integral %[[N]] : !s32i -> !u64i
+// CHECK:    %[[MIN_SIZE:.*]] = cir.const #cir.int<3> : !u64i
+// CHECK:    %[[LT_MIN_SIZE:.*]] = cir.cmp(lt, %[[N_SIZE_T]], %[[MIN_SIZE]]) : !u64i, !cir.bool
+// CHECK:    %[[ELEMENT_SIZE:.*]] = cir.const #cir.int<8> : !u64i
+// CHECK:    %[[RESULT:.*]], %[[OVERFLOW:.*]] = cir.binop.overflow(mul, %[[N_SIZE_T]], %[[ELEMENT_SIZE]]) : !u64i, (!u64i, !cir.bool)
+// CHECK:    %[[ANY_OVERFLOW:.*]] = cir.binop(or, %[[LT_MIN_SIZE]], %[[OVERFLOW]]) : !cir.bool
+// CHECK:    %[[ALL_ONES:.*]] = cir.const #cir.int<18446744073709551615> : !u64i
+// CHECK:    %[[ALLOC_SIZE:.*]] = cir.select if %[[ANY_OVERFLOW]] then %[[ALL_ONES]] else %[[RESULT]] : (!cir.bool, !u64i, !u64i)
+// CHECK:    %[[PTR:.*]] = cir.call @_Znam(%[[ALLOC_SIZE]]) : (!u64i)
+
+void t_new_var_size7(__int128 n) {
+  auto p = new double[n];
+}
+
+// CHECK:  cir.func {{.*}} @_Z15t_new_var_size7n
+// CHECK:    %[[N:.*]] = cir.load{{.*}} %[[ARG_ALLOCA:.*]]
+// CHECK:    %[[N_SIZE_T:.*]] = cir.cast integral %[[N]] : !s128i -> !u64i
+// CHECK:    %[[ELEMENT_SIZE:.*]] = cir.const #cir.int<8> : !u64i
+// CHECK:    %[[RESULT:.*]], %[[OVERFLOW:.*]] = cir.binop.overflow(mul, %[[N_SIZE_T]], %[[ELEMENT_SIZE]]) : !u64i, (!u64i, !cir.bool)
+// CHECK:    %[[ALL_ONES:.*]] = cir.const #cir.int<18446744073709551615> : !u64i
+// CHECK:    %[[ALLOC_SIZE:.*]] = cir.select if %[[OVERFLOW]] then %[[ALL_ONES]] else %[[RESULT]] : (!cir.bool, !u64i, !u64i)
+// CHECK:    %[[PTR:.*]] = cir.call @_Znam(%[[ALLOC_SIZE]]) : (!u64i)
+
+void t_new_var_size_nontrivial(size_t n) {
+  auto p = new D[n];
+}
+
+// CHECK:  cir.func {{.*}} @_Z25t_new_var_size_nontrivialm
+// CHECK:    %[[N:.*]] = cir.load{{.*}} %[[ARG_ALLOCA:.*]]
+// CHECK:    %[[ELEMENT_SIZE:.*]] = cir.const #cir.int<4> : !u64i
+// CHECK:    %[[SIZE_WITHOUT_COOKIE:.*]], %[[OVERFLOW:.*]] = cir.binop.overflow(mul, %[[N]], %[[ELEMENT_SIZE]]) : !u64i, (!u64i, !cir.bool)
+// CHECK:    %[[COOKIE_SIZE:.*]] = cir.const #cir.int<8> : !u64i
+// CHECK:    %[[SIZE:.*]], %[[OVERFLOW2:.*]] = cir.binop.overflow(add, %[[SIZE_WITHOUT_COOKIE]], %[[COOKIE_SIZE]]) : !u64i, (!u64i, !cir.bool)
+// CHECK:    %[[ANY_OVERFLOW:.*]] = cir.binop(or, %[[OVERFLOW]], %[[OVERFLOW2]]) : !cir.bool
+// CHECK:    %[[ALL_ONES:.*]] = cir.const #cir.int<18446744073709551615> : !u64i
+// CHECK:    %[[ALLOC_SIZE:.*]] = cir.select if %[[ANY_OVERFLOW]] then %[[ALL_ONES]] else %[[SIZE]] : (!cir.bool, !u64i, !u64i)
+// CHECK:    %[[PTR:.*]] = cir.call @_Znam(%[[ALLOC_SIZE]]) : (!u64i)
+
+void t_multidim_init() {
+  auto *p = new int[2][3] { {1, 2, 3}, {4, 5, 6}};
+}
+
+// CHECK:  cir.func {{.*}} @_Z15t_multidim_initv()
+// CHECK:    %[[NUM_ELEMENTS:.*]] = cir.const #cir.int<6> : !u64i
+// CHECK:    %[[ALLOCATION_SIZE:.*]] = cir.const #cir.int<24> : !u64i
+// CHECK:    %[[NEW_PTR:.*]] = cir.call @_Znam(%[[ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr<!void>
+// CHECK:    %[[ELEMENT_PTR:.*]] = cir.cast bitcast %[[NEW_PTR]] : !cir.ptr<!void> -> !cir.ptr<!s32i>
+// CHECK:    %[[ARRAY_ELEM0_PTR:.*]] = cir.cast bitcast %[[ELEMENT_PTR]] : !cir.ptr<!s32i> -> !cir.ptr<!cir.array<!s32i x 3>>
+// CHECK:    %[[OFFSET0:.*]] = cir.const #cir.int<0> : !s32i
+// CHECK:    %[[ELEM_00_PTR:.*]] = cir.get_element %[[ARRAY_ELEM0_PTR]][%[[OFFSET0]]] : (!cir.ptr<!cir.array<!s32i x 3>>, !s32i) -> !cir.ptr<!s32i>
+// CHECK:    %[[ELEM_00_VAL:.*]] = cir.const #cir.int<1> : !s32i
+// CHECK:    cir.store{{.*}} %[[ELEM_00_VAL]], %[[ELEM_00_PTR]] : !s32i, !cir.ptr<!s32i>
+// CHECK:    %[[OFFSET1:.*]] = cir.const #cir.int<1> : !s64i
+// CHECK:    %[[ELEM_01_PTR:.*]] = cir.get_element %[[ARRAY_ELEM0_PTR]][%[[OFFSET1]]] : (!cir.ptr<!cir.array<!s32i x 3>>, !s64i) -> !cir.ptr<!s32i>
+// CHECK:    %[[ELEM_01_VAL:.*]] = cir.const #cir.int<2> : !s32i
+// CHECK:    cir.store{{.*}} %[[ELEM_01_VAL]], %[[ELEM_01_PTR]] : !s32i, !cir.ptr<!s32i>
+// CHECK:    %[[OFFSET2:.*]] = cir.const #cir.int<2> : !s64i
+// CHECK:    %[[ELEM_02_PTR:.*]] = cir.get_element %[[ARRAY_ELEM0_PTR]][%[[OFFSET2]]] : (!cir.ptr<!cir.array<!s32i x 3>>, !s64i) -> !cir.ptr<!s32i>
+// CHECK:    %[[ELEM_02_VAL:.*]] = cir.const #cir.int<3> : !s32i
+// CHECK:    cir.store{{.*}} %[[ELEM_02_VAL]], %[[ELEM_02_PTR]] : !s32i, !cir.ptr<!s32i>
+// CHECK:    %[[OFFSET3:.*]] = cir.const #cir.int<1> : !s32i
+// CHECK:    %[[ARRAY_ELEM1_PTR:.*]] = cir.ptr_stride %[[ARRAY_ELEM0_PTR]], %[[OFFSET3]] : (!cir.ptr<!cir.array<!s32i x 3>>, !s32i) -> !cir.ptr<!cir.array<!s32i x 3>>
+// CHECK:    %[[OFFSET4:.*]] = cir.const #cir.int<0> : !s32i
+// CHECK:    %[[ELEM_10_PTR:.*]] = cir.get_element %[[ARRAY_ELEM1_PTR]][%[[OFFSET4]]] : (!cir.ptr<!cir.array<!s32i x 3>>, !s32i) -> !cir.ptr<!s32i>
+// CHECK:    %[[ELEM_10_VAL:.*]] = cir.const #cir.int<4> : !s32i
+// CHECK:    cir.store{{.*}} %[[ELEM_10_VAL]], %[[ELEM_10_PTR]] : !s32i, !cir.ptr<!s32i>
+// CHECK:    %[[OFFSET5:.*]] = cir.const #cir.int<1> : !s64i
+// CHECK:    %[[ELEM_11_PTR:.*]] = cir.get_element %[[ARRAY_ELEM1_PTR]][%[[OFFSET5]]] : (!cir.ptr<!cir.array<!s32i x 3>>, !s64i) -> !cir.ptr<!s32i>
+// CHECK:    %[[ELEM_11_VAL:.*]] = cir.const #cir.int<5> : !s32i
+// CHECK:    cir.store{{.*}} %[[ELEM_11_VAL]], %[[ELEM_11_PTR]] : !s32i, !cir.ptr<!s32i>
+// CHECK:    %[[OFFSET6:.*]] = cir.const #cir.int<2> : !s64i
+// CHECK:    %[[ELEM_12_PTR:.*]] = cir.get_element %[[ARRAY_ELEM1_PTR]][%[[OFFSET6]]] : (!cir.ptr<!cir.array<!s32i x 3>>, !s64i) -> !cir.ptr<!s32i>
+// CHECK:    %[[ELEM_12_VAL:.*]] = cir.const #cir.int<6> : !s32i
+// CHECK:    cir.store{{.*}} %[[ELEM_12_VAL]], %[[ELEM_12_PTR]] : !s32i, !cir.ptr<!s32i>
+
+void test_new_with_complex_type() {
+  _Complex float *a = new _Complex float{1.0f, 2.0f};
+}
+
+// CHECK: cir.func{{.*}} @_Z26test_new_with_complex_typev
+// CHECK:   %[[A_ADDR:.*]] = cir.alloca !cir.ptr<!cir.complex<!cir.float>>, !cir.ptr<!cir.ptr<!cir.complex<!cir.float>>>, ["a", init]
+// CHECK:   %[[COMPLEX_SIZE:.*]] = cir.const #cir.int<8> : !u64i
+// CHECK:   %[[NEW_COMPLEX:.*]] = cir.call @_Znwm(%[[COMPLEX_SIZE]]) : (!u64i) -> !cir.ptr<!void>
+// CHECK:   %[[COMPLEX_PTR:.*]] = cir.cast bitcast %[[NEW_COMPLEX]] : !cir.ptr<!void> -> !cir.ptr<!cir.complex<!cir.float>>
+// CHECK:   %[[COMPLEX_VAL:.*]] = cir.const #cir.complex<#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00> : !cir.float> : !cir.complex<!cir.float>
+// CHECK:   cir.store{{.*}} %[[COMPLEX_VAL]], %[[COMPLEX_PTR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+// CHECK:   cir.store{{.*}} %[[COMPLEX_PTR]], %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.ptr<!cir.ptr<!cir.complex<!cir.float>>>
+
+// LLVM: define{{.*}} void @_Z26test_new_with_complex_typev
+// LLVM:   %[[A_ADDR:.*]] = alloca ptr, i64 1, align 8
+// LLVM:   %[[NEW_COMPLEX:.*]] = call ptr @_Znwm(i64 8)
+// LLVM:   store { float, float } { float 1.000000e+00, float 2.000000e+00 }, ptr %[[NEW_COMPLEX]], align 8
+// LLVM:   store ptr %[[NEW_COMPLEX]], ptr %[[A_ADDR]], align 8
+
+// OGCG: define{{.*}} void @_Z26test_new_with_complex_typev
+// OGCG:   %[[A_ADDR:.*]] = alloca ptr, align 8
+// OGCG:   %[[NEW_COMPLEX:.*]] = call noalias noundef nonnull ptr @_Znwm(i64 noundef 8)
+// OGCG:   %[[COMPLEX_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[NEW_COMPLEX]], i32 0, i32 0
+// OGCG:   %[[COMPLEX_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[NEW_COMPLEX]], i32 0, i32 1
+// OGCG:   store float 1.000000e+00, ptr %[[COMPLEX_REAL_PTR]], align 8
+// OGCG:   store float 2.000000e+00, ptr %[[COMPLEX_IMAG_PTR]], align 4
+// OGCG:   store ptr %[[NEW_COMPLEX]], ptr %[[A_ADDR]], align 8
diff --git a/clang/test/CIR/Incubator/CodeGen/no-common.c b/clang/test/CIR/Incubator/CodeGen/no-common.c
new file mode 100644
index 0000000000000..61ecea1916369
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/no-common.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir %s -emit-cir -o - | FileCheck %s -check-prefix=CHECK-DEFAULT
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir %s -fno-common -emit-cir -o - | FileCheck %s -check-prefix=CHECK-DEFAULT
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir %s -fcommon -emit-cir -o - | FileCheck %s -check-prefix=CHECK-COMMON
+
+// CHECK-COMMON: cir.global common @x
+// CHECK-DEFAULT: cir.global external @x
+int x;
+
+// CHECK-COMMON: cir.global external @ABC
+// CHECK-DEFAULT: cir.global external @ABC
+typedef void* (*fn_t)(long a, long b, char *f, int c);
+fn_t ABC __attribute__ ((nocommon));
+
+// CHECK-COMMON: cir.global common @y
+// CHECK-DEFAULT: cir.global common @y
+int y __attribute__((common));
diff --git a/clang/test/CIR/Incubator/CodeGen/no-pie.c b/clang/test/CIR/Incubator/CodeGen/no-pie.c
new file mode 100644
index 0000000000000..0639fd34c3633
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/no-pie.c
@@ -0,0 +1,11 @@
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -fno-PIE -S -Xclang -emit-cir %s -o %t1.cir
+// RUN: FileCheck --input-file=%t1.cir %s -check-prefix=CIR
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -fno-PIE -S -Xclang -emit-llvm %s -o %t1.ll
+// RUN: FileCheck --input-file=%t1.ll %s -check-prefix=LLVM
+
+extern int var;
+int get() {
+  return var;
+}
+// CIR: cir.global "private" external dso_local @var : !s32i {alignment = 4 : i64}
+// LLVM: @var = external dso_local global i32
diff --git a/clang/test/CIR/Incubator/CodeGen/no-proto-fun-ptr.c b/clang/test/CIR/Incubator/CodeGen/no-proto-fun-ptr.c
new file mode 100644
index 0000000000000..e291c2fe7408b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/no-proto-fun-ptr.c
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o -  | FileCheck %s
+
+void empty();
+
+void check_noproto_ptr() {
+  void (*fun)(void) = empty;
+}
+
+// CHECK:  cir.func {{.*}} no_proto {{.*}} @check_noproto_ptr()
+// CHECK:    [[ALLOC:%.*]] = cir.alloca !cir.ptr<!cir.func<()>>, !cir.ptr<!cir.ptr<!cir.func<()>>>, ["fun", init] {alignment = 8 : i64}
+// CHECK:    [[GGO:%.*]] = cir.get_global @empty : !cir.ptr<!cir.func<()>>
+// CHECK:    cir.store{{.*}} [[GGO]], [[ALLOC]] : !cir.ptr<!cir.func<()>>, !cir.ptr<!cir.ptr<!cir.func<()>>>
+// CHECK:    cir.return
+
+void empty(void) {}
+
+void buz() {
+  void (*func)();
+  (*func)();
+}
+
+// CHECK:  cir.func {{.*}} @buz()
+// CHECK:    [[FNPTR_ALLOC:%.*]] = cir.alloca !cir.ptr<!cir.func<(...)>>, !cir.ptr<!cir.ptr<!cir.func<(...)>>>, ["func"] {alignment = 8 : i64}
+// CHECK:    [[FNPTR:%.*]] = cir.load deref{{.*}} [[FNPTR_ALLOC]] : !cir.ptr<!cir.ptr<!cir.func<(...)>>>, !cir.ptr<!cir.func<(...)>>
+// CHECK:    [[CAST:%.*]] = cir.cast bitcast %1 : !cir.ptr<!cir.func<(...)>> -> !cir.ptr<!cir.func<()>>
+// CHECK:    cir.call [[CAST]]() : (!cir.ptr<!cir.func<()>>) -> ()
+// CHECK:    cir.return
diff --git a/clang/test/CIR/Incubator/CodeGen/no-proto-is-void.cpp b/clang/test/CIR/Incubator/CodeGen/no-proto-is-void.cpp
new file mode 100644
index 0000000000000..fcb73e304a118
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/no-proto-is-void.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -x c++ -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -std=c2x -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Both CXX and C2X don't support no-prototype functions. They default to void.
+int noProto();
+// CHECK: cir.func {{.*}} @{{.*}}noProto{{.*}}() -> !s32i
+int test(int x) {
+  return noProto();
+  // CHECK: {{.+}} = cir.call @{{.*}}noProto{{.*}}() : () -> !s32i
+}
+int noProto() { return 0; }
diff --git a/clang/test/CIR/Incubator/CodeGen/no-prototype.c b/clang/test/CIR/Incubator/CodeGen/no-prototype.c
new file mode 100644
index 0000000000000..79903c8c37a23
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/no-prototype.c
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+//===----------------------------------------------------------------------===//
+// DEFINED BEHAVIOUR
+//===----------------------------------------------------------------------===//
+
+// No-proto definition followed by a correct call.
+int noProto0(x) int x; { return x; }
+// CHECK: cir.func {{.*}} no_proto {{.*}} @noProto0(%arg0: !s32i loc({{.*}})) -> !s32i
+int test0(int x) {
+  // CHECK: cir.func {{.*}} @test0
+  return noProto0(x); // We know the definition. Should be a direct call.
+  // CHECK: %{{.+}} = cir.call @noProto0(%{{.+}})
+}
+
+// Declaration without prototype followed by its definition, then a correct call.
+//
+// Prototyped definition overrides no-proto declaration before any call is made,
+// only allowing calls with proper arguments. This is the only case where the
+// definition is not marked as no-proto.
+int noProto1();
+int noProto1(int x) { return x; }
+// CHECK: cir.func {{.*}} @noProto1(%arg0: !s32i {{.+}}) -> !s32i
+int test1(int x) {
+  // CHECK: cir.func {{.*}} @test1
+  return noProto1(x);
+  // CHECK: %{{.+}} = cir.call @noProto1(%{{[0-9]+}}) : (!s32i) -> !s32i
+}
+
+// Declaration without prototype followed by a correct call, then its definition.
+//
+// Call to no-proto is made before definition, so a variadic call that takes anything
+// is created. Later, when the definition is found, no-proto is replaced.
+int noProto2();
+int test2(int x) {
+  return noProto2(x);
+  // CHECK:  [[GGO:%.*]] = cir.get_global @noProto2 : !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+  // CHECK:  {{.*}} = cir.call [[GGO]](%{{[0-9]+}}) : (!cir.ptr<!cir.func<(!s32i) -> !s32i>>, !s32i) -> !s32i
+}
+int noProto2(int x) { return x; }
+// CHECK: cir.func {{.*}} no_proto {{.*}} @noProto2(%arg0: !s32i loc({{.*}})) -> !s32i
+
+// No-proto declaration without definition (any call here is "correct").
+//
+// Call to no-proto is made before definition, so a variadic call that takes anything
+// is created. Definition is not in the translation unit, so it is left as is.
+int noProto3();
+// cir.func private no_proto @noProto3(...) -> !s32i
+int test3(int x) {
+// CHECK: cir.func {{.*}} @test3
+  return noProto3(x);
+  // CHECK:  [[GGO:%.*]] = cir.get_global @noProto3 : !cir.ptr<!cir.func<(...) -> !s32i>>
+  // CHECK:  [[CAST:%.*]] = cir.cast bitcast [[GGO]] : !cir.ptr<!cir.func<(...) -> !s32i>> -> !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+  // CHECK:  {{%.*}} = cir.call [[CAST]](%{{[0-9]+}}) : (!cir.ptr<!cir.func<(!s32i) -> !s32i>>, !s32i) -> !s32i
+}
+
+
+//===----------------------------------------------------------------------===//
+// UNDEFINED BEHAVIOUR
+//
+// No-proto definitions followed by incorrect calls.
+//===----------------------------------------------------------------------===//
+
+// No-proto definition followed by an incorrect call due to extra args.
+int noProto4() { return 0; }
+// cir.func private no_proto {{.*}} @noProto4() -> !s32i
+int test4(int x) {
+  return noProto4(x); // Even if we know the definition, this should compile.
+  // CHECK:  [[GGO:%.*]] = cir.get_global @noProto4 : !cir.ptr<!cir.func<() -> !s32i>>
+  // CHECK:  [[CAST:%.*]] = cir.cast bitcast [[GGO]] : !cir.ptr<!cir.func<() -> !s32i>> -> !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+  // CHECK:  {{%.*}} = cir.call [[CAST]]({{%.*}}) : (!cir.ptr<!cir.func<(!s32i) -> !s32i>>, !s32i) -> !s32i
+}
+
+// No-proto definition followed by an incorrect call due to lack of args.
+int noProto5();
+int test5(int x) {
+  return noProto5();
+  // CHECK:  [[GGO:%.*]] = cir.get_global @noProto5 : !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+  // CHECK:  [[CAST:%.*]] = cir.cast bitcast [[GGO]] : !cir.ptr<!cir.func<(!s32i) -> !s32i>> -> !cir.ptr<!cir.func<() -> !s32i>>
+  // CHECK:  {{%.*}} = cir.call [[CAST]]() : (!cir.ptr<!cir.func<() -> !s32i>>) -> !s32i
+}
+int noProto5(int x) { return x; }
+// CHECK: cir.func {{.*}} no_proto {{.*}} @noProto5(%arg0: !s32i loc({{.*}})) -> !s32i
diff --git a/clang/test/CIR/Incubator/CodeGen/no-unique-address.cpp b/clang/test/CIR/Incubator/CodeGen/no-unique-address.cpp
new file mode 100644
index 0000000000000..e7a715786a250
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/no-unique-address.cpp
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -emit-llvm %s -o %t-og.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t-og.ll %s
+
+// Test that [[no_unique_address]] empty fields are handled correctly.
+// These fields are zero-sized and don't occupy space in the struct layout,
+// but we still need to be able to initialize them in constructors.
+// Trivial default constructors for empty fields are lowered away.
+
+struct Empty {};
+
+struct S {
+  int x;
+  [[no_unique_address]] Empty e;
+  S() : x(1), e() {}
+};
+
+void test() {
+  S s;
+}
+
+// The struct should only have space for 'x' (the empty field is zero-sized)
+// CIR-DAG: !rec_S = !cir.record<struct "S" {!s32i}>
+
+// CIR: cir.func {{.*}}linkonce_odr @_ZN1SC2Ev
+// CIR:   cir.store {{.*}} : !s32i, !cir.ptr<!s32i>
+// CIR:   cir.return
+
+// Trivial default constructor call is lowered away, matching OG behavior
+// LLVM-LABEL: define {{.*}} @_ZN1SC2Ev
+// LLVM:   store i32 1
+// LLVM-NOT:   call void @_ZN5EmptyC1Ev
+// LLVM:   ret void
+
+// OGCG-LABEL: define {{.*}} @_ZN1SC2Ev
+// OGCG:   store i32 1
+// OGCG:   ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/noexcept.cpp b/clang/test/CIR/Incubator/CodeGen/noexcept.cpp
new file mode 100644
index 0000000000000..b5c5053ba2c47
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/noexcept.cpp
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -x c++ -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -std=c++11 %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+void may_throw();
+void no_throw() noexcept;
+
+bool test_noexcept_func_false() {
+  return noexcept(may_throw());
+}
+// CHECK-LABEL: cir.func{{.*}} @_Z24test_noexcept_func_falsev
+// CHECK:         %[[CONST:.*]] = cir.const #false
+// CHECK:         cir.return
+
+bool test_noexcept_func_true() {
+  return noexcept(no_throw());
+}
+// CHECK-LABEL: cir.func{{.*}} @_Z23test_noexcept_func_truev
+// CHECK:         %[[CONST:.*]] = cir.const #true
+// CHECK:         cir.return
+
+auto lambda_may_throw = []() {};
+auto lambda_no_throw = []() noexcept {};
+
+bool test_noexcept_lambda_false() {
+  return noexcept(lambda_may_throw());
+}
+// CHECK-LABEL: cir.func{{.*}} @_Z26test_noexcept_lambda_falsev
+// CHECK:         %[[CONST:.*]] = cir.const #false
+// CHECK:         cir.return
+
+bool test_noexcept_lambda_true() {
+  return noexcept(lambda_no_throw());
+}
+// CHECK-LABEL: cir.func{{.*}} @_Z25test_noexcept_lambda_truev
+// CHECK:         %[[CONST:.*]] = cir.const #true
+// CHECK:         cir.return
diff --git a/clang/test/CIR/Incubator/CodeGen/non-odr-use-constant.cpp b/clang/test/CIR/Incubator/CodeGen/non-odr-use-constant.cpp
new file mode 100644
index 0000000000000..f0fb4342ac744
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/non-odr-use-constant.cpp
@@ -0,0 +1,66 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck %s --input-file=%t.ll --check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ogcg.ll
+// RUN: FileCheck %s --input-file=%t.ogcg.ll --check-prefix=OGCG
+//
+// Test non-ODR-use constant expressions
+
+namespace llvm {
+  template<typename ValueTy> class StringMapEntry {};
+  template<typename ValueTy> class StringMapIterBase {
+  public:
+    StringMapEntry<ValueTy>& operator*() const;
+    StringMapIterBase& operator++();
+    friend bool operator!=(const StringMapIterBase& LHS, const StringMapIterBase& RHS);
+  };
+  template<typename ValueTy> class StringMap {
+  public:
+    StringMapIterBase<ValueTy> begin();
+    StringMapIterBase<ValueTy> end();
+  };
+  struct EmptyStringSetTag {};
+  template<class AllocatorTy = int> class StringSet : public StringMap<EmptyStringSetTag> {};
+}
+
+namespace clang {
+  // Static variable that will be referenced without ODR-use in range-for
+  static llvm::StringSet<> BuiltinClasses;
+
+  void EmitBuiltins() {
+    // This range-for iterates over BuiltinClasses without constituting an ODR-use
+    // because it's used in an unevaluated context for the range-for desugaring
+    for (const auto &Entry : BuiltinClasses) {
+    }
+  }
+}
+
+// CIR: cir.global "private" internal dso_local @_ZN5clangL14BuiltinClassesE
+// CIR: cir.func {{.*}}@_ZN5clang12EmitBuiltinsEv()
+// CIR:   %{{.*}} = cir.const #cir.global_view<@_ZN5clangL14BuiltinClassesE>
+
+// LLVM: @_ZN5clangL14BuiltinClassesE = internal global
+// LLVM: define {{.*}}@_ZN5clang12EmitBuiltinsEv()
+// LLVM:   %{{.*}} = alloca ptr
+// LLVM:   store ptr @_ZN5clangL14BuiltinClassesE
+
+// OGCG: @_ZN5clangL14BuiltinClassesE = internal global
+// OGCG: define {{.*}}@_ZN5clang12EmitBuiltinsEv()
+// OGCG:   %{{.*}} = alloca ptr
+// OGCG:   store ptr @_ZN5clangL14BuiltinClassesE
+
+// Test non-reference type NOUR_Constant (local constexpr in lambda)
+struct A { int x, y[2]; int arr[3]; };
+// CIR-DAG: @__const._Z1fi.a
+// LLVM-DAG: @__const._Z1fi.a
+// OGCG-DAG: @__const._Z1fi.a
+int f(int i) {
+  constexpr A a = {1, 2, 3, 4, 5, 6};
+  return [] (int n, int A::*p) {
+    return (n >= 0 ? a.arr[n] : (n == -1 ? a.*p : a.y[2 - n]));
+  }(i, &A::x);
+}
+// CIR: cir.get_global @__const._Z1fi.a
+// LLVM: getelementptr {{.*}} @__const._Z1fi.a
+// OGCG: getelementptr inbounds {{.*}} @__const._Z1fi.a
diff --git a/clang/test/CIR/Incubator/CodeGen/nonzeroinit-struct.cpp b/clang/test/CIR/Incubator/CodeGen/nonzeroinit-struct.cpp
new file mode 100644
index 0000000000000..1c2687353dc22
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/nonzeroinit-struct.cpp
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// TODO: Lower #cir.data_member<null> to -1 for LLVM (in the itanium ABI context).
+// RUN-DISABLE: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN-DISABLE: FileCheck --input-file=%t.ll -check-prefix=LLVM %s
+
+struct Other {
+  int x;
+};
+
+struct Trivial {
+  int x;
+  double y;
+  decltype(&Other::x) ptr;
+};
+
+// This case has a trivial default constructor, but can't be zero-initialized.
+Trivial t;
+
+// CHECK: !rec_Trivial = !cir.record<struct "Trivial" {!s32i, !cir.double, !cir.data_member<!s32i in !rec_Other>} #cir.record.decl.ast>
+// CHECK: cir.global external @t = #cir.const_record<{#cir.int<0> : !s32i, #cir.fp<0.000000e+00> : !cir.double,
+// CHECK-SAME: #cir.data_member<null> : !cir.data_member<!s32i in !rec_Other>}> : !rec_Trivial
diff --git a/clang/test/CIR/Incubator/CodeGen/nrvo-eh.cpp b/clang/test/CIR/Incubator/CodeGen/nrvo-eh.cpp
new file mode 100644
index 0000000000000..214f5a086cf3c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/nrvo-eh.cpp
@@ -0,0 +1,127 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -fclangir -fexceptions -fcxx-exceptions -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -fclangir -fexceptions -fcxx-exceptions -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -fexceptions -fcxx-exceptions -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+#include "std-cxx.h"
+
+std::vector<const char*> test_nrvo() {
+  std::vector<const char*> result;
+  result.push_back("Words bend our thinking to infinite paths of self-delusion");
+  return result;
+}
+
+// CIR: ![[VEC:.*]] = !cir.record<class "std::vector<const char *>" {!cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!cir.ptr<!s8i>>}>
+
+// CIR: cir.func {{.*}} @_Z9test_nrvov() -> ![[VEC]]
+// CIR:   %[[RESULT:.*]] = cir.alloca ![[VEC]], !cir.ptr<![[VEC]]>, ["__retval", init]
+// CIR:   %[[NRVO_FLAG:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["nrvo"]
+// CIR:   %[[FALSE:.*]] = cir.const #false
+// CIR:   cir.store{{.*}} %[[FALSE]], %[[NRVO_FLAG]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR:   cir.call @_ZNSt6vectorIPKcEC1Ev(%[[RESULT]]) : (!cir.ptr<![[VEC]]>) -> ()
+// CIR:   cir.scope {
+// CIR:     %[[REF_TMP:.*]] = cir.alloca !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>, ["ref.tmp0"]
+// CIR:     %[[STR:.*]] = cir.get_global @".str" : !cir.ptr<!cir.array<!s8i x 59>>
+// CIR:     %[[PTR_DECAY:.*]] = cir.cast array_to_ptrdecay %[[STR]] : !cir.ptr<!cir.array<!s8i x 59>> -> !cir.ptr<!s8i>
+// CIR:     cir.store{{.*}} %[[PTR_DECAY]], %[[REF_TMP]] : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+// CIR:     cir.try synthetic cleanup {
+// CIR:       cir.call exception @_ZNSt6vectorIPKcE9push_backEOS1_(%[[RESULT]], %[[REF_TMP]]) : (!cir.ptr<![[VEC]]>, !cir.ptr<!cir.ptr<!s8i>>) -> () cleanup {
+// CIR:         cir.call @_ZNSt6vectorIPKcED1Ev(%[[RESULT]]) : (!cir.ptr<!rec_std3A3Avector3Cconst_char_2A3E>) -> ()
+// CIR:         cir.yield
+// CIR:       }
+// CIR:       cir.yield
+// CIR:     } catch [#cir.unwind {
+// CIR:       cir.resume
+// CIR:     }]
+// CIR:   }
+// CIR:   %[[TRUE:.*]] = cir.const #true
+// CIR:   cir.store{{.*}} %[[TRUE]], %[[NRVO_FLAG]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR:   %[[NRVO_FLAG_VAL:.*]] = cir.load{{.*}} %[[NRVO_FLAG]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR:   %[[NOT_NRVO:.*]] = cir.unary(not, %[[NRVO_FLAG_VAL]]) : !cir.bool, !cir.bool
+// CIR:   cir.if %[[NOT_NRVO]] {
+// CIR:     cir.call @_ZNSt6vectorIPKcED1Ev(%[[RESULT]]) : (!cir.ptr<!rec_std3A3Avector3Cconst_char_2A3E>) -> ()
+// CIR:   }
+// CIR:   %[[RETVAL:.*]] = cir.load{{.*}} %[[RESULT]] : !cir.ptr<![[VEC]]>, ![[VEC]]
+// CIR:   cir.return %[[RETVAL]] : ![[VEC]]
+
+// LLVM: define {{.*}} %[[VEC:.*]] @_Z9test_nrvov()
+// LLVM:   %[[REF_TMP:.*]] = alloca ptr
+// LLVM:   %[[RESULT:.*]] = alloca %[[VEC]]
+// LLVM:   %[[NRVO_FLAG:.*]] = alloca i8
+// LLVM:   store i8 0, ptr %[[NRVO_FLAG]]
+// LLVM:   call void @_ZNSt6vectorIPKcEC1Ev(ptr %[[RESULT]])
+// LLVM:   br label %[[SCOPE:.*]]
+// LLVM: [[SCOPE]]:
+// LLVM:   store ptr @.str, ptr %[[REF_TMP]]
+// LLVM:   br label %[[SYNTHETIC_SCOPE:.*]]
+// LLVM: [[SYNTHETIC_SCOPE]]:
+// LLVM:   invoke void @_ZNSt6vectorIPKcE9push_backEOS1_(ptr %[[RESULT]], ptr %[[REF_TMP]])
+// LLVM:           to label %[[CONTINUE:.*]] unwind label %[[UNWIND:.*]]
+// LLVM: [[CONTINUE]]:
+// LLVM:   br label %[[SYNTH_DONE:.*]]
+// LLVM: [[UNWIND]]:
+// LLVM:   %[[LPAD:.*]] = landingpad { ptr, i32 }
+// LLVM:                     cleanup
+// LLVM:   %[[EXCEPTION:.*]] = extractvalue { ptr, i32 } %[[LPAD]], 0
+// LLVM:   %[[SELECTOR:.*]] = extractvalue { ptr, i32 } %[[LPAD]], 1
+// LLVM:   call void @_ZNSt6vectorIPKcED1Ev(ptr %[[RESULT]])
+// LLVM:   br label %[[RESUME:.*]]
+// LLVM: [[RESUME]]:
+// LLVM:   %[[EXCEPTION_PHI:.*]] = phi ptr [ %[[EXCEPTION]], %[[UNWIND]] ]
+// LLVM:   %[[SELECTOR_PHI:.*]] = phi i32 [ %[[SELECTOR]], %[[UNWIND]] ]
+// LLVM:   %[[EH_PAIR_PART:.*]] = insertvalue { ptr, i32 } poison, ptr %[[EXCEPTION_PHI]], 0
+// LLVM:   %[[EH_PAIR:.*]] = insertvalue { ptr, i32 } %[[EH_PAIR_PART]], i32 %[[SELECTOR_PHI]], 1
+// LLVM:   resume { ptr, i32 } %[[EH_PAIR]]
+// LLVM: [[SYNTH_DONE]]:
+// LLVM:   br label %[[DONE:.*]]
+// LLVM: [[DONE]]:
+// LLVM:   store i8 1, ptr %[[NRVO_FLAG]]
+// LLVM:   %[[NRVO_FLAG_VAL:.*]] = load i8, ptr %[[NRVO_FLAG]]
+// LLVM:   %[[NRVO_FLAG_BOOL:.*]] = trunc i8 %[[NRVO_FLAG_VAL]] to i1
+// LLVM:   %[[NOT_NRVO:.*]] = xor i1 %[[NRVO_FLAG_BOOL]], true
+// LLVM:   br i1 %[[NOT_NRVO]], label %[[NRVO_UNUSED:.*]], label %[[END:.*]]
+// LLVM: [[NRVO_UNUSED]]:
+// LLVM:   call void @_ZNSt6vectorIPKcED1Ev(ptr %[[RESULT]])
+// LLVM:   br label %[[END]]
+// LLVM: [[END]]:
+// LLVM:   %[[RETVAL:.*]] = load %[[VEC]], ptr %[[RESULT]]
+// LLVM:   ret %[[VEC]] %[[RETVAL]]
+
+// OGCG: define {{.*}} void @_Z9test_nrvov(ptr {{.*}} sret(%[[VEC:.*]]) {{.*}} %[[RESULT:.*]])
+// OGCG:   %[[RESULT_ADDR:.*]] = alloca ptr
+// OGCG:   %[[NRVO_FLAG:.*]] = alloca i1
+// OGCG:   %[[REF_TMP:.*]] = alloca ptr
+// OGCG:   %[[EXN_SLOT:.*]] = alloca ptr
+// OGCG:   %[[SELECTOR_SLOT:.*]] = alloca i32
+// OGCG:   store ptr %[[RESULT]], ptr %[[RESULT_ADDR]]
+// OGCG:   store i1 false, ptr %[[NRVO_FLAG]]
+// OGCG:   call void @_ZNSt6vectorIPKcEC1Ev(ptr {{.*}} %[[RESULT]])
+// OGCG:   store ptr @.str, ptr %[[REF_TMP]]
+// OGCG:   invoke void @_ZNSt6vectorIPKcE9push_backEOS1_(ptr {{.*}} %[[RESULT]], ptr {{.*}} %[[REF_TMP]])
+// OGCG:           to label %[[CONTINUE:.*]] unwind label %[[UNWIND:.*]]
+// OGCG: [[CONTINUE]]:
+// OGCG:   store i1 true, ptr %[[NRVO_FLAG]]
+// OGCG:   %[[NRVO_FLAG_VAL:.*]] = load i1, ptr %[[NRVO_FLAG]]
+// OGCG:   br i1 %[[NRVO_FLAG_VAL]], label %[[END:.*]], label %[[NRVO_UNUSED:.*]]
+// OGCG: [[UNWIND]]:
+// OGCG:   %[[LPAD:.*]] = landingpad { ptr, i32 }
+// OGCG:                     cleanup
+// OGCG:   %[[EXCEPTION:.*]] = extractvalue { ptr, i32 } %[[LPAD]], 0
+// OGCG:   store ptr %[[EXCEPTION]], ptr %[[EXN_SLOT]]
+// OGCG:   %[[SELECTOR:.*]] = extractvalue { ptr, i32 } %[[LPAD]], 1
+// OGCG:   store i32 %[[SELECTOR]], ptr %[[SELECTOR_SLOT]]
+// OGCG:   call void @_ZNSt6vectorIPKcED1Ev(ptr {{.*}} %[[RESULT]])
+// OGCG:   br label %[[RESUME:.*]]
+// OGCG: [[NRVO_UNUSED]]:
+// OGCG:   call void @_ZNSt6vectorIPKcED1Ev(ptr {{.*}} %[[RESULT]])
+// OGCG:   br label %[[END]]
+// OGCG: [[END]]:
+// OGCG:   ret void
+// OGCG: [[RESUME]]:
+// OGCG:   %[[EXN:.*]] = load ptr, ptr %[[EXN_SLOT]]
+// OGCG:   %[[SEL:.*]] = load i32, ptr %[[SELECTOR_SLOT]]
+// OGCG:   %[[EH_PAIR_PART:.*]] = insertvalue { ptr, i32 } poison, ptr %[[EXN]], 0
+// OGCG:   %[[EH_PAIR:.*]] = insertvalue { ptr, i32 } %[[EH_PAIR_PART]], i32 %[[SEL]], 1
+// OGCG:   resume { ptr, i32 } %[[EH_PAIR]]
diff --git a/clang/test/CIR/Incubator/CodeGen/nrvo.cpp b/clang/test/CIR/Incubator/CodeGen/nrvo.cpp
new file mode 100644
index 0000000000000..0e8bb76062f37
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/nrvo.cpp
@@ -0,0 +1,81 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+#include "std-cxx.h"
+
+std::vector<const char*> test_nrvo() {
+  std::vector<const char*> result;
+  result.push_back("Words bend our thinking to infinite paths of self-delusion");
+  return result;
+}
+
+// CIR: ![[VEC:.*]] = !cir.record<class "std::vector<const char *>" {!cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!cir.ptr<!s8i>>}>
+
+// CIR: cir.func {{.*}} @_Z9test_nrvov() -> ![[VEC]]
+// CIR:   %[[RESULT:.*]] = cir.alloca ![[VEC]], !cir.ptr<![[VEC]]>, ["__retval", init]
+// CIR:   %[[NRVO_FLAG:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["nrvo"]
+// CIR:   %[[FALSE:.*]] = cir.const #false
+// CIR:   cir.store{{.*}} %[[FALSE]], %[[NRVO_FLAG]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR:   cir.call @_ZNSt6vectorIPKcEC1Ev(%[[RESULT]]) : (!cir.ptr<![[VEC]]>) -> ()
+// CIR:   cir.scope {
+// CIR:     %[[REF_TMP:.*]] = cir.alloca !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>, ["ref.tmp0"]
+// CIR:     %[[STR:.*]] = cir.get_global @".str" : !cir.ptr<!cir.array<!s8i x 59>>
+// CIR:     %[[PTR_DECAY:.*]] = cir.cast array_to_ptrdecay %[[STR]] : !cir.ptr<!cir.array<!s8i x 59>> -> !cir.ptr<!s8i>
+// CIR:     cir.store{{.*}} %[[PTR_DECAY]], %[[REF_TMP]] : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+// CIR:     cir.call @_ZNSt6vectorIPKcE9push_backEOS1_(%[[RESULT]], %[[REF_TMP]]) : (!cir.ptr<![[VEC]]>, !cir.ptr<!cir.ptr<!s8i>>) -> ()
+// CIR:   }
+// CIR:   %[[TRUE:.*]] = cir.const #true
+// CIR:   cir.store{{.*}} %[[TRUE]], %[[NRVO_FLAG]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR:   %[[NRVO_FLAG_VAL:.*]] = cir.load{{.*}} %[[NRVO_FLAG]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR:   %[[NOT_NRVO:.*]] = cir.unary(not, %[[NRVO_FLAG_VAL]]) : !cir.bool, !cir.bool
+// CIR:   cir.if %[[NOT_NRVO]] {
+// CIR:     cir.call @_ZNSt6vectorIPKcED1Ev(%[[RESULT]]) : (!cir.ptr<!rec_std3A3Avector3Cconst_char_2A3E>) -> ()
+// CIR:   }
+// CIR:   %[[RETVAL:.*]] = cir.load{{.*}} %[[RESULT]] : !cir.ptr<![[VEC]]>, ![[VEC]]
+// CIR:   cir.return %[[RETVAL]] : ![[VEC]]
+
+// LLVM: define {{.*}} %[[VEC:.*]] @_Z9test_nrvov()
+// LLVM:   %[[REF_TMP:.*]] = alloca ptr
+// LLVM:   %[[RESULT:.*]] = alloca %[[VEC]]
+// LLVM:   %[[NRVO_FLAG:.*]] = alloca i8
+// LLVM:   store i8 0, ptr %[[NRVO_FLAG]]
+// LLVM:   call void @_ZNSt6vectorIPKcEC1Ev(ptr %[[RESULT]])
+// LLVM:   br label %[[SCOPE:.*]]
+// LLVM: [[SCOPE]]:
+// LLVM:   store ptr @.str, ptr %[[REF_TMP]]
+// LLVM:   call void @_ZNSt6vectorIPKcE9push_backEOS1_(ptr %[[RESULT]], ptr %[[REF_TMP]])
+// LLVM:   br label %[[DONE:.*]]
+// LLVM: [[DONE]]:
+// LLVM:   store i8 1, ptr %[[NRVO_FLAG]]
+// LLVM:   %[[NRVO_FLAG_VAL:.*]] = load i8, ptr %[[NRVO_FLAG]]
+// LLVM:   %[[NRVO_FLAG_BOOL:.*]] = trunc i8 %[[NRVO_FLAG_VAL]] to i1
+// LLVM:   %[[NOT_NRVO:.*]] = xor i1 %[[NRVO_FLAG_BOOL]], true
+// LLVM:   br i1 %[[NOT_NRVO]], label %[[NRVO_UNUSED:.*]], label %[[END:.*]]
+// LLVM: [[NRVO_UNUSED]]:
+// LLVM:   call void @_ZNSt6vectorIPKcED1Ev(ptr %[[RESULT]])
+// LLVM:   br label %[[END]]
+// LLVM: [[END]]:
+// LLVM:   %[[RETVAL:.*]] = load %[[VEC]], ptr %[[RESULT]]
+// LLVM:   ret %[[VEC]] %[[RETVAL]]
+
+// OGCG: define {{.*}} void @_Z9test_nrvov(ptr {{.*}} sret(%[[VEC:.*]]) {{.*}} %[[RESULT:.*]])
+// OGCG:   %[[RESULT_ADDR:.*]] = alloca ptr
+// OGCG:   %[[NRVO_FLAG:.*]] = alloca i1
+// OGCG:   %[[REF_TMP:.*]] = alloca ptr
+// OGCG:   store ptr %[[RESULT]], ptr %[[RESULT_ADDR]]
+// OGCG:   store i1 false, ptr %[[NRVO_FLAG]]
+// OGCG:   call void @_ZNSt6vectorIPKcEC1Ev(ptr {{.*}} %[[RESULT]])
+// OGCG:   store ptr @.str, ptr %[[REF_TMP]]
+// OGCG:   call void @_ZNSt6vectorIPKcE9push_backEOS1_(ptr {{.*}} %[[RESULT]], ptr {{.*}} %[[REF_TMP]])
+// OGCG:   store i1 true, ptr %[[NRVO_FLAG]]
+// OGCG:   %[[NRVO_FLAG_VAL:.*]] = load i1, ptr %[[NRVO_FLAG]]
+// OGCG:   br i1 %[[NRVO_FLAG_VAL]], label %[[END:.*]], label %[[NRVO_UNUSED:.*]]
+// OGCG: [[NRVO_UNUSED]]:
+// OGCG:   call void @_ZNSt6vectorIPKcED1Ev(ptr {{.*}} %[[RESULT]])
+// OGCG:   br label %[[END]]
+// OGCG: [[END]]:
+// OGCG:   ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/null-arithmatic-expression.c b/clang/test/CIR/Incubator/CodeGen/null-arithmatic-expression.c
new file mode 100644
index 0000000000000..7c4802df6b5c0
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/null-arithmatic-expression.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s
+
+#define NULL ((void *)0)
+
+char *foo() {
+  return (char*)NULL + 1;
+}
+
+// CHECK-LABEL:  cir.func {{.*}} @foo
+// CHECK:    [[CONST_1:%[0-9]+]] = cir.const #cir.int<1> : !s32i
+// CHECK:    {{.*}} = cir.cast int_to_ptr [[CONST_1]] : !s32i
+// CHECK:    cir.return
diff --git a/clang/test/CIR/Incubator/CodeGen/nullptr-init.cpp b/clang/test/CIR/Incubator/CodeGen/nullptr-init.cpp
new file mode 100644
index 0000000000000..abb1290fdf457
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/nullptr-init.cpp
@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu %s -fclangir -emit-cir -o %t.cir
+// RUN: FileCheck --input-file=%t.cir -check-prefix=CIR %s
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu %s -fclangir -emit-llvm -o %t.ll
+// RUN: FileCheck --input-file=%t.ll -check-prefix=LLVM %s
+
+void t1() {
+  int *p1 = nullptr;
+  int *p2 = 0;
+  int *p3 = (int*)0;
+}
+
+// CIR:      cir.func {{.*}} @_Z2t1v()
+// CIR-NEXT:     %[[P1:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["p1", init] {alignment = 8 : i64}
+// CIR-NEXT:     %[[P2:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["p2", init] {alignment = 8 : i64}
+// CIR-NEXT:     %[[P3:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["p3", init] {alignment = 8 : i64}
+// CIR-NEXT:     %[[NULLPTR1:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!s32i>
+// CIR-NEXT:     cir.store{{.*}} %[[NULLPTR1]], %[[P1]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CIR-NEXT:     %[[NULLPTR2:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!s32i>
+// CIR-NEXT:     cir.store{{.*}} %[[NULLPTR2]], %[[P2]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CIR-NEXT:     %[[NULLPTR3:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!s32i>
+// CIR-NEXT:     cir.store{{.*}} %[[NULLPTR3]], %[[P3]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CIR-NEXT:     cir.return
+// CIR-NEXT: }
+
+// LLVM:      define{{.*}} @_Z2t1v()
+// LLVM-NEXT:     %[[P1:.*]] = alloca ptr, i64 1, align 8
+// LLVM-NEXT:     %[[P2:.*]] = alloca ptr, i64 1, align 8
+// LLVM-NEXT:     %[[P3:.*]] = alloca ptr, i64 1, align 8
+// LLVM-NEXT:     store ptr null, ptr %[[P1]], align 8
+// LLVM-NEXT:     store ptr null, ptr %[[P2]], align 8
+// LLVM-NEXT:     store ptr null, ptr %[[P3]], align 8
+// LLVM-NEXT:     ret void
+// LLVM-NEXT: }
+
+// Verify that we're capturing side effects during null pointer initialization.
+int t2() {
+  int x = 0;
+  int *p = (x = 1, nullptr);
+  return x;
+}
+
+// Note: An extra null pointer constant gets emitted as a result of visiting the
+//       compound initialization expression. We could avoid this by capturing
+//       the result of the compound initialization expression and explicitly
+//       casting it to the required type, but a redundant constant seems less
+//       intrusive than a redundant bitcast.
+
+// CIR:       cir.func {{.*}} @_Z2t2v()
+// CIR-NEXT:      %[[RETVAL_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CIR-NEXT:      %[[X:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+// CIR-NEXT:      %[[P:.*]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["p", init] {alignment = 8 : i64}
+// CIR-NEXT:      %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
+// CIR-NEXT:      cir.store{{.*}} %[[ZERO]], %[[X]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT:      %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+// CIR-NEXT:      cir.store{{.*}} %[[ONE]], %[[X]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT:      %[[NULLPTR_EXTRA:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!void>
+// CIR-NEXT:      %[[NULLPTR:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!s32i>
+// CIR-NEXT:      cir.store{{.*}} %[[NULLPTR]], %[[P]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CIR-NEXT:      %[[X_VAL:.*]] = cir.load{{.*}} %[[X]] : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:      cir.store{{.*}} %[[X_VAL]], %[[RETVAL_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT:      %[[RETVAL:.*]] = cir.load{{.*}} %[[RETVAL_ADDR]] : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:      cir.return %[[RETVAL]] : !s32i
+// CIR-NEXT:  }
+
+// LLVM:      define{{.*}} @_Z2t2v()
+// LLVM-NEXT:     %[[RETVAL_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM-NEXT:     %[[X:.*]] = alloca i32, i64 1, align 4
+// LLVM-NEXT:     %[[P:.*]] = alloca ptr, i64 1, align 8
+// LLVM-NEXT:     store i32 0, ptr %[[X]], align 4
+// LLVM-NEXT:     store i32 1, ptr %[[X]], align 4
+// LLVM-NEXT:     store ptr null, ptr %[[P]], align 8
+// LLVM-NEXT:     %[[X_VAL:.*]] = load i32, ptr %[[X]], align 4
+// LLVM-NEXT:     store i32 %[[X_VAL]], ptr %[[RETVAL_ADDR]], align 4
+// LLVM-NEXT:     %[[RETVAL:.*]] = load i32, ptr %[[RETVAL_ADDR]], align 4
+// LLVM-NEXT:     ret i32 %[[RETVAL]]
+// LLVM-NEXT: }
diff --git a/clang/test/CIR/Incubator/CodeGen/offsetof.c b/clang/test/CIR/Incubator/CodeGen/offsetof.c
new file mode 100644
index 0000000000000..19dfab8d3a1a0
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/offsetof.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s
+
+#include <stddef.h>
+
+typedef struct {
+  int a;
+  int b;
+} A;
+
+void foo() {
+  offsetof(A, a);
+  offsetof(A, b);
+}
+
+// CHECK:  cir.func {{.*}} @foo()
+// CHECK:    {{.*}} = cir.const #cir.int<0> : !u64i
+// CHECK:    {{.*}} = cir.const #cir.int<4> : !u64i
+// CHECK:    cir.return
diff --git a/clang/test/CIR/Incubator/CodeGen/ofstream.cpp b/clang/test/CIR/Incubator/CodeGen/ofstream.cpp
new file mode 100644
index 0000000000000..582574a62182a
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/ofstream.cpp
@@ -0,0 +1,87 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -I%S/../Inputs -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -I%S/../Inputs -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+#include "std-cxx.h"
+
+namespace std {
+template <class CharT> class basic_ofstream {
+public:
+  basic_ofstream();
+  ~basic_ofstream();
+  explicit basic_ofstream(const char *);
+};
+
+using ofstream = basic_ofstream<char>;
+
+ofstream &operator<<(ofstream &, const string &);
+} // namespace std
+
+void foo(const char *path) {
+  std::ofstream fout1(path);
+  fout1 << path;
+  std::ofstream fout2(path);
+  fout2 << path;
+}
+
+// CIR: cir.func {{.*}} @_Z3fooPKc
+// CIR: %[[V1:.*]] = cir.alloca !rec_std3A3Abasic_ofstream3Cchar3E, !cir.ptr<!rec_std3A3Abasic_ofstream3Cchar3E>, ["fout1", init] {alignment = 1 : i64}
+// CIR: %[[V2:.*]] = cir.alloca !rec_std3A3Abasic_ofstream3Cchar3E, !cir.ptr<!rec_std3A3Abasic_ofstream3Cchar3E>, ["fout2", init] {alignment = 1 : i64}
+// CIR: cir.try synthetic cleanup {
+// CIR:   cir.call exception @_ZNSbIcEC1EPKcRKNS_9AllocatorE({{.*}}, {{.*}}, {{.*}}) : (!cir.ptr<!rec_std3A3Abasic_string3Cchar3E>, !cir.ptr<!s8i>, !cir.ptr<!rec_std3A3Abasic_string3Cchar3E3A3AAllocator>) -> () cleanup {
+// CIR:     cir.call @_ZNSt14basic_ofstreamIcED1Ev(%[[V2]]) : (!cir.ptr<!rec_std3A3Abasic_ofstream3Cchar3E>) -> ()
+// CIR:     cir.call @_ZNSt14basic_ofstreamIcED1Ev(%[[V1]]) : (!cir.ptr<!rec_std3A3Abasic_ofstream3Cchar3E>) -> ()
+// CIR:     cir.yield
+// CIR:   }
+// CIR:   cir.yield
+// CIR: } catch [#cir.unwind {
+// CIR:   cir.resume
+// CIR: }]
+// CIR: cir.try synthetic cleanup {
+// CIR:   %[[V10:.*]] = cir.call exception @_ZStlsRSt14basic_ofstreamIcERKSbIcE(%[[V2]], {{.*}}) : (!cir.ptr<!rec_std3A3Abasic_ofstream3Cchar3E>, !cir.ptr<!rec_std3A3Abasic_string3Cchar3E>) -> !cir.ptr<!rec_std3A3Abasic_ofstream3Cchar3E> cleanup {
+// CIR:     cir.call @_ZNSbIcED1Ev({{.*}}) : (!cir.ptr<!rec_std3A3Abasic_string3Cchar3E>) -> ()
+// CIR:     cir.call @_ZNSt14basic_ofstreamIcED1Ev(%[[V2]]) : (!cir.ptr<!rec_std3A3Abasic_ofstream3Cchar3E>) -> ()
+// CIR:     cir.call @_ZNSt14basic_ofstreamIcED1Ev(%[[V1]]) : (!cir.ptr<!rec_std3A3Abasic_ofstream3Cchar3E>) -> ()
+// CIR:     cir.yield
+// CIR:   }
+// CIR:   cir.store{{.*}} %[[V10]], {{.*}} : !cir.ptr<!rec_std3A3Abasic_ofstream3Cchar3E>, !cir.ptr<!cir.ptr<!rec_std3A3Abasic_ofstream3Cchar3E>>
+// CIR:   cir.yield
+// CIR: } catch [#cir.unwind {
+// CIR:   cir.resume
+// CIR: }]
+// CIR: cir.call @_ZNSt14basic_ofstreamIcED1Ev(%[[V2]]) : (!cir.ptr<!rec_std3A3Abasic_ofstream3Cchar3E>) -> ()
+// CIR: cir.call @_ZNSt14basic_ofstreamIcED1Ev(%[[V1]]) : (!cir.ptr<!rec_std3A3Abasic_ofstream3Cchar3E>) -> ()
+// CIR: cir.return
+
+// LLVM: @_Z3fooPKc(ptr {{.*}})
+// LLVM:   %[[V9:.*]] = alloca %"class.std::basic_ofstream<char>", i64 1, align 1
+// LLVM:   %[[V10:.*]] = alloca %"class.std::basic_ofstream<char>", i64 1, align 1
+// LLVM: {{.*}}
+// LLVM:   invoke void @_ZNSbIcEC1EPKcRKNS_9AllocatorE(ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+// LLVM:           to label %[[B56:.*]] unwind label %[[B57:.*]]
+// LLVM: [[B56]]
+// LLVM:   br label {{.*}}
+// LLVM: [[B57]]
+// LLVM:   call void @_ZNSt14basic_ofstreamIcED1Ev(ptr %[[V10]])
+// LLVM:   call void @_ZNSt14basic_ofstreamIcED1Ev(ptr %[[V9]])
+// LLVM:   br label %[[B61:.*]]
+// LLVM: [[B61]]
+// LLVM:   resume { ptr, i32 } {{.*}}
+// LLVM: {{.*}}
+// LLVM:   {{.*}} = invoke ptr @_ZStlsRSt14basic_ofstreamIcERKSbIcE(ptr %[[V10]], ptr {{.*}})
+// LLVM:           to label {{.*}} unwind label %[[B70:.*]]
+// LLVM: [[B70]]
+// LLVM:   call void @_ZNSbIcED1Ev(ptr {{.*}})
+// LLVM:   call void @_ZNSt14basic_ofstreamIcED1Ev(ptr %[[V10]])
+// LLVM:   call void @_ZNSt14basic_ofstreamIcED1Ev(ptr %[[V9]])
+// LLVM:   br label %[[B74:.*]]
+// LLVM: [[B74]]
+// LLVM:   resume { ptr, i32 } {{.*}}
+// LLVM: {{.*}}
+// LLVM:   call void @_ZNSbIcED1Ev(ptr {{.*}})
+// LLVM:   br label %[[B80:.*]]
+// LLVM: [[B80]]
+// LLVM:   call void @_ZNSt14basic_ofstreamIcED1Ev(ptr %[[V10]])
+// LLVM:   call void @_ZNSt14basic_ofstreamIcED1Ev(ptr %[[V9]])
+// LLVM:   ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/opaque.c b/clang/test/CIR/Incubator/CodeGen/opaque.c
new file mode 100644
index 0000000000000..f3b8aab76d5d5
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/opaque.c
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+int foo(int x, short y) {
+  return x ?: y;
+}
+
+// CHECK: cir.func {{.*}} @foo
+// CHECK: %[[Load:.*]] = cir.load
+// CHECK: %[[Bool:.*]] = cir.cast int_to_bool %[[Load]] : !s32i -> !cir.bool loc(#loc8)
+// CHECK: = cir.ternary(%[[Bool]], true {
+// CHECK:   cir.yield %[[Load]]
diff --git a/clang/test/CIR/Incubator/CodeGen/opaque.cpp b/clang/test/CIR/Incubator/CodeGen/opaque.cpp
new file mode 100644
index 0000000000000..613a95e92e792
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/opaque.cpp
@@ -0,0 +1,152 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+void foo() {
+  int a;
+  int b = 1 ?: a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init]
+// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s32i
+// CIR: cir.store{{.*}} %[[CONST_1]], %[[B_ADDR]] : !s32i, !cir.ptr<!s32i>
+
+// LLVM: %[[A_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: store i32 1, ptr %[[B_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca i32, align 4
+// OGCG: store i32 1, ptr %[[B_ADDR]], align 4
+
+void foo2() {
+  float _Complex a;
+  float _Complex b;
+  float _Complex c = a ?: b;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b"]
+// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["c", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float
+// CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float
+// CIR: %[[A_REAL_BOOL:.*]] = cir.cast float_to_bool %[[A_REAL]] : !cir.float -> !cir.bool
+// CIR: %[[A_IMAG_BOOL:.*]] = cir.cast float_to_bool %[[A_IMAG]] : !cir.float -> !cir.bool
+// CIR: %[[CONST_TRUE:.*]] = cir.const #true
+// CIR: %[[COND:.*]] = cir.select if %[[A_REAL_BOOL]] then %[[CONST_TRUE]] else %[[A_IMAG_BOOL]] : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
+// CIR: %[[RESULT:.*]] = cir.ternary(%[[COND]], true {
+// CIR:   %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR:   cir.yield %[[TMP_A]] : !cir.complex<!cir.float>
+// CIR: }, false {
+// CIR:   %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR:   cir.yield %[[TMP_B]] : !cir.complex<!cir.float>
+// CIR: }) : (!cir.bool) -> !cir.complex<!cir.float>
+// CIR: cir.store{{.*}} %[[RESULT]], %[[C_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[C_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4
+// LLVM: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0
+// LLVM: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1
+// LLVM: %[[A_REAL_BOOL:.*]] = fcmp une float %[[A_REAL]], 0.000000e+00
+// LLVM: %[[A_IMAG_BOOL:.*]] = fcmp une float %[[A_IMAG]], 0.000000e+00
+// LLVM: %[[COND:.*]] = or i1 %[[A_REAL_BOOL]], %[[A_IMAG_BOOL]]
+// LLVM: br i1 %[[COND]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// LLVM: [[COND_TRUE]]:
+// LLVM:  %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4
+// LLVM:  br label %[[COND_RESULT:.*]]
+// LLVM: [[COND_FALSE]]:
+// LLVM:  %[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4
+// LLVM:  br label %[[COND_RESULT]]
+// LLVM: [[COND_RESULT]]:
+// LLVM:  %[[RESULT:.*]] = phi { float, float } [ %[[TMP_B]], %[[COND_FALSE]] ], [ %[[TMP_A]], %[[COND_TRUE]] ]
+// LLVM:  store { float, float } %[[RESULT]], ptr %[[C_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[C_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0
+// OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
+// OGCG: %[[A_REAL_BOOL:.*]] = fcmp une float %[[A_REAL]], 0.000000e+00
+// OGCG: %[[A_IMAG_BOOL:.*]] = fcmp une float %[[A_IMAG]], 0.000000e+00
+// OGCG: %[[COND:.*]] = or i1 %[[A_REAL_BOOL]], %[[A_IMAG_BOOL]]
+// OGCG: br i1 %[[COND]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// OGCG: [[COND_TRUE]]:
+// OGCG:  %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0
+// OGCG:  %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
+// OGCG:  %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG:  %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
+// OGCG:  br label %[[COND_END:.*]]
+// OGCG: [[COND_FALSE]]:
+// OGCG:  %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG:  %[[B_REAL:.*]] = load float, ptr %[[B_REAL_PTR]], align 4
+// OGCG:  %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG:  %[[B_IMAG:.*]] = load float, ptr %[[B_IMAG_PTR]], align 4
+// OGCG:  br label %[[COND_END]]
+// OGCG: [[COND_END]]:
+// OGCG:  %[[RESULT_REAL:.*]] = phi float [ %[[A_REAL]], %[[COND_TRUE]] ], [ %[[B_REAL]], %[[COND_FALSE]] ]
+// OGCG:  %[[RESULT_IMAG:.*]] = phi float [ %[[A_IMAG]], %[[COND_TRUE]] ], [ %[[B_IMAG]], %[[COND_FALSE]] ]
+// OGCG:  %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 0
+// OGCG:  %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 1
+// OGCG:  store float %[[RESULT_REAL]], ptr %[[C_REAL_PTR]], align 4
+// OGCG:  store float %[[RESULT_IMAG]], ptr %[[C_IMAG_PTR]], align 4
+
+void foo3() {
+  int a;
+  int b;
+  int c = a ?: b;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b"]
+// CIR: %[[C_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["c", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!s32i>, !s32i
+// CIR: %[[A_BOOL:.*]] = cir.cast int_to_bool %[[TMP_A]] : !s32i -> !cir.bool
+// CIR: %[[RESULT:.*]] = cir.ternary(%[[A_BOOL]], true {
+// CIR:   %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!s32i>, !s32i
+// CIR:   cir.yield %[[TMP_A]] : !s32i
+// CIR: }, false {
+// CIR:   %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!s32i>, !s32i
+// CIR:   cir.yield %[[TMP_B]] : !s32i
+// CIR: }) : (!cir.bool) -> !s32i
+// CIR: cir.store{{.*}} %[[RESULT]], %[[C_ADDR]] : !s32i, !cir.ptr<!s32i>
+
+// LLVM: %[[A_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[C_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load i32, ptr %[[A_ADDR]], align 4
+// LLVM: %[[COND:.*]] = icmp ne i32 %[[TMP_A]], 0
+// LLVM: br i1 %[[COND]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// LLVM: [[COND_TRUE]]:
+// LLVM:  %[[TMP_A:.*]] = load i32, ptr %[[A_ADDR]], align 4
+// LLVM:  br label %[[COND_RESULT:.*]]
+// LLVM: [[COND_FALSE]]:
+// LLVM:  %[[TMP_B:.*]] = load i32, ptr %[[B_ADDR]], align 4
+// LLVM:  br label %[[COND_RESULT]]
+// LLVM: [[COND_RESULT]]:
+// LLVM:  %[[RESULT:.*]] = phi i32 [ %[[TMP_B]], %[[COND_FALSE]] ], [ %[[TMP_A]], %[[COND_TRUE]] ]
+// LLVM:  store i32 %[[RESULT]], ptr %[[C_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[C_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[TMP_A:.*]] = load i32, ptr %[[A_ADDR]], align 4
+// OGCG: %[[A_BOOL:.*]] = icmp ne i32 %[[TMP_A]], 0
+// OGCG: br i1 %[[A_BOOL]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+// OGCG: [[COND_TRUE]]:
+// OGCG:  %[[TMP_A:.*]] = load i32, ptr %[[A_ADDR]], align 4
+// OGCG:  br label %[[COND_END:.*]]
+// OGCG: [[COND_FALSE]]:
+// OGCG:  %[[TMP_B:.*]] = load i32, ptr %[[B_ADDR]], align 4
+// OGCG:  br label %[[COND_END]]
+// OGCG: [[COND_END]]:
+// OGCG:  %[[RESULT:.*]] = phi i32 [ %[[TMP_A]], %[[COND_TRUE]] ], [ %[[TMP_B]], %[[COND_FALSE]] ]
+// OGCG:  store i32 %[[RESULT]], ptr %[[C_ADDR]], align 4
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/operators.cpp b/clang/test/CIR/Incubator/CodeGen/operators.cpp
new file mode 100644
index 0000000000000..1d900188f1cee
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/operators.cpp
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+class __attribute__((__visibility__("default"))) exception_ptr
+{
+    void* __ptr_;
+public:
+    explicit operator bool() const noexcept {return __ptr_ != nullptr;}
+};
+
+// TODO: for now only check that this doesn't crash, in the future check operator
+// bool codegen.
+
+// CHECK: module
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/optimization-attr.cpp b/clang/test/CIR/Incubator/CodeGen/optimization-attr.cpp
new file mode 100644
index 0000000000000..6af62bff6b35b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/optimization-attr.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir --check-prefix=CHECK-O0 %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O1 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir --check-prefix=CHECK-O1 %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir --check-prefix=CHECK-O2 %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O3 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir --check-prefix=CHECK-O3 %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Os -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir --check-prefix=CHECK-Os %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Oz -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir --check-prefix=CHECK-Oz %s
+
+void foo() {}
+
+// CHECK-O0: module
+// CHECK-O0-NOT: cir.opt_info
+
+// CHECK-O1: module
+// CHECK-O1: cir.opt_info = #cir.opt_info<level = 1, size = 0>
+
+// CHECK-O2: module
+// CHECK-O2: cir.opt_info = #cir.opt_info<level = 2, size = 0>
+
+// CHECK-O3: module
+// CHECK-O3: cir.opt_info = #cir.opt_info<level = 3, size = 0>
+
+// CHECK-Os: module
+// CHECK-Os: cir.opt_info = #cir.opt_info<level = 2, size = 1>
+
+// CHECK-Oz: module
+// CHECK-Oz: cir.opt_info = #cir.opt_info<level = 2, size = 2>
diff --git a/clang/test/CIR/Incubator/CodeGen/optnone.cpp b/clang/test/CIR/Incubator/CodeGen/optnone.cpp
new file mode 100644
index 0000000000000..a524aa0461f81
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/optnone.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -O0 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR-O0
+// RUN: %clang_cc1 -O0 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM-O0
+
+// RUN: %clang_cc1 -O2 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t2.cir
+// RUN: FileCheck --input-file=%t2.cir %s -check-prefix=CIR-O2
+// RUN: %clang_cc1 -O2 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t2.ll
+// RUN: FileCheck --input-file=%t2.ll %s -check-prefix=LLVM-O2
+
+int s0(int a, int b) {
+  int x = a + b;
+  if (x > 0)
+    x = 0;
+  else
+    x = 1;
+  return x;
+}
+
+// CIR-O0: #fn_attr = #cir<extra({nothrow = #cir.nothrow})>
+// CIR-O0:   cir.func {{.*}} optnone {{.*}} @_Z2s0ii(%arg0:{{.*}}, %arg1:{{.*}} -> {{.*}} extra(#fn_attr)
+
+// CIR-O2-NOT: cir.func optnone
+
+// LLVM-O0: define dso_local i32 @_Z2s0ii(i32 %0, i32 %1) #[[#ATTR:]]
+// LLVM-O0: attributes #[[#ATTR]] = { noinline nounwind optnone }
+// LLVM-O2-NOT: attributes #[[#]] = { noinline nounwind optnone }
diff --git a/clang/test/CIR/Incubator/CodeGen/pack-indexing.cpp b/clang/test/CIR/Incubator/CodeGen/pack-indexing.cpp
new file mode 100644
index 0000000000000..4196297950f25
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/pack-indexing.cpp
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++2c -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++2c -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+int pack_indexing(auto... p) { return p...[0]; }
+
+// CIR: %[[P_0:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["p", init]
+// CIR: %[[P_1:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["p", init]
+// CIR: %[[P_2:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["p", init]
+// CIR: %[[RET_VAL:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+// CIR: %[[RESULT:.*]] = cir.load{{.*}} %[[P_0]] : !cir.ptr<!s32i>, !s32i
+// CIR: cir.store{{.*}} %[[RESULT]], %[[RET_VAL]] : !s32i, !cir.ptr<!s32i>
+// CIR: %[[TMP:.*]] = cir.load{{.*}} %[[RET_VAL]] : !cir.ptr<!s32i>, !s32i
+// CIR: cir.return %[[TMP]] : !s32i
+
+// LLVM: %[[P_0:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[P_1:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[P_2:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[RET_VAL:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[RESULT:.*]] = load i32, ptr %[[P_0]], align 4
+// LLVM: store i32 %[[RESULT]], ptr %[[RET_VAL]], align 4
+// LLVM: %[[TMP:.*]] = load i32, ptr %[[RET_VAL]], align 4
+// LLVM: ret i32 %[[TMP]]
+
+int foo() { return pack_indexing(1, 2, 3); }
+
+// CIR: %[[RET_VAL:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+// CIR: %[[RESULT:.*]] = cir.call @_Z13pack_indexingIJiiiEEiDpT_({{.*}}, {{.*}}, {{.*}}) : (!s32i, !s32i, !s32i) -> !s32i
+// CIR: cir.store{{.*}} %[[RESULT]], %[[RET_VAL]] : !s32i, !cir.ptr<!s32i>
+// CIR: %[[TMP:.*]] = cir.load{{.*}} %[[RET_VAL]] : !cir.ptr<!s32i>, !s32i
+// CIR: cir.return %[[TMP]] : !s32i
+
+// LLVM: %[[RET_VAL:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[RESULT:.*]] = call i32 @_Z13pack_indexingIJiiiEEiDpT_(i32 1, i32 2, i32 3)
+// LLVM: store i32 %[[RESULT]], ptr %[[RET_VAL]], align 4
+// LLVM: %[[TMP:.*]] = load i32, ptr %[[RET_VAL]], align 4
+// LLVM: ret i32 %[[TMP]]
diff --git a/clang/test/CIR/Incubator/CodeGen/packed-structs.c b/clang/test/CIR/Incubator/CodeGen/packed-structs.c
new file mode 100644
index 0000000000000..2454065c97c03
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/packed-structs.c
@@ -0,0 +1,133 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+#pragma pack(1)
+
+typedef struct {
+    int  a0;
+    char a1;
+} A;
+
+typedef struct {
+    int  b0;
+    char b1;
+    A a[6];
+} B;
+
+typedef struct {
+    int  c0;
+    char c1;
+} __attribute__((aligned(2))) C;
+
+
+// CIR: !rec_A = !cir.record<struct "A" packed {!s32i, !s8i}>
+// CIR: !rec_C = !cir.record<struct "C" packed padded {!s32i, !s8i, !u8i}>
+// CIR: !rec_D = !cir.record<struct "D" packed padded {!s8i, !u8i, !s32i}
+// CIR: !rec_F = !cir.record<struct "F" packed {!s64i, !s8i}
+// CIR: !rec_E = !cir.record<struct "E" packed {!rec_D
+// CIR: !rec_G = !cir.record<struct "G" {!rec_F
+// CIR: !rec_H = !cir.record<struct "H" {!s32i, !rec_anon2E0
+// CIR: !rec_B = !cir.record<struct "B" packed {!s32i, !s8i, !cir.array<!rec_A x 6>}>
+// CIR: !rec_I = !cir.record<struct "I" packed {!s8i, !rec_H
+// CIR: !rec_J = !cir.record<struct "J" packed {!s8i, !s8i, !s8i, !s8i, !rec_I
+
+// LLVM: %struct.A = type <{ i32, i8 }>
+// LLVM: %struct.B = type <{ i32, i8, [6 x %struct.A] }>
+// LLVM: %struct.C = type <{ i32, i8, i8 }>
+// LLVM: %struct.E = type <{ %struct.D, i32 }>
+// LLVM: %struct.D = type <{ i8, i8, i32 }>
+// LLVM: %struct.G = type { %struct.F, i8 }
+// LLVM: %struct.F = type <{ i64, i8 }>
+// LLVM: %struct.J = type <{ i8, i8, i8, i8, %struct.I, i32 }>
+// LLVM: %struct.I = type <{ i8, %struct.H }>
+// LLVM: %struct.H = type { i32, %union.anon.{{.*}} }
+
+// CIR: cir.func {{.*@foo()}}
+// CIR:  {{.*}} = cir.alloca !rec_A, !cir.ptr<!rec_A>, ["a"] {alignment = 1 : i64}
+// CIR:  {{.*}} = cir.alloca !rec_B, !cir.ptr<!rec_B>, ["b"] {alignment = 1 : i64}
+// CIR:  {{.*}} = cir.alloca !rec_C, !cir.ptr<!rec_C>, ["c"] {alignment = 2 : i64}
+
+// LLVM: {{.*}} = alloca %struct.A, i64 1, align 1
+// LLVM: {{.*}} = alloca %struct.B, i64 1, align 1
+// LLVM: {{.*}} = alloca %struct.C, i64 1, align 2
+void foo() {
+    A a;
+    B b;
+    C c;
+}
+
+#pragma pack(2)
+
+typedef struct {
+    char b;
+    int c;
+} D;
+
+typedef struct {
+    D e;
+    int f;
+} E;
+
+// CIR: cir.func {{.*@f1()}}
+// CIR:  %[[E:.*]] = cir.alloca !rec_E, !cir.ptr<!rec_E>, ["a", init] {alignment = 2 : i64}
+// CIR:  %[[ZERO:.*]] = cir.const #cir.zero : !rec_E
+// CIR:  cir.store{{.*}} %[[ZERO]], %[[E]] : !rec_E, !cir.ptr<!rec_E>
+
+// LLVM: {{.*}} = alloca %struct.E, i64 1, align 2
+void f1() {
+    E a = {};
+}
+
+#pragma pack(1)
+
+typedef struct {
+    long b;
+    char c;
+} F;
+
+typedef struct {
+    F e;
+    char f;
+} G;
+
+// CIR: cir.func {{.*@f2()}}
+// CIR:  {{.*}} = cir.alloca !rec_G, !cir.ptr<!rec_G>, ["a", init] {alignment = 1 : i64}
+
+// LLVM: {{.*}} = alloca %struct.G, i64 1, align 1
+void f2() {
+    G a = {};
+}
+
+#pragma pack(1)
+
+typedef struct {
+    int d0;
+    union {
+        char null;
+        int val;
+    } value;
+} H;
+
+typedef struct {
+    char t;
+    H d;
+} I;
+
+typedef struct {
+    char a0;
+    char a1;
+    char a2;
+    char a3;
+    I c;
+    int a;
+} J;
+
+// CIR: cir.func {{.*@f3()}}
+// CIR:  {{.*}} = cir.alloca !rec_J, !cir.ptr<!rec_J>, ["a", init] {alignment = 1 : i64}
+
+// LLVM: {{.*}} = alloca %struct.J, i64 1, align 1
+void f3() {
+    J a = {0};
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/paren-list-init.cpp b/clang/test/CIR/Incubator/CodeGen/paren-list-init.cpp
new file mode 100644
index 0000000000000..aa5bc1ca2f24e
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/paren-list-init.cpp
@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 -std=c++20 -triple aarch64-none-linux-android21 -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++20 -fexceptions -fcxx-exceptions -triple aarch64-none-linux-android21 -Wno-unused-value -fclangir -emit-cir %s -o %t.eh.cir
+// RUN: FileCheck --check-prefix=CIR_EH --input-file=%t.eh.cir %s
+
+struct Vec {
+  Vec();
+  Vec(Vec&&);
+  ~Vec();
+};
+
+struct S1 {
+  Vec v;
+};
+
+// CIR-DAG: ![[VecType:.*]] = !cir.record<struct "Vec" padded {!u8i}>
+// CIR-DAG: ![[S1:.*]] = !cir.record<struct "S1" {![[VecType]]}>
+
+// CIR_EH-DAG: ![[VecType:.*]] = !cir.record<struct "Vec" padded {!u8i}>
+// CIR_EH-DAG: ![[S1:.*]] = !cir.record<struct "S1" {![[VecType]]}>
+
+template <int I>
+void make1() {
+  Vec v;
+  S1((Vec&&) v);
+// CIR: cir.func {{.*}} @_Z5make1ILi0EEvv()
+// CIR:   %[[VEC:.*]] = cir.alloca ![[VecType]], !cir.ptr<![[VecType]]>
+// CIR:   cir.call @_ZN3VecC1Ev(%[[VEC]]) : (!cir.ptr<![[VecType]]>)
+// CIR:   cir.scope {
+// CIR:     %[[AGG_TMP:.*]] = cir.alloca ![[S1]], !cir.ptr<![[S1]]>, ["agg.tmp.ensured"]
+// CIR:     %[[FIELD:.*]] = cir.get_member %[[AGG_TMP]][0] {name = "v"} : !cir.ptr<![[S1]]> -> !cir.ptr<![[VecType]]>
+// CIR:     cir.call @_ZN3VecC1EOS_(%[[FIELD]], %[[VEC]]) : (!cir.ptr<![[VecType]]>, !cir.ptr<![[VecType]]>) -> ()
+// CIR:     cir.call @_ZN2S1D1Ev(%[[AGG_TMP]]) : (!cir.ptr<![[S1]]>) -> ()
+// CIR:   }
+// CIR:   cir.call @_ZN3VecD1Ev(%[[VEC]]) : (!cir.ptr<![[VecType]]>) -> ()
+// CIR:   cir.return
+
+// CIR_EH: cir.func {{.*}} @_Z5make1ILi0EEvv()
+// CIR_EH:  %[[VEC:.*]] = cir.alloca ![[VecType]], !cir.ptr<![[VecType]]>, ["v", init]
+
+// Construct v
+// CIR_EH:  cir.call @_ZN3VecC1Ev(%[[VEC]]) : (!cir.ptr<![[VecType]]>) -> ()
+// CIR_EH:  cir.scope {
+// CIR_EH:    %1 = cir.alloca ![[S1]], !cir.ptr<![[S1]]>, ["agg.tmp.ensured"]
+// CIR_EH:    %2 = cir.get_member %1[0] {name = "v"} : !cir.ptr<![[S1]]> -> !cir.ptr<![[VecType]]>
+// CIR_EH:    cir.try synthetic cleanup {
+
+// Call v move ctor
+// CIR_EH:      cir.call exception @_ZN3VecC1EOS_{{.*}} cleanup {
+
+// Destroy v after v move ctor throws
+// CIR_EH:        cir.call @_ZN3VecD1Ev(%[[VEC]])
+// CIR_EH:        cir.yield
+// CIR_EH:      }
+// CIR_EH:      cir.yield
+// CIR_EH:    } catch [#cir.unwind {
+// CIR_EH:      cir.resume
+// CIR_EH:    }]
+// CIR_EH:    cir.call @_ZN2S1D1Ev(%1) : (!cir.ptr<![[S1]]>) -> ()
+// CIR_EH:  }
+
+// Destroy v after successful cir.try
+// CIR_EH:  cir.call @_ZN3VecD1Ev(%[[VEC]]) : (!cir.ptr<![[VecType]]>) -> ()
+// CIR_EH:  cir.return
+}
+
+void foo() {
+  make1<0>();
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/pass-object-size.c b/clang/test/CIR/Incubator/CodeGen/pass-object-size.c
new file mode 100644
index 0000000000000..f10d0981565a6
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/pass-object-size.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+void b(void *__attribute__((pass_object_size(0))));
+void e(void *__attribute__((pass_object_size(2))));
+void c() {
+  int a;
+  int d[a];
+  b(d);
+  e(d);
+}
+
+// CIR: cir.func {{.*}} @c()
+// CIR: [[TMP0:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, %{{[0-9]+}} : !u64i, ["vla"] {alignment = 16 : i64}
+// CIR: [[TMP1:%.*]] = cir.cast bitcast [[TMP0]] : !cir.ptr<!s32i> -> !cir.ptr<!void>
+// CIR-NEXT: [[TMP2:%.*]] = cir.objsize max [[TMP1]] : !cir.ptr<!void> -> !u64i
+// CIR-NEXT: cir.call @b([[TMP1]], [[TMP2]]) : (!cir.ptr<!void>, !u64i) -> ()
+// CIR: [[TMP3:%.*]] = cir.cast bitcast [[TMP0]] : !cir.ptr<!s32i> -> !cir.ptr<!void>
+// CIR: [[TMP4:%.*]] = cir.objsize min [[TMP3]] : !cir.ptr<!void> -> !u64i
+// CIR-NEXT: cir.call @e([[TMP3]], [[TMP4]]) : (!cir.ptr<!void>, !u64i) -> ()
+
+// LLVM: define dso_local void @c()
+// LLVM: [[TMP0:%.*]] = alloca i32, i64 %{{[0-9]+}}
+// LLVM: [[TMP1:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[TMP0]], i1 false, i1 true, i1 false)
+// LLVM-NEXT: call void @b(ptr [[TMP0]], i64 [[TMP1]])
+// LLVM: [[TMP2:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[TMP0]], i1 true, i1 true, i1 false)
+// LLVM-NEXT: call void @e(ptr [[TMP0]], i64 [[TMP2]])
diff --git a/clang/test/CIR/Incubator/CodeGen/pointer-arith-ext.c b/clang/test/CIR/Incubator/CodeGen/pointer-arith-ext.c
new file mode 100644
index 0000000000000..e3e604d0b5b94
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/pointer-arith-ext.c
@@ -0,0 +1,126 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-int-conversions -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-int-conversions -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// GNU extensions
+typedef void (*FP)(void);
+void *f2(void *a, int b) { return a + b; }
+// CIR-LABEL: f2
+// CIR: %[[PTR:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR: %[[STRIDE:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CIR: cir.ptr_stride %[[PTR]], %[[STRIDE]] : (!cir.ptr<!void>, !s32i) -> !cir.ptr<!void>
+
+// LLVM-LABEL: f2
+// LLVM: %[[PTR:.*]] = load ptr, ptr {{.*}}, align 8
+// LLVM: %[[TOEXT:.*]] = load i32, ptr {{.*}}, align 4
+// LLVM: %[[STRIDE:.*]] = sext i32 %[[TOEXT]] to i64
+// LLVM: getelementptr i8, ptr %[[PTR]], i64 %[[STRIDE]]
+
+// These test the same paths above, just make sure it does not crash.
+void *f2_0(void *a, int b) { return &a[b]; }
+void *f2_1(void *a, int b) { return (a += b); }
+void *f3(int a, void *b) { return a + b; }
+
+void *f3_1(int a, void *b) { return (a += b); }
+// CIR-LABEL: @f3_1
+// CIR: %[[NEW_PTR:.*]] = cir.ptr_stride
+// CIR: cir.cast ptr_to_int %[[NEW_PTR]] : !cir.ptr<!void> -> !s32i
+
+// LLVM-LABEL: @f3_1
+// LLVM: %[[NEW_PTR:.*]] = getelementptr
+// LLVM: ptrtoint ptr %[[NEW_PTR]] to i32
+
+void *f4(void *a, int b) { return a - b; }
+// CIR-LABEL: f4
+// CIR: %[[PTR:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR: %[[STRIDE:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CIR: %[[SUB:.*]] = cir.unary(minus, %[[STRIDE]]) : !s32i, !s32i
+// CIR: cir.ptr_stride inbounds %[[PTR]], %[[SUB]] : (!cir.ptr<!void>, !s32i) -> !cir.ptr<!void>
+
+// LLVM-LABEL: f4
+// LLVM: %[[PTR:.*]] = load ptr, ptr {{.*}}, align 8
+// LLVM: %[[TOEXT:.*]] = load i32, ptr {{.*}}, align 4
+// LLVM: %[[STRIDE:.*]] = sext i32 %[[TOEXT]] to i64
+// LLVM: %[[SUB:.*]] = sub i64 0, %[[STRIDE]]
+// LLVM: getelementptr inbounds i8, ptr %[[PTR]], i64 %[[SUB]]
+
+// Similar to f4, just make sure it does not crash.
+void *f4_1(void *a, int b) { return (a -= b); }
+
+FP f5(FP a, int b) { return a + b; }
+// CIR-LABEL: f5
+// CIR: %[[PTR:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.ptr<!cir.func<()>>>, !cir.ptr<!cir.func<()>>
+// CIR: %[[STRIDE:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CIR: cir.ptr_stride inbounds %[[PTR]], %[[STRIDE]] : (!cir.ptr<!cir.func<()>>, !s32i) -> !cir.ptr<!cir.func<()>>
+
+// LLVM-LABEL: f5
+// LLVM: %[[PTR:.*]] = load ptr, ptr {{.*}}, align 8
+// LLVM: %[[TOEXT:.*]] = load i32, ptr {{.*}}, align 4
+// LLVM: %[[STRIDE:.*]] = sext i32 %[[TOEXT]] to i64
+// LLVM: getelementptr inbounds i8, ptr %[[PTR]], i64 %[[STRIDE]]
+
+// These test the same paths above, just make sure it does not crash.
+FP f5_1(FP a, int b) { return (a += b); }
+FP f6(int a, FP b) { return a + b; }
+FP f6_1(int a, FP b) { return (a += b); }
+
+FP f7(FP a, int b) { return a - b; }
+// CIR-LABEL: f7
+// CIR: %[[PTR:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.ptr<!cir.func<()>>>, !cir.ptr<!cir.func<()>>
+// CIR: %[[STRIDE:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CIR: %[[SUB:.*]] = cir.unary(minus, %[[STRIDE]]) : !s32i, !s32i
+// CIR: cir.ptr_stride inbounds %[[PTR]], %[[SUB]] : (!cir.ptr<!cir.func<()>>, !s32i) -> !cir.ptr<!cir.func<()>>
+
+// LLVM-LABEL: f7
+// LLVM: %[[PTR:.*]] = load ptr, ptr {{.*}}, align 8
+// LLVM: %[[TOEXT:.*]] = load i32, ptr {{.*}}, align 4
+// LLVM: %[[STRIDE:.*]] = sext i32 %[[TOEXT]] to i64
+// LLVM: %[[SUB:.*]] = sub i64 0, %[[STRIDE]]
+// LLVM: getelementptr inbounds i8, ptr %[[PTR]], i64 %[[SUB]]
+
+// Similar to f7, just make sure it does not crash.
+FP f7_1(FP a, int b) { return (a -= b); }
+
+void *id(void *a) { return a; }
+void f8(void *a, int b) { return *(id(a + b)); }
+// CIR-LABEL: f8
+// CIR: %[[PTR:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR: %[[STRIDE:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CIR: cir.ptr_stride inbounds %[[PTR]], %[[STRIDE]] : (!cir.ptr<!void>, !s32i) -> !cir.ptr<!void>
+// CIR: cir.return
+
+// LLVM-LABEL: f8
+// LLVM: %[[PTR:.*]] = load ptr, ptr {{.*}}, align 8
+// LLVM: %[[TOEXT:.*]] = load i32, ptr {{.*}}, align 4
+// LLVM: %[[STRIDE:.*]] = sext i32 %[[TOEXT]] to i64
+// LLVM: getelementptr inbounds i8, ptr %[[PTR]], i64 %[[STRIDE]]
+// LLVM: ret void
+
+void f8_1(void *a, int b) { return a[b]; }
+// CIR-LABEL: f8_1
+// CIR: %[[PTR:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR: %[[STRIDE:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CIR: cir.ptr_stride %[[PTR]], %[[STRIDE]] : (!cir.ptr<!void>, !s32i) -> !cir.ptr<!void>
+// CIR: cir.return
+
+// LLVM-LABEL: f8_1
+// LLVM: %[[PTR:.*]] = load ptr, ptr {{.*}}, align 8
+// LLVM: %[[TOEXT:.*]] = load i32, ptr {{.*}}, align 4
+// LLVM: %[[STRIDE:.*]] = sext i32 %[[TOEXT]] to i64
+// LLVM: getelementptr i8, ptr %[[PTR]], i64 %[[STRIDE]]
+// LLVM: ret void
+
+unsigned char *p(unsigned int x) {
+  unsigned char *p;
+  p += 16-x;
+  return p;
+}
+
+// CIR-LABEL: @p
+// CIR: %[[SUB:.*]] = cir.binop(sub
+// CIR: cir.ptr_stride inbounds|nuw {{.*}}, %[[SUB]] : (!cir.ptr<!u8i>, !u32i) -> !cir.ptr<!u8i>
+
+// LLVM-LABEL: @p
+// LLVM: getelementptr inbounds nuw i8, ptr {{.*}}
+
diff --git a/clang/test/CIR/Incubator/CodeGen/pointer-to-data-member-cast.cpp b/clang/test/CIR/Incubator/CodeGen/pointer-to-data-member-cast.cpp
new file mode 100644
index 0000000000000..3c178159cb5d0
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/pointer-to-data-member-cast.cpp
@@ -0,0 +1,98 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir --check-prefix=CIR %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll --check-prefix=LLVM %s
+
+struct Base1 {
+  int base1_data;
+};
+
+struct Base2 {
+  int base2_data;
+};
+
+struct Derived : Base1, Base2 {
+  int derived_data;
+};
+
+// CIR-LABEL:  @_Z15base_to_derivedM5Base2i
+// LLVM-LABEL: @_Z15base_to_derivedM5Base2i
+auto base_to_derived(int Base2::*ptr) -> int Derived::* {
+  return ptr;
+  // CIR: %{{.+}} = cir.derived_data_member %{{.+}} : !cir.data_member<!s32i in !rec_Base2> [4] -> !cir.data_member<!s32i in !rec_Derived>
+
+  //      LLVM: %[[#src:]] = load i64, ptr %{{.+}}
+  // LLVM-NEXT: %[[#is_null:]] = icmp eq i64 %[[#src]], -1
+  // LLVM-NEXT: %[[#adjusted:]] = add i64 %[[#src]], 4
+  // LLVM-NEXT: %{{.+}} = select i1 %[[#is_null]], i64 -1, i64 %[[#adjusted]]
+}
+
+// CIR-LABEL:  @_Z15derived_to_baseM7Derivedi
+// LLVM-LABEL: @_Z15derived_to_baseM7Derivedi
+auto derived_to_base(int Derived::*ptr) -> int Base2::* {
+  return static_cast<int Base2::*>(ptr);
+  // CIR: %{{.+}} = cir.base_data_member %{{.+}} : !cir.data_member<!s32i in !rec_Derived> [4] -> !cir.data_member<!s32i in !rec_Base2>
+
+  //      LLVM: %[[#src:]] = load i64, ptr %{{.+}}
+  // LLVM-NEXT: %[[#is_null:]] = icmp eq i64 %[[#src]], -1
+  // LLVM-NEXT: %[[#adjusted:]] = sub i64 %[[#src]], 4
+  // LLVM-NEXT: %{{.+}} = select i1 %[[#is_null]], i64 -1, i64 %[[#adjusted]]
+}
+
+// CIR-LABEL:  @_Z27base_to_derived_zero_offsetM5Base1i
+// LLVM-LABEL: @_Z27base_to_derived_zero_offsetM5Base1i
+auto base_to_derived_zero_offset(int Base1::*ptr) -> int Derived::* {
+  return ptr;
+  // CIR: %{{.+}} = cir.derived_data_member %{{.+}} : !cir.data_member<!s32i in !rec_Base1> [0] -> !cir.data_member<!s32i in !rec_Derived>
+
+  // No LLVM instructions emitted for performing a zero-offset cast.
+  // LLVM-NEXT: %[[#src_slot:]] = alloca i64, i64 1
+  // LLVM-NEXT: %[[#ret_slot:]] = alloca i64, i64 1
+  // LLVM-NEXT: store i64 %{{.+}}, ptr %[[#src_slot]]
+  // LLVM-NEXT: %[[#temp:]] = load i64, ptr %[[#src_slot]]
+  // LLVM-NEXT: store i64 %[[#temp]], ptr %[[#ret_slot]]
+  // LLVM-NEXT: %[[#ret:]] = load i64, ptr %[[#ret_slot]]
+  // LLVM-NEXT: ret i64 %[[#ret]]
+}
+
+// CIR-LABEL:  @_Z27derived_to_base_zero_offsetM7Derivedi
+// LLVM-LABEL: @_Z27derived_to_base_zero_offsetM7Derivedi
+auto derived_to_base_zero_offset(int Derived::*ptr) -> int Base1::* {
+  return static_cast<int Base1::*>(ptr);
+  // CIR: %{{.+}} = cir.base_data_member %{{.+}} : !cir.data_member<!s32i in !rec_Derived> [0] -> !cir.data_member<!s32i in !rec_Base1>
+
+  // No LLVM instructions emitted for performing a zero-offset cast.
+  // LLVM-NEXT: %[[#src_slot:]] = alloca i64, i64 1
+  // LLVM-NEXT: %[[#ret_slot:]] = alloca i64, i64 1
+  // LLVM-NEXT: store i64 %{{.+}}, ptr %[[#src_slot]]
+  // LLVM-NEXT: %[[#temp:]] = load i64, ptr %[[#src_slot]]
+  // LLVM-NEXT: store i64 %[[#temp]], ptr %[[#ret_slot]]
+  // LLVM-NEXT: %[[#ret:]] = load i64, ptr %[[#ret_slot]]
+  // LLVM-NEXT: ret i64 %[[#ret]]
+}
+
+struct Foo {
+  int a;
+};
+
+struct Bar {
+  int a;
+};
+
+bool to_bool(int Foo::*x) {
+  return x;
+}
+
+// CIR-LABEL: @_Z7to_boolM3Fooi
+//      CIR:   %[[#x:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.data_member<!s32i in !rec_Foo>>, !cir.data_member<!s32i in !rec_Foo>
+// CIR-NEXT:   %{{.+}} = cir.cast member_ptr_to_bool %[[#x]] : !cir.data_member<!s32i in !rec_Foo> -> !cir.bool
+//      CIR: }
+
+auto bitcast(int Foo::*x) {
+  return reinterpret_cast<int Bar::*>(x);
+}
+
+// CIR-LABEL: @_Z7bitcastM3Fooi
+//      CIR:   %[[#x:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.data_member<!s32i in !rec_Foo>>, !cir.data_member<!s32i in !rec_Foo>
+// CIR-NEXT:   %{{.+}} = cir.cast bitcast %[[#x]] : !cir.data_member<!s32i in !rec_Foo> -> !cir.data_member<!s32i in !rec_Bar>
+//      CIR: }
diff --git a/clang/test/CIR/Incubator/CodeGen/pointer-to-data-member-cmp.cpp b/clang/test/CIR/Incubator/CodeGen/pointer-to-data-member-cmp.cpp
new file mode 100644
index 0000000000000..f3413a01a477c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/pointer-to-data-member-cmp.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir --check-prefix=CIR %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll --check-prefix=LLVM %s
+
+struct Foo {
+  int a;
+};
+
+struct Bar {
+  int a;
+};
+
+bool eq(int Foo::*x, int Foo::*y) {
+  return x == y;
+}
+
+// CIR-LABEL: @_Z2eqM3FooiS0_
+//      CIR:   %[[#x:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.data_member<!s32i in !rec_Foo>>, !cir.data_member<!s32i in !rec_Foo>
+// CIR-NEXT:   %[[#y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.data_member<!s32i in !rec_Foo>>, !cir.data_member<!s32i in !rec_Foo>
+// CIR-NEXT:   %{{.+}} = cir.cmp(eq, %[[#x]], %[[#y]]) : !cir.data_member<!s32i in !rec_Foo>, !cir.bool
+//      CIR: }
+
+// LLVM-LABEL: @_Z2eqM3FooiS0_
+//      LLVM:   %[[#x:]] = load i64, ptr %{{.+}}, align 8
+// LLVM-NEXT:   %[[#y:]] = load i64, ptr %{{.+}}, align 8
+// LLVM-NEXT:   %{{.+}} = icmp eq i64 %[[#x]], %[[#y]]
+//      LLVM: }
+
+bool ne(int Foo::*x, int Foo::*y) {
+  return x != y;
+}
+
+// CIR-LABEL: @_Z2neM3FooiS0_
+//      CIR:   %[[#x:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.data_member<!s32i in !rec_Foo>>, !cir.data_member<!s32i in !rec_Foo>
+// CIR-NEXT:   %[[#y:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.data_member<!s32i in !rec_Foo>>, !cir.data_member<!s32i in !rec_Foo>
+// CIR-NEXT:   %{{.+}} = cir.cmp(ne, %[[#x]], %[[#y]]) : !cir.data_member<!s32i in !rec_Foo>, !cir.bool
+//      CIR: }
+
+// LLVM-LABEL: @_Z2neM3FooiS0_
+//      LLVM:   %[[#x:]] = load i64, ptr %{{.+}}, align 8
+// LLVM-NEXT:   %[[#y:]] = load i64, ptr %{{.+}}, align 8
+// LLVM-NEXT:   %{{.+}} = icmp ne i64 %[[#x]], %[[#y]]
+//      LLVM: }
diff --git a/clang/test/CIR/Incubator/CodeGen/pointer-to-data-member.cpp b/clang/test/CIR/Incubator/CodeGen/pointer-to-data-member.cpp
new file mode 100644
index 0000000000000..88325722092c9
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/pointer-to-data-member.cpp
@@ -0,0 +1,62 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -Wno-unused-value -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+struct Point {
+  int x;
+  int y;
+  int z;
+};
+// CHECK-DAG: !rec_Point = !cir.record<struct "Point" {!s32i, !s32i, !s32i}
+
+struct Incomplete;
+// CHECK-DAG: !rec_Incomplete = !cir.record<struct "Incomplete" incomplete>
+
+int Point::*pt_member = &Point::x;
+// CHECK: cir.global external @pt_member = #cir.data_member<0> : !cir.data_member<!s32i in !rec_Point>
+
+auto test1() -> int Point::* {
+  return &Point::y;
+}
+// CHECK: cir.func {{.*}} @_Z5test1v() -> !cir.data_member<!s32i in !rec_Point>
+// CHECK:   %{{.+}} = cir.const #cir.data_member<1> : !cir.data_member<!s32i in !rec_Point>
+// CHECK: }
+
+int test2(const Point &pt, int Point::*member) {
+  return pt.*member;
+}
+// CHECK: cir.func {{.*}} @_Z5test2RK5PointMS_i
+// CHECK:   %{{.+}} = cir.get_runtime_member %{{.+}}[%{{.+}} : !cir.data_member<!s32i in !rec_Point>] : !cir.ptr<!rec_Point> -> !cir.ptr<!s32i>
+// CHECK: }
+
+int test3(const Point *pt, int Point::*member) {
+  return pt->*member;
+}
+// CHECK: cir.func {{.*}} @_Z5test3PK5PointMS_i
+// CHECK:   %{{.+}} = cir.get_runtime_member %{{.+}}[%{{.+}} : !cir.data_member<!s32i in !rec_Point>] : !cir.ptr<!rec_Point> -> !cir.ptr<!s32i>
+// CHECK: }
+
+auto test4(int Incomplete::*member) -> int Incomplete::* {
+  return member;
+}
+// CHECK: cir.func {{.*}} @_Z5test4M10Incompletei(%arg0: !cir.data_member<!s32i in !rec_Incomplete> loc({{.+}})) -> !cir.data_member<!s32i in !rec_Incomplete>
+
+int test5(Incomplete *ic, int Incomplete::*member) {
+  return ic->*member;
+}
+// CHECK: cir.func {{.*}} @_Z5test5P10IncompleteMS_i
+// CHECK: %{{.+}} = cir.get_runtime_member %{{.+}}[%{{.+}} : !cir.data_member<!s32i in !rec_Incomplete>] : !cir.ptr<!rec_Incomplete> -> !cir.ptr<!s32i>
+// CHECK: }
+
+auto test_null() -> int Point::* {
+  return nullptr;
+}
+// CHECK: cir.func {{.*}} @_Z9test_nullv
+// CHECK:   %{{.+}} = cir.const #cir.data_member<null> : !cir.data_member<!s32i in !rec_Point>
+// CHECK: }
+
+auto test_null_incomplete() -> int Incomplete::* {
+  return nullptr;
+}
+// CHECK: cir.func {{.*}} @_Z20test_null_incompletev
+// CHECK:   %{{.+}} = cir.const #cir.data_member<null> : !cir.data_member<!s32i in !rec_Incomplete>
+// CHECK: }
diff --git a/clang/test/CIR/Incubator/CodeGen/pointer-to-member-func.cpp b/clang/test/CIR/Incubator/CodeGen/pointer-to-member-func.cpp
new file mode 100644
index 0000000000000..02a773e738d08
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/pointer-to-member-func.cpp
@@ -0,0 +1,242 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir --check-prefixes=CHECK,CIR %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll --check-prefix=LLVM %s
+
+struct Foo {
+  void m1(int);
+  virtual void m2(int);
+  virtual void m3(int);
+};
+
+auto make_non_virtual() -> void (Foo::*)(int) {
+  return &Foo::m1;
+}
+
+// CHECK-LABEL: cir.func {{.*}} @_Z16make_non_virtualv() -> !cir.method<!cir.func<(!s32i)> in !rec_Foo>
+//       CHECK:   %{{.+}} = cir.const #cir.method<@_ZN3Foo2m1Ei> : !cir.method<!cir.func<(!s32i)> in !rec_Foo>
+//       CHECK: }
+
+// LLVM-LABEL: @_Z16make_non_virtualv
+//       LLVM:   store { i64, i64 } { i64 ptrtoint (ptr @_ZN3Foo2m1Ei to i64), i64 0 }, ptr %{{.+}}
+//       LLVM: }
+
+auto make_virtual() -> void (Foo::*)(int) {
+  return &Foo::m3;
+}
+
+// CHECK-LABEL: cir.func {{.*}} @_Z12make_virtualv() -> !cir.method<!cir.func<(!s32i)> in !rec_Foo>
+//       CHECK:   %{{.+}} = cir.const #cir.method<vtable_offset = 8> : !cir.method<!cir.func<(!s32i)> in !rec_Foo>
+//       CHECK: }
+
+// LLVM-LABEL: @_Z12make_virtualv
+//       LLVM:   store { i64, i64 } { i64 9, i64 0 }, ptr %{{.+}}
+//       LLVM: }
+
+auto make_null() -> void (Foo::*)(int) {
+  return nullptr;
+}
+
+// CHECK-LABEL: cir.func {{.*}} @_Z9make_nullv() -> !cir.method<!cir.func<(!s32i)> in !rec_Foo>
+//       CHECK:   %{{.+}} = cir.const #cir.method<null> : !cir.method<!cir.func<(!s32i)> in !rec_Foo>
+//       CHECK: }
+
+// LLVM-LABEL: @_Z9make_nullv
+//       LLVM:   store { i64, i64 } zeroinitializer, ptr %{{.+}}
+//       LLVM: }
+
+void call(Foo *obj, void (Foo::*func)(int), int arg) {
+  (obj->*func)(arg);
+}
+
+// CHECK-LABEL: cir.func {{.*}} @_Z4callP3FooMS_FviEi
+//       CHECK:   %[[CALLEE:.+]], %[[THIS:.+]] = cir.get_method %{{.+}}, %{{.+}} : (!cir.method<!cir.func<(!s32i)> in !rec_Foo>, !cir.ptr<!rec_Foo>) -> (!cir.ptr<!cir.func<(!cir.ptr<!void>, !s32i)>>, !cir.ptr<!void>)
+//  CHECK-NEXT:   %[[#ARG:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!s32i>, !s32i
+//  CHECK-NEXT:   cir.call %[[CALLEE]](%[[THIS]], %[[#ARG]]) : (!cir.ptr<!cir.func<(!cir.ptr<!void>, !s32i)>>, !cir.ptr<!void>, !s32i) -> ()
+//       CHECK: }
+
+// LLVM-LABEL: @_Z4callP3FooMS_FviEi
+//      LLVM:    %[[#obj:]] = load ptr, ptr %{{.+}}
+// LLVM-NEXT:    %[[#memfn_ptr:]] = load { i64, i64 }, ptr %{{.+}}
+// LLVM-NEXT:    %[[#this_adj:]] = extractvalue { i64, i64 } %[[#memfn_ptr]], 1
+// LLVM-NEXT:    %[[#adjusted_this:]] = getelementptr i8, ptr %[[#obj]], i64 %[[#this_adj]]
+// LLVM-NEXT:    %[[#ptr_field:]] = extractvalue { i64, i64 } %[[#memfn_ptr]], 0
+// LLVM-NEXT:    %[[#virt_bit:]] = and i64 %[[#ptr_field]], 1
+// LLVM-NEXT:    %[[#is_virt:]] = icmp eq i64 %[[#virt_bit]], 1
+// LLVM-NEXT:    br i1 %[[#is_virt]], label %[[#block_virt:]], label %[[#block_non_virt:]]
+//      LLVM:  [[#block_virt]]:
+// LLVM-NEXT:    %[[#vtable_ptr:]] = load ptr, ptr %[[#obj]]
+// LLVM-NEXT:    %[[#vtable_offset:]] = sub i64 %[[#ptr_field]], 1
+// LLVM-NEXT:    %[[#vfp_ptr:]] = getelementptr i8, ptr %[[#vtable_ptr]], i64 %[[#vtable_offset]]
+// LLVM-NEXT:    %[[#vfp:]] = load ptr, ptr %[[#vfp_ptr]]
+// LLVM-NEXT:    br label %[[#block_continue:]]
+//      LLVM:  [[#block_non_virt]]:
+// LLVM-NEXT:    %[[#func_ptr:]] = inttoptr i64 %[[#ptr_field]] to ptr
+// LLVM-NEXT:    br label %[[#block_continue]]
+//      LLVM:  [[#block_continue]]:
+// LLVM-NEXT:    %[[#callee_ptr:]] = phi ptr [ %[[#func_ptr]], %[[#block_non_virt]] ], [ %[[#vfp]], %[[#block_virt]] ]
+// LLVM-NEXT:    %[[#arg:]] = load i32, ptr %{{.+}}
+// LLVM-NEXT:    call void %[[#callee_ptr]](ptr %[[#adjusted_this]], i32 %[[#arg]])
+//      LLVM: }
+
+bool cmp_eq(void (Foo::*lhs)(int), void (Foo::*rhs)(int)) {
+  return lhs == rhs;
+}
+
+// CHECK-LABEL: @_Z6cmp_eqM3FooFviES1_
+// CHECK: %{{.+}} = cir.cmp(eq, %{{.+}}, %{{.+}}) : !cir.method<!cir.func<(!s32i)> in !rec_Foo>, !cir.bool
+
+// LLVM-LABEL: @_Z6cmp_eqM3FooFviES1_
+//      LLVM: %[[#lhs:]] = load { i64, i64 }, ptr %{{.+}}
+// LLVM-NEXT: %[[#rhs:]] = load { i64, i64 }, ptr %{{.+}}
+// LLVM-NEXT: %[[#lhs_ptr:]] = extractvalue { i64, i64 } %[[#lhs]], 0
+// LLVM-NEXT: %[[#rhs_ptr:]] = extractvalue { i64, i64 } %[[#rhs]], 0
+// LLVM-NEXT: %[[#ptr_cmp:]] = icmp eq i64 %[[#lhs_ptr]], %[[#rhs_ptr]]
+// LLVM-NEXT: %[[#ptr_null:]] = icmp eq i64 %[[#lhs_ptr]], 0
+// LLVM-NEXT: %[[#lhs_adj:]] = extractvalue { i64, i64 } %[[#lhs]], 1
+// LLVM-NEXT: %[[#rhs_adj:]] = extractvalue { i64, i64 } %[[#rhs]], 1
+// LLVM-NEXT: %[[#adj_cmp:]] = icmp eq i64 %[[#lhs_adj]], %[[#rhs_adj]]
+// LLVM-NEXT: %[[#tmp:]] = or i1 %[[#ptr_null]], %[[#adj_cmp]]
+// LLVM-NEXT: %{{.+}} = and i1 %[[#tmp]], %[[#ptr_cmp]]
+
+bool cmp_ne(void (Foo::*lhs)(int), void (Foo::*rhs)(int)) {
+  return lhs != rhs;
+}
+
+// CHECK-LABEL: @_Z6cmp_neM3FooFviES1_
+// CHECK: %{{.+}} = cir.cmp(ne, %{{.+}}, %{{.+}}) : !cir.method<!cir.func<(!s32i)> in !rec_Foo>, !cir.bool
+
+// LLVM-LABEL: @_Z6cmp_neM3FooFviES1_
+//      LLVM: %[[#lhs:]] = load { i64, i64 }, ptr %{{.+}}
+// LLVM-NEXT: %[[#rhs:]] = load { i64, i64 }, ptr %{{.+}}
+// LLVM-NEXT: %[[#lhs_ptr:]] = extractvalue { i64, i64 } %[[#lhs]], 0
+// LLVM-NEXT: %[[#rhs_ptr:]] = extractvalue { i64, i64 } %[[#rhs]], 0
+// LLVM-NEXT: %[[#ptr_cmp:]] = icmp ne i64 %[[#lhs_ptr]], %[[#rhs_ptr]]
+// LLVM-NEXT: %[[#ptr_null:]] = icmp ne i64 %[[#lhs_ptr]], 0
+// LLVM-NEXT: %[[#lhs_adj:]] = extractvalue { i64, i64 } %[[#lhs]], 1
+// LLVM-NEXT: %[[#rhs_adj:]] = extractvalue { i64, i64 } %[[#rhs]], 1
+// LLVM-NEXT: %[[#adj_cmp:]] = icmp ne i64 %[[#lhs_adj]], %[[#rhs_adj]]
+// LLVM-NEXT: %[[#tmp:]] = and i1 %[[#ptr_null]], %[[#adj_cmp]]
+// LLVM-NEXT: %{{.+}} = or i1 %[[#tmp]], %[[#ptr_cmp]]
+
+struct Bar {
+  void m4();
+};
+
+bool memfunc_to_bool(void (Foo::*func)(int)) {
+  return func;
+}
+
+// CIR-LABEL: @_Z15memfunc_to_boolM3FooFviE
+// CIR:   %{{.+}} = cir.cast member_ptr_to_bool %{{.+}} : !cir.method<!cir.func<(!s32i)> in !rec_Foo> -> !cir.bool
+// CIR: }
+
+// LLVM-LABEL: @_Z15memfunc_to_boolM3FooFviE
+//      LLVM:   %[[#memfunc:]] = load { i64, i64 }, ptr %{{.+}}
+// LLVM-NEXT:   %[[#ptr:]] = extractvalue { i64, i64 } %[[#memfunc]], 0
+// LLVM-NEXT:   %{{.+}} = icmp ne i64 %[[#ptr]], 0
+//      LLVM: }
+
+auto memfunc_reinterpret(void (Foo::*func)(int)) -> void (Bar::*)() {
+  return reinterpret_cast<void (Bar::*)()>(func);
+}
+
+// CIR-LABEL: @_Z19memfunc_reinterpretM3FooFviE
+// CIR:   %{{.+}} = cir.cast bitcast %{{.+}} : !cir.method<!cir.func<(!s32i)> in !rec_Foo> -> !cir.method<!cir.func<()> in !rec_Bar>
+// CIR: }
+
+// LLVM-LABEL: @_Z19memfunc_reinterpretM3FooFviE
+// LLVM-NEXT:   %[[#arg_slot:]] = alloca { i64, i64 }, i64 1
+// LLVM-NEXT:   %[[#ret_slot:]] = alloca { i64, i64 }, i64 1
+// LLVM-NEXT:   store { i64, i64 } %{{.+}}, ptr %[[#arg_slot]]
+// LLVM-NEXT:   %[[#tmp:]] = load { i64, i64 }, ptr %[[#arg_slot]]
+// LLVM-NEXT:   store { i64, i64 } %[[#tmp]], ptr %[[#ret_slot]]
+// LLVM-NEXT:   %[[#ret:]] = load { i64, i64 }, ptr %[[#ret_slot]]
+// LLVM-NEXT:   ret { i64, i64 } %[[#ret]]
+// LLVM-NEXT: }
+
+struct Base1 {
+  int x;
+  virtual void m1(int);
+};
+
+struct Base2 {
+  int y;
+  virtual void m2(int);
+};
+
+struct Derived : Base1, Base2 {
+  virtual void m3(int);
+};
+
+using Base1MemFunc = void (Base1::*)(int);
+using Base2MemFunc = void (Base2::*)(int);
+using DerivedMemFunc = void (Derived::*)(int);
+
+DerivedMemFunc base_to_derived_zero_offset(Base1MemFunc ptr) {
+  return static_cast<DerivedMemFunc>(ptr);
+}
+
+// CIR-LABEL: @_Z27base_to_derived_zero_offsetM5Base1FviE
+// CIR: %{{.+}} = cir.derived_method(%{{.+}} : !cir.method<!cir.func<(!s32i)> in !rec_Base1_>) [0] -> !cir.method<!cir.func<(!s32i)> in !rec_Derived>
+
+// LLVM-LABEL: @_Z27base_to_derived_zero_offsetM5Base1FviE
+// LLVM-NEXT:   %[[#arg_slot:]] = alloca { i64, i64 }, i64 1
+// LLVM-NEXT:   %[[#ret_slot:]] = alloca { i64, i64 }, i64 1
+// LLVM-NEXT:   store { i64, i64 } %{{.+}}, ptr %[[#arg_slot]]
+// LLVM-NEXT:   %[[#tmp:]] = load { i64, i64 }, ptr %[[#arg_slot]]
+// LLVM-NEXT:   store { i64, i64 } %[[#tmp]], ptr %[[#ret_slot]]
+// LLVM-NEXT:   %[[#ret:]] = load { i64, i64 }, ptr %[[#ret_slot]]
+// LLVM-NEXT:   ret { i64, i64 } %[[#ret]]
+// LLVM-NEXT: }
+
+DerivedMemFunc base_to_derived(Base2MemFunc ptr) {
+  return static_cast<DerivedMemFunc>(ptr);
+}
+
+// CIR-LABEL: @_Z15base_to_derivedM5Base2FviE
+// CIR: %{{.+}} = cir.derived_method(%{{.+}} : !cir.method<!cir.func<(!s32i)> in !rec_Base2_>) [16] -> !cir.method<!cir.func<(!s32i)> in !rec_Derived>
+
+// LLVM-LABEL: @_Z15base_to_derivedM5Base2FviE
+//      LLVM: %[[#arg:]] = load { i64, i64 }, ptr %{{.+}}
+// LLVM-NEXT: %[[#adj:]] = extractvalue { i64, i64 } %[[#arg]], 1
+// LLVM-NEXT: %[[#adj_adj:]] = add i64 %[[#adj]], 16
+// LLVM-NEXT: %{{.+}} = insertvalue { i64, i64 } %[[#arg]], i64 %[[#adj_adj]], 1
+
+Base1MemFunc derived_to_base_zero_offset(DerivedMemFunc ptr) {
+  return static_cast<Base1MemFunc>(ptr);
+}
+
+// CIR-LABEL: @_Z27derived_to_base_zero_offsetM7DerivedFviE
+// CIR: %{{.+}} = cir.base_method(%{{.+}} : !cir.method<!cir.func<(!s32i)> in !rec_Derived>) [0] -> !cir.method<!cir.func<(!s32i)> in !rec_Base1_>
+
+// LLVM-LABEL: @_Z27derived_to_base_zero_offsetM7DerivedFviE
+// LLVM-NEXT:   %[[#arg_slot:]] = alloca { i64, i64 }, i64 1
+// LLVM-NEXT:   %[[#ret_slot:]] = alloca { i64, i64 }, i64 1
+// LLVM-NEXT:   store { i64, i64 } %{{.+}}, ptr %[[#arg_slot]]
+// LLVM-NEXT:   %[[#tmp:]] = load { i64, i64 }, ptr %[[#arg_slot]]
+// LLVM-NEXT:   store { i64, i64 } %[[#tmp]], ptr %[[#ret_slot]]
+// LLVM-NEXT:   %[[#ret:]] = load { i64, i64 }, ptr %[[#ret_slot]]
+// LLVM-NEXT:   ret { i64, i64 } %[[#ret]]
+// LLVM-NEXT: }
+
+Base2MemFunc derived_to_base(DerivedMemFunc ptr) {
+  return static_cast<Base2MemFunc>(ptr);
+}
+
+// CIR-LABEL: @_Z15derived_to_baseM7DerivedFviE
+// CIR: %{{.+}} = cir.base_method(%{{.+}} : !cir.method<!cir.func<(!s32i)> in !rec_Derived>) [16] -> !cir.method<!cir.func<(!s32i)> in !rec_Base2_>
+
+// LLVM-LABEL: @_Z15derived_to_baseM7DerivedFviE
+//      LLVM: %[[#arg:]] = load { i64, i64 }, ptr %{{.+}}
+// LLVM-NEXT: %[[#adj:]] = extractvalue { i64, i64 } %[[#arg]], 1
+// LLVM-NEXT: %[[#adj_adj:]] = sub i64 %[[#adj]], 16
+// LLVM-NEXT: %{{.+}} = insertvalue { i64, i64 } %[[#arg]], i64 %[[#adj_adj]], 1
+
+struct HasVTable {
+  virtual void test(void (Foo::*)());
+};
+
+// Ensure that the vfunc pointer to the function involving a pointer-to-member-
+// func could be emitted.
+void HasVTable::test(void (Foo::*)()) {}
diff --git a/clang/test/CIR/Incubator/CodeGen/pointer.cpp b/clang/test/CIR/Incubator/CodeGen/pointer.cpp
new file mode 100644
index 0000000000000..bdf0e2103192b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/pointer.cpp
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Global pointer should be zero initialized by default.
+int *ptr;
+// CHECK: cir.global external @ptr = #cir.ptr<null> : !cir.ptr<!s32i>
diff --git a/clang/test/CIR/Incubator/CodeGen/pointers.cpp b/clang/test/CIR/Incubator/CodeGen/pointers.cpp
new file mode 100644
index 0000000000000..82631fcfdf951
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/pointers.cpp
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Should generate basic pointer arithmetics.
+void foo(int *iptr, char *cptr, unsigned ustride) {
+  *(iptr + 2) = 1;
+  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<2> : !s32i
+  // CHECK: cir.ptr_stride inbounds %{{.+}}, %[[#STRIDE]] : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+  *(cptr + 3) = 1;
+  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<3> : !s32i
+  // CHECK: cir.ptr_stride inbounds %{{.+}}, %[[#STRIDE]] : (!cir.ptr<!s8i>, !s32i) -> !cir.ptr<!s8i>
+  *(iptr - 2) = 1;
+  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<2> : !s32i
+  // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#STRIDE]]) : !s32i, !s32i
+  // CHECK: cir.ptr_stride inbounds %{{.+}}, %[[#NEGSTRIDE]] : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+  *(cptr - 3) = 1;
+  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<3> : !s32i
+  // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#STRIDE]]) : !s32i, !s32i
+  // CHECK: cir.ptr_stride inbounds %{{.+}}, %[[#NEGSTRIDE]] : (!cir.ptr<!s8i>, !s32i) -> !cir.ptr<!s8i>
+  *(iptr + ustride) = 1;
+  // CHECK: %[[#STRIDE:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u32i>, !u32i
+  // CHECK: cir.ptr_stride inbounds|nuw %{{.+}}, %[[#STRIDE]] : (!cir.ptr<!s32i>, !u32i) -> !cir.ptr<!s32i>
+
+  // Must convert unsigned stride to a signed one.
+  *(iptr - ustride) = 1;
+  // CHECK: %[[#STRIDE:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!u32i>, !u32i
+  // CHECK: %[[#SIGNSTRIDE:]] = cir.cast integral %[[#STRIDE]] : !u32i -> !s32i
+  // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#SIGNSTRIDE]]) : !s32i, !s32i
+  // CHECK: cir.ptr_stride inbounds %{{.+}}, %[[#NEGSTRIDE]] : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+}
+
+void testPointerSubscriptAccess(int *ptr) {
+// CHECK: testPointerSubscriptAccess
+  ptr[1] = 2;
+  // CHECK: %[[#V1:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+  // CHECK: %[[#V2:]] = cir.const #cir.int<1> : !s32i
+  // CHECK: cir.ptr_stride %[[#V1]], %[[#V2]] : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+}
+
+void testPointerMultiDimSubscriptAccess(int **ptr) {
+// CHECK: testPointerMultiDimSubscriptAccess
+  ptr[1][2] = 3;
+  // CHECK: %[[#V1:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!cir.ptr<!cir.ptr<!s32i>>>, !cir.ptr<!cir.ptr<!s32i>>
+  // CHECK: %[[#V2:]] = cir.const #cir.int<1> : !s32i
+  // CHECK: %[[#V3:]] = cir.ptr_stride %[[#V1]], %[[#V2]] : (!cir.ptr<!cir.ptr<!s32i>>, !s32i) -> !cir.ptr<!cir.ptr<!s32i>>
+  // CHECK: %[[#V4:]] = cir.load{{.*}} %[[#V3]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+  // CHECK: %[[#V5:]] = cir.const #cir.int<2> : !s32i
+  // CHECK: cir.ptr_stride %[[#V4]], %[[#V5]] : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/pred-info-builtins.c b/clang/test/CIR/Incubator/CodeGen/pred-info-builtins.c
new file mode 100644
index 0000000000000..f130c209446cf
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/pred-info-builtins.c
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -O0 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s --check-prefix=CIR-O0
+// RUN: %clang_cc1 -O2 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s --check-prefix=CIR-O2
+
+extern void __attribute__((noinline)) bar(void);
+
+void expect(int x) {
+  if (__builtin_expect(x, 0))
+    bar();
+}
+// CIR-O0: cir.func {{.*}} @expect
+// CIR-O0:   cir.if {{%.*}} {
+// CIR-O0:     cir.call @bar() : () -> ()
+
+// CIR-O2: cir.func {{.*}} @expect
+// CIR-O2:   [[EXPECT:%.*]] = cir.expect({{.*}}, {{.*}}) : !s64i
+// CIR-O2:   [[EXPECT_BOOL:%.*]] = cir.cast int_to_bool [[EXPECT]] : !s64i -> !cir.bool
+// CIR-O2:   cir.if [[EXPECT_BOOL]]
+// CIR-O2:     cir.call @bar() : () -> ()
+
+void expect_with_probability(int x) {
+  if (__builtin_expect_with_probability(x, 1, 0.8))
+    bar();
+}
+// CIR-O0: cir.func {{.*}} @expect_with_probability
+// CIR-O0:   cir.if {{%.*}} {
+// CIR-O0:     cir.call @bar() : () -> ()
+
+// CIR-O2:  cir.func {{.*}} @expect_with_probability
+// CIR-O2:    [[EXPECT:%.*]] = cir.expect({{.*}}, {{.*}}, 8.000000e-01) : !s64i
+// CIR-O2:    [[EXPECT_BOOL:%.*]] = cir.cast int_to_bool [[EXPECT]] : !s64i -> !cir.bool
+// CIR-O2:    cir.if [[EXPECT_BOOL]]
+// CIR-O2:      cir.call @bar() : () -> ()
+
+void unpredictable(int x) {
+  if (__builtin_unpredictable(x > 1))
+    bar();
+// CIR-O0: cir.func {{.*}} @unpredictable
+// CIR-O0:   cir.if {{%.*}} {
+// CIR-O0:     cir.call @bar() : () -> ()
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/predefined.cpp b/clang/test/CIR/Incubator/CodeGen/predefined.cpp
new file mode 100644
index 0000000000000..eaed978df46b2
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/predefined.cpp
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+extern "C" {
+  void __assert2(const char* __file, int __line, const char* __function, const char* __msg) __attribute__((__noreturn__));
+}
+
+void m() {
+  __assert2("yo.cpp", 79, __PRETTY_FUNCTION__, "doom");
+}
+
+// CHECK: cir.func {{.*}} @_Z1mv()
+// CHECK:     %0 = cir.get_global @".str" : !cir.ptr<!cir.array<!s8i x 7>>
+// CHECK:     %1 = cir.cast array_to_ptrdecay %0 : !cir.ptr<!cir.array<!s8i x 7>> -> !cir.ptr<!s8i>
+// CHECK:     %2 = cir.const #cir.int<79> : !s32i
+// CHECK:     %3 = cir.get_global @".str.1" : !cir.ptr<!cir.array<!s8i x 9>>
+// CHECK:     %4 = cir.cast array_to_ptrdecay %3 : !cir.ptr<!cir.array<!s8i x 9>> -> !cir.ptr<!s8i>
+// CHECK:     %5 = cir.get_global @".str.2" : !cir.ptr<!cir.array<!s8i x 5>>
+// CHECK:     %6 = cir.cast array_to_ptrdecay %5 : !cir.ptr<!cir.array<!s8i x 5>> -> !cir.ptr<!s8i>
+// CHECK:     cir.call @__assert2(%1, %2, %4, %6) : (!cir.ptr<!s8i>, !s32i, !cir.ptr<!s8i>, !cir.ptr<!s8i>) -> ()
+// CHECK:     cir.return
+// CHECK:   }
diff --git a/clang/test/CIR/Incubator/CodeGen/ptrdiff.c b/clang/test/CIR/Incubator/CodeGen/ptrdiff.c
new file mode 100644
index 0000000000000..2045fee1a70a5
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/ptrdiff.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+int addrcmp(const void* a, const void* b) {
+  // CIR-LABEL: addrcmp
+  // CIR: %[[R:.*]] = cir.ptr_diff
+  // CIR: cir.cast integral %[[R]] : !s64i -> !s32i
+
+  // LLVM-LABEL: addrcmp
+  // LLVM: %[[PTR_A:.*]] = ptrtoint ptr {{.*}} to i64
+  // LLVM: %[[PTR_B:.*]] = ptrtoint ptr {{.*}} to i64
+  // LLVM: %[[SUB:.*]] = sub i64 %[[PTR_A]], %[[PTR_B]]
+  // LLVM-NOT: sdiv
+  // LLVM: trunc i64 %[[SUB]] to i32
+  return *(const void**)a - *(const void**)b;
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/ptrdiff.cpp b/clang/test/CIR/Incubator/CodeGen/ptrdiff.cpp
new file mode 100644
index 0000000000000..54a440c9949b2
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/ptrdiff.cpp
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+typedef unsigned long size_type;
+size_type size(unsigned long *_start, unsigned long *_finish) {
+  return static_cast<size_type>(_finish - _start);
+}
+
+// CHECK: cir.func {{.*}} @_Z4sizePmS_(%arg0: !cir.ptr<!u64i>
+// CHECK:   %3 = cir.load{{.*}} %1 : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+// CHECK:   %4 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+// CHECK:   %5 = cir.ptr_diff %3, %4 : !cir.ptr<!u64i> -> !s64i
+// CHECK:   %6 = cir.cast integral %5 : !s64i -> !u64i
+
+long add(char *a, char *b) {
+  return a - b + 1;
+}
+
+// CHECK: cir.func {{.*}} @_Z3addPcS_(%arg0: !cir.ptr<!s8i>
+// CHECK:   %5 = cir.ptr_diff %3, %4 : !cir.ptr<!s8i> -> !s64i
+// CHECK:   %6 = cir.const #cir.int<1> : !s32i
+// CHECK:   %7 = cir.cast integral %6 : !s32i -> !s64i
+// CHECK:   %8 = cir.binop(add, %5, %7) : !s64i
diff --git a/clang/test/CIR/Incubator/CodeGen/rangefor.cpp b/clang/test/CIR/Incubator/CodeGen/rangefor.cpp
new file mode 100644
index 0000000000000..1ff8c7978f890
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/rangefor.cpp
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+#include "std-cxx.h"
+
+typedef enum enumy {
+  Unknown = 0,
+  Some = 1000024002,
+} enumy;
+
+typedef struct triple {
+  enumy type;
+  void* __attribute__((__may_alias__)) next;
+  unsigned image;
+} triple;
+
+void init(unsigned numImages) {
+  std::vector<triple> images(numImages);
+  for (auto& image : images) {
+    image = {Some};
+  }
+}
+
+// CHECK-DAG: !rec_triple = !cir.record<struct "triple" {!u32i, !cir.ptr<!void>, !u32i}>
+// CHECK-DAG: ![[VEC:.*]] = !cir.record<class "std::vector<triple>" {!cir.ptr<!rec_triple>, !cir.ptr<!rec_triple>, !cir.ptr<!rec_triple>}>
+// CHECK-DAG: ![[VEC_IT:.*]] = !cir.record<struct "__vector_iterator<triple, triple *, triple &>" {!cir.ptr<!rec_triple>}>
+
+// CHECK: cir.func {{.*}} @_Z4initj(%arg0: !u32i
+// CHECK:   %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["numImages", init] {alignment = 4 : i64}
+// CHECK:   %1 = cir.alloca ![[VEC]], !cir.ptr<![[VEC]]>, ["images", init] {alignment = 8 : i64}
+// CHECK:   cir.store{{.*}} %arg0, %0 : !u32i, !cir.ptr<!u32i>
+// CHECK:   %2 = cir.load{{.*}} %0 : !cir.ptr<!u32i>, !u32i
+// CHECK:   %3 = cir.cast integral %2 : !u32i -> !u64i
+// CHECK:   cir.call @_ZNSt6vectorI6tripleEC1Em(%1, %3) : (!cir.ptr<![[VEC]]>, !u64i) -> ()
+// CHECK:   cir.scope {
+// CHECK:     %4 = cir.alloca !cir.ptr<![[VEC]]>, !cir.ptr<!cir.ptr<![[VEC]]>>, ["__range1", init, const] {alignment = 8 : i64}
+// CHECK:     %5 = cir.alloca ![[VEC_IT]], !cir.ptr<![[VEC_IT]]>, ["__begin1", init] {alignment = 8 : i64}
+// CHECK:     %6 = cir.alloca ![[VEC_IT]], !cir.ptr<![[VEC_IT]]>, ["__end1", init] {alignment = 8 : i64}
+// CHECK:     %7 = cir.alloca !cir.ptr<!rec_triple>, !cir.ptr<!cir.ptr<!rec_triple>>, ["image", init, const] {alignment = 8 : i64}
+// CHECK:     cir.store{{.*}} %1, %4 : !cir.ptr<![[VEC]]>, !cir.ptr<!cir.ptr<![[VEC]]>>
+// CHECK:     %8 = cir.load{{.*}} %4 : !cir.ptr<!cir.ptr<![[VEC]]>>, !cir.ptr<![[VEC]]>
+// CHECK:     %9 = cir.call @_ZNSt6vectorI6tripleE5beginEv(%8) : (!cir.ptr<![[VEC]]>) -> ![[VEC_IT]]
+// CHECK:     cir.store{{.*}} %9, %5 : ![[VEC_IT]], !cir.ptr<![[VEC_IT]]>
+// CHECK:     %10 = cir.load{{.*}} %4 : !cir.ptr<!cir.ptr<![[VEC]]>>, !cir.ptr<![[VEC]]>
+// CHECK:     %11 = cir.call @_ZNSt6vectorI6tripleE3endEv(%10) : (!cir.ptr<![[VEC]]>) -> ![[VEC_IT]]
+// CHECK:     cir.store{{.*}} %11, %6 : ![[VEC_IT]], !cir.ptr<![[VEC_IT]]>
+// CHECK:     cir.for : cond {
+// CHECK:       %12 = cir.call @_ZNK17__vector_iteratorI6triplePS0_RS0_EneERKS3_(%5, %6) : (!cir.ptr<![[VEC_IT]]>, !cir.ptr<![[VEC_IT]]>) -> !cir.bool
+// CHECK:       cir.condition(%12)
+// CHECK:     } body {
+// CHECK:       %12 = cir.call @_ZNK17__vector_iteratorI6triplePS0_RS0_EdeEv(%5) : (!cir.ptr<![[VEC_IT]]>) -> !cir.ptr<!rec_triple>
+// CHECK:       cir.store{{.*}} %12, %7 : !cir.ptr<!rec_triple>, !cir.ptr<!cir.ptr<!rec_triple>>
+// CHECK:       cir.scope {
+// CHECK:         %13 = cir.alloca !rec_triple, !cir.ptr<!rec_triple>, ["ref.tmp0"] {alignment = 8 : i64}
+// CHECK:         %14 = cir.const #cir.zero : !rec_triple
+// CHECK:         cir.store{{.*}} %14, %13 : !rec_triple, !cir.ptr<!rec_triple>
+// CHECK:         %15 = cir.get_member %13[0] {name = "type"} : !cir.ptr<!rec_triple> -> !cir.ptr<!u32i>
+// CHECK:         %16 = cir.const #cir.int<1000024002> : !u32i
+// CHECK:         cir.store{{.*}} %16, %15 : !u32i, !cir.ptr<!u32i>
+// CHECK:         %17 = cir.get_member %13[1] {name = "next"} : !cir.ptr<!rec_triple> -> !cir.ptr<!cir.ptr<!void>>
+// CHECK:         %18 = cir.get_member %13[2] {name = "image"} : !cir.ptr<!rec_triple> -> !cir.ptr<!u32i>
+// CHECK:         %19 = cir.load{{.*}} %7 : !cir.ptr<!cir.ptr<!rec_triple>>, !cir.ptr<!rec_triple>
+// CHECK:         %20 = cir.call @_ZN6tripleaSEOS_(%19, %13) : (!cir.ptr<!rec_triple>, !cir.ptr<!rec_triple>) -> !cir.ptr<!rec_triple>
+// CHECK:       }
+// CHECK:       cir.yield
+// CHECK:     } step {
+// CHECK:       %12 = cir.call @_ZN17__vector_iteratorI6triplePS0_RS0_EppEv(%5) : (!cir.ptr<![[VEC_IT]]>) -> !cir.ptr<![[VEC_IT]]>
+// CHECK:       cir.yield
+// CHECK:     }
+// CHECK:   }
+// CHECK:   cir.return
diff --git a/clang/test/CIR/Incubator/CodeGen/requires-expr.cpp b/clang/test/CIR/Incubator/CodeGen/requires-expr.cpp
new file mode 100644
index 0000000000000..ff39fae0af4a9
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/requires-expr.cpp
@@ -0,0 +1,75 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Test RequiresExpr as a boolean expression
+bool test_requires_simple() {
+  // CHECK-LABEL: cir.func{{.*}} @{{.*}}test_requires_simplev
+  // CHECK: %{{.*}} = cir.const #true
+  return requires { 1 + 1; };
+}
+
+template <typename T>
+bool test_requires_param() {
+  return requires(T t) { t + 1; };
+}
+
+bool use_requires_param() {
+  // CHECK-LABEL: cir.func{{.*}} @{{.*}}use_requires_paramv
+  // Instantiation with int should succeed
+  return test_requires_param<int>();
+  // CHECK: cir.call @{{.*}}test_requires_paramIiEbv
+}
+
+// Test requires expression with multiple requirements
+bool test_requires_multiple() {
+  // CHECK-LABEL: cir.func{{.*}} @{{.*}}test_requires_multiplev
+  // CHECK: %{{.*}} = cir.const #true
+  return requires {
+    1 + 1;
+    2 * 2;
+  };
+}
+
+// Test requires expression in if statement
+int test_requires_in_if() {
+  // CHECK-LABEL: cir.func{{.*}} @{{.*}}test_requires_in_ifv
+  if (requires { 1 + 1; }) {
+    // CHECK: %{{.*}} = cir.const #true
+    // CHECK: cir.if %{{.*}} {
+    return 1;
+  }
+  return 0;
+}
+
+// Test requires expression that should fail
+template <typename T>
+bool test_requires_fail() {
+  return requires { T::nonexistent_member; };
+}
+
+bool use_requires_fail() {
+  // CHECK-LABEL: cir.func{{.*}} @{{.*}}use_requires_failv
+  // Should return false for int (no member named nonexistent_member)
+  return test_requires_fail<int>();
+  // CHECK: cir.call @{{.*}}test_requires_failIiEbv
+}
+
+// Test nested requires
+bool test_nested_requires() {
+  // CHECK-LABEL: cir.func{{.*}} @{{.*}}test_nested_requiresv
+  // CHECK: %{{.*}} = cir.const #true
+  return requires {
+    requires true;
+  };
+}
+
+// Use in constexpr context
+constexpr bool can_add_int = requires(int a, int b) { a + b; };
+
+int use_constexpr_requires() {
+  // CHECK-LABEL: cir.func{{.*}} @{{.*}}use_constexpr_requiresv
+  if (can_add_int) {
+    return 42;
+  }
+  return 0;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/return.cpp b/clang/test/CIR/Incubator/CodeGen/return.cpp
new file mode 100644
index 0000000000000..9ced38fef22c8
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/return.cpp
@@ -0,0 +1,66 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fno-strict-return -fclangir -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-NOSTRICT
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fno-strict-return -emit-llvm %s -o - | FileCheck %s --check-prefix=OGCG-CHECK-NOSTRICT
+
+int &ret0(int &x) {
+  return x;
+}
+
+// CHECK: cir.func {{.*}} @_Z4ret0Ri
+// CHECK:   %0 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["x", init, const] {alignment = 8 : i64}
+// CHECK:   %1 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["__retval"] {alignment = 8 : i64}
+// CHECK:   cir.store{{.*}} %arg0, %0 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CHECK:   %2 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK:   cir.store{{.*}} %2, %1 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CHECK:   %3 = cir.load{{.*}} %1 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK:   cir.return %3 : !cir.ptr<!s32i>
+
+int unreachable_after_return() {
+  return 0;
+  return 1;
+}
+
+// CHECK: cir.func {{.*}} @_Z24unreachable_after_returnv
+// CHECK-NEXT:   %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CHECK-NEXT:   %1 = cir.const #cir.int<0> : !s32i
+// CHECK-NEXT:   cir.store{{.*}} %1, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:   cir.br ^bb1
+// CHECK-NEXT: ^bb1:  // 2 preds: ^bb0, ^bb2
+// CHECK-NEXT:   %2 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:   cir.return %2 : !s32i
+// CHECK-NEXT: ^bb2:  // no predecessors
+// CHECK-NEXT:   %3 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:   cir.store{{.*}} %3, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:   cir.br ^bb1
+// CHECK-NEXT: }
+
+struct NonTrivialDefaultConstructor {
+  int x;
+
+  NonTrivialDefaultConstructor() { }
+};
+
+// CHECK-NOSTRICT-LABEL: @_Z28nonTrivialDefaultConstructorv
+// OGCG-CHECK-NOSTRICT-LABEL: @_Z28nonTrivialDefaultConstructorv
+NonTrivialDefaultConstructor nonTrivialDefaultConstructor() {
+  // CHECK-NOSTRICT-NOT: call void @llvm.trap
+  // CHECK-NOSTRICT-NOT: unreachable
+  // OGCG-CHECK-NOSTRICT-NOT: call void @llvm.trap
+  // OGCG-CHECK-NOSTRICT-NOT: unreachable
+}
+
+// Functions that return records with non-trivial destructors should always use
+// the -fstrict-return optimization.
+
+struct NonTrivialDestructor {
+  ~NonTrivialDestructor();
+};
+
+// CHECK-NOSTRICT-LABEL: @_Z20nonTrivialDestructorv
+// OGCG-CHECK-NOSTRICT-LABEL: @_Z20nonTrivialDestructorv
+NonTrivialDestructor nonTrivialDestructor() {
+  // CHECK-NOSTRICT: call void @llvm.trap
+  // CHECK-NOSTRICT-NEXT: unreachable
+  // OGCG-CHECK-NOSTRICT: call void @llvm.trap
+  // OGCG-CHECK-NOSTRICT-NEXT: unreachable
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/same-mangled-name.cpp b/clang/test/CIR/Incubator/CodeGen/same-mangled-name.cpp
new file mode 100644
index 0000000000000..58660c8133451
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/same-mangled-name.cpp
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// This would previously emit a "definition with same mangled name as another
+// definition" error: https://github.com/llvm/clangir/issues/991.
+namespace N {
+struct S {
+  // CHECK: cir.func {{.*}} @_ZN1N1S3fooEv({{.*}} {
+  void foo() {}
+};
+
+// CHECK: cir.func {{.*}} @_ZN1N1fEv() {{.*}} {
+// CHECK:   cir.call @_ZN1N1S3fooEv(
+void f() { S().foo(); }
+} // namespace N
diff --git a/clang/test/CIR/Incubator/CodeGen/scalar_to_scalar_bitcast.cl b/clang/test/CIR/Incubator/CodeGen/scalar_to_scalar_bitcast.cl
new file mode 100644
index 0000000000000..bdfc902711211
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/scalar_to_scalar_bitcast.cl
@@ -0,0 +1,24 @@
+// RUN: %clang -cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -O0 -emit-cir -fclangir -o - %s | FileCheck %s --check-prefix=CIR
+// RUN: %clang -cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -O0 -emit-llvm -fclangir -o - %s | FileCheck %s --check-prefix=LLVM
+// RUN: %clang -cc1 -triple spirv64-unknown-unknown -cl-std=CL2.0 -finclude-default-header -O0 -emit-llvm -o - %s | FileCheck %s --check-prefix=OG-LLVM
+
+#define as_int(x) __builtin_astype((x), int)
+#define as_float(x) __builtin_astype((x), float)
+
+int float_to_int(float x)
+{
+  return as_int(x);
+}
+
+// CIR: cir.cast bitcast %{{.*}} : !cir.float -> !s32i
+// LLVM: bitcast float %{{.*}} to i32
+// OG-LLVM: bitcast float %{{.*}} to i32
+
+float int_to_float(int x)
+{
+  return as_float(x);
+}
+
+// CIR: cir.cast bitcast %{{.*}} : !s32i -> !cir.float
+// LLVM: bitcast i32 %{{.*}} to float
+// OG-LLVM: bitcast i32 %{{.*}} to float
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/scoped-atomic-load-store.c b/clang/test/CIR/Incubator/CodeGen/scoped-atomic-load-store.c
new file mode 100644
index 0000000000000..a8aa5665ec305
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/scoped-atomic-load-store.c
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+int scoped_load_thread(int *ptr) {
+  return __scoped_atomic_load_n(ptr, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
+}
+
+// CIR-LABEL: @scoped_load_thread
+// CIR: %[[ATOMIC_LOAD:.*]] = cir.load align(4) syncscope(single_thread) atomic(relaxed) %{{.*}} : !cir.ptr<!s32i>, !s32i
+// CIR: cir.store align(4) %[[ATOMIC_LOAD]], %{{.*}} : !s32i, !cir.ptr<!s32i>
+
+// LLVM-LABEL: @scoped_load_thread
+// LLVM: load atomic i32, ptr %{{.*}} syncscope("singlethread") monotonic, align 4
+
+// OGCG-LABEL: @scoped_load_thread
+// OGCG: load atomic i32, ptr %{{.*}} monotonic, align 4
+
+int scoped_load_system(int *ptr) {
+  return __scoped_atomic_load_n(ptr, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM);
+}
+
+// CIR-LABEL: @scoped_load_system
+// CIR: cir.load align(4) syncscope(system) atomic(seq_cst) %{{.*}} : !cir.ptr<!s32i>, !s32i
+
+// LLVM-LABEL: @scoped_load_system
+// LLVM: load atomic i32, ptr %{{.*}} seq_cst, align 4
+// LLVM-NOT: syncscope(
+
+// OGCG-LABEL: @scoped_load_system
+// OGCG: load atomic i32, ptr %{{.*}} seq_cst, align 4
+// OGCG-NOT: syncscope(
diff --git a/clang/test/CIR/Incubator/CodeGen/shift.cpp b/clang/test/CIR/Incubator/CodeGen/shift.cpp
new file mode 100644
index 0000000000000..6f6a10d34ab08
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/shift.cpp
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+unsigned long s(int i, unsigned long x) {
+  return x << i;
+}
+
+// CHECK: cir.shift(left, %3 : !u64i, %4 : !s32i) -> !u64i
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/CodeGen/sizeof-pack.cpp b/clang/test/CIR/Incubator/CodeGen/sizeof-pack.cpp
new file mode 100644
index 0000000000000..e44ac96f05575
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/sizeof-pack.cpp
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Test basic sizeof... on type parameter pack
+template<typename ...Types>
+int get_num_types(Types...) {
+  return sizeof...(Types);
+}
+
+// CHECK-LABEL: cir.func{{.*}} @{{.*}}get_num_typesIJifdEEiDpT_
+// CHECK: %{{.*}} = cir.const #cir.int<3> : !u64i
+// CHECK: %{{.*}} = cir.cast integral %{{.*}} : !u64i -> !s32i
+
+template int get_num_types(int, float, double);
+
+// Test sizeof... with empty pack
+template<typename ...Types>
+int get_num_empty(Types...) {
+  return sizeof...(Types);
+}
+
+// CHECK-LABEL: cir.func{{.*}} @{{.*}}get_num_emptyIJEEiDpT_
+// CHECK: %{{.*}} = cir.const #cir.int<0> : !u64i
+
+template int get_num_empty();
+
+// Test sizeof... on non-type parameter pack
+template<int... Vals>
+int count_values() {
+  return sizeof...(Vals);
+}
+
+// CHECK-LABEL: cir.func{{.*}} @{{.*}}count_valuesIJLi1ELi2ELi3ELi4ELi5EEEiv
+// CHECK: %{{.*}} = cir.const #cir.int<5> : !u64i
+
+template int count_values<1, 2, 3, 4, 5>();
diff --git a/clang/test/CIR/Incubator/CodeGen/skip-functions-from-system-headers.cpp b/clang/test/CIR/Incubator/CodeGen/skip-functions-from-system-headers.cpp
new file mode 100644
index 0000000000000..f9511a69fb914
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/skip-functions-from-system-headers.cpp
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -fclangir-skip-system-headers -I%S/../Inputs %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+#include "skip-this-header.h"
+
+void test() {
+  String s1{};
+  String s2{1};
+  String s3{"abcdefghijklmnop"};
+}
+
+// CHECK-NOT: cir.func {{.*}} @_ZN6StringC2Ev
+// CHECK-NOT: cir.func {{.*}} @_ZN6StringC2Ei
+// CHECK-NOT: cir.func {{.*}} @_ZN6StringC2EPKc
+
+// CHECK: cir.func {{.*}} @_Z4testv()
+// CHECK:   cir.call @_ZN6StringC1Ev(%0) : (!cir.ptr<!rec_String>) -> ()
diff --git a/clang/test/CIR/Incubator/CodeGen/source-loc-expr.cpp b/clang/test/CIR/Incubator/CodeGen/source-loc-expr.cpp
new file mode 100644
index 0000000000000..f90e05ccbdc20
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/source-loc-expr.cpp
@@ -0,0 +1,126 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -emit-llvm %s -o %t.og.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.og.ll %s
+
+// Test in global context
+#line 100 "test_file.cpp"
+int global_line = __builtin_LINE();
+// CIR: cir.global external @global_line = #cir.int<100> : !s32i
+// LLVM: @global_line{{.*}} = global i32 100
+// OGCG: @global_line{{.*}} = global i32 100
+
+#line 15 "clang/test/CIR/CodeGen/source-loc-expr.cpp"
+// Test __builtin_LINE
+int test_builtin_LINE() {
+  // CIR-LABEL: cir.func{{.*}} @{{.*}}test_builtin_LINE
+  // CIR: %{{.*}} = cir.const #cir.int<25> : !u32i
+
+  // LLVM-LABEL: @{{.*}}test_builtin_LINE
+  // LLVM: store i32 25
+
+  // OGCG-LABEL: @{{.*}}test_builtin_LINE
+  // OGCG: ret i32 25
+  return __builtin_LINE();
+}
+
+// Test __builtin_FILE
+const char* test_builtin_FILE() {
+  // CIR-LABEL: cir.func{{.*}} @{{.*}}test_builtin_FILE
+  // CIR: %{{.*}} = cir.const #cir.global_view<@".str{{.*}}"> : !cir.ptr<!s8i>
+
+  // LLVM-LABEL: @{{.*}}test_builtin_FILE
+  // LLVM: store ptr @.str
+
+  // OGCG-LABEL: @{{.*}}test_builtin_FILE
+  // OGCG: ret ptr @.str
+  return __builtin_FILE();
+}
+
+// Test __builtin_FUNCTION
+const char* test_builtin_FUNCTION() {
+  // CIR-LABEL: cir.func{{.*}} @{{.*}}test_builtin_FUNCTION
+  // CIR: %{{.*}} = cir.const #cir.global_view<@".str{{.*}}"> : !cir.ptr<!s8i>
+
+  // LLVM-LABEL: @{{.*}}test_builtin_FUNCTION
+  // LLVM: store ptr @.str
+
+  // OGCG-LABEL: @{{.*}}test_builtin_FUNCTION
+  // OGCG: ret ptr @.str
+  return __builtin_FUNCTION();
+}
+
+// Test __builtin_COLUMN
+int test_builtin_COLUMN() {
+  // CIR-LABEL: cir.func{{.*}} @{{.*}}test_builtin_COLUMN
+  // The column number is the position of '__builtin_COLUMN'
+  // CIR: %{{.*}} = cir.const #cir.int<10> : !u32i
+
+  // LLVM-LABEL: @{{.*}}test_builtin_COLUMN
+  // LLVM: store i32 10
+
+  // OGCG-LABEL: @{{.*}}test_builtin_COLUMN
+  // OGCG: ret i32 10
+  return __builtin_COLUMN();
+}
+
+// Test default argument
+int get_line(int l = __builtin_LINE()) {
+  return l;
+}
+
+void test_default_arg() {
+  // CIR-LABEL: cir.func{{.*}} @{{.*}}test_default_arg
+  // The LINE should be from the call site, not the default argument definition
+  #line 111
+  int x = get_line();
+  // CIR: %{{.*}} = cir.const #cir.int<111> : !u32i
+  // CIR: %{{.*}} = cir.call @{{.*}}get_line{{.*}}({{.*}}) :
+
+  // LLVM-LABEL: @{{.*}}test_default_arg
+  // LLVM: call{{.*}} i32 @{{.*}}get_line{{.*}}(i32 111)
+
+  // OGCG-LABEL: @{{.*}}test_default_arg
+  // OGCG: call{{.*}} i32 @{{.*}}get_line{{.*}}(i32 {{.*}} 111)
+}
+
+#line 200 "lambda-test.cpp"
+// Test in lambda (this tests that source location correctly captures context)
+void test_in_lambda() {
+  // CIR-LABEL: cir.func{{.*}} @{{.*}}test_in_lambda
+  auto lambda = []() {
+    return __builtin_LINE();
+  };
+  int x = lambda();
+
+  // LLVM-LABEL: @{{.*}}test_in_lambda
+  // LLVM: call{{.*}} i32 @{{.*}}
+
+  // OGCG-LABEL: @{{.*}}test_in_lambda
+  // OGCG: call{{.*}} i32 @{{.*}}
+}
+
+#line 214 "combined-test.cpp"
+// Test multiple builtins in one expression
+void test_combined() {
+  // CIR-LABEL: cir.func{{.*}} @{{.*}}test_combined
+  const char* file = __builtin_FILE();
+  int line = __builtin_LINE();
+  const char* func = __builtin_FUNCTION();
+  // All should produce constants
+  // CIR: cir.const
+  // CIR: cir.const
+  // CIR: cir.const
+
+  // LLVM-LABEL: @{{.*}}test_combined
+  // LLVM: store ptr @.str
+  // LLVM: store i32 218
+  // LLVM: store ptr @.str
+
+  // OGCG-LABEL: @{{.*}}test_combined
+  // OGCG: store ptr @.str
+  // OGCG: store i32 218
+  // OGCG: store ptr @.str
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/source-location-scope.cpp b/clang/test/CIR/Incubator/CodeGen/source-location-scope.cpp
new file mode 100644
index 0000000000000..fc63ebc391710
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/source-location-scope.cpp
@@ -0,0 +1,61 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR %s --input-file=%t.cir
+
+// Test for crash when getLoc() is called with invalid SourceLocation
+// and currSrcLoc is not set. This can happen with compiler-generated
+// expressions like CXXDefaultArgExpr and CXXDefaultInitExpr.
+
+//===----------------------------------------------------------------------===//
+// CXXDefaultArgExpr - default argument expressions
+//===----------------------------------------------------------------------===//
+
+struct S {
+  int x;
+  int y;
+};
+
+// Default argument expressions can have invalid source locations
+void foo(S s = {}) {
+  S local = s;
+}
+
+void testDefaultArg() {
+  foo();
+}
+
+//===----------------------------------------------------------------------===//
+// CXXDefaultInitExpr - default member initializers
+//===----------------------------------------------------------------------===//
+
+struct Inner {
+  int value;
+};
+
+struct Outer {
+  Inner inner = {};  // Default member initializer
+  int x = 42;
+};
+
+void testDefaultInit() {
+  Outer o;
+  (void)o;
+}
+
+// CIR-DAG: cir.func {{.*}}@_Z3foo1S
+// CIR-DAG: cir.func {{.*}}@_Z14testDefaultArgv
+// CIR-DAG: cir.func {{.*}}@_Z15testDefaultInitv
+
+// Verify that CXXDefaultArgExpr gets proper source locations from the
+// default argument's initializer expression, not unknown locations.
+// The struct initialization should have a fused location covering the struct
+// definition (lines 12-15 where "struct S { int x; int y; };" is).
+// CIR-DAG: #[[LOC_S_START:loc[0-9]*]] = loc({{.*}}:12:1)
+// CIR-DAG: #[[LOC_S_END:loc[0-9]*]] = loc({{.*}}:15:1)
+// CIR-DAG: #[[LOC_FUSED:loc[0-9]*]] = loc(fused[#[[LOC_S_START]], #[[LOC_S_END]]])
+// CIR-DAG: cir.store {{.*}} loc(#[[LOC_FUSED]])
+
+// Verify that CXXDefaultInitExpr gets proper source locations from the
+// default member initializer expression, not unknown locations.
+// The constant 42 should have a location pointing to line 36 where "int x = 42" is.
+// CIR-DAG: #[[LOC42:loc[0-9]*]] = loc({{.*}}:36:11)
+// CIR-DAG: cir.const #cir.int<42> : !s32i loc(#[[LOC42]])
diff --git a/clang/test/CIR/Incubator/CodeGen/sourcelocation.cpp b/clang/test/CIR/Incubator/CodeGen/sourcelocation.cpp
new file mode 100644
index 0000000000000..77da8e700192f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/sourcelocation.cpp
@@ -0,0 +1,90 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -debug-info-kind=constructor -dwarf-version=4 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+int s0(int a, int b) {
+  int x = a + b;
+  if (x > 0)
+    x = 0;
+  else
+    x = 1;
+  return x;
+}
+
+// CIR: #loc3 = loc("{{.*}}sourcelocation.cpp":6:8)
+// CIR: #loc4 = loc("{{.*}}sourcelocation.cpp":6:12)
+// CIR: #loc5 = loc("{{.*}}sourcelocation.cpp":6:15)
+// CIR: #loc6 = loc("{{.*}}sourcelocation.cpp":6:19)
+// CIR: #loc21 = loc(fused[#loc3, #loc4])
+// CIR: #loc22 = loc(fused[#loc5, #loc6])
+// CIR: module @"{{.*}}sourcelocation.cpp" attributes {{{.*}}cir.lang = #cir.lang<cxx>, {{.*}}cir.sob = #cir.signed_overflow_behavior<undefined>
+// CIR:   cir.func {{.*}} @_Z2s0ii(%arg0: !s32i loc(fused[#loc3, #loc4]), %arg1: !s32i loc(fused[#loc5, #loc6])) -> !s32i
+// CIR:     %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64} loc(#loc21)
+// CIR:     %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] {alignment = 4 : i64} loc(#loc22)
+// CIR:     %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64} loc(#loc2)
+// CIR:     %3 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64} loc(#loc23)
+// CIR:     cir.store{{.*}} %arg0, %0 : !s32i, !cir.ptr<!s32i> loc(#loc9)
+// CIR:     cir.store{{.*}} %arg1, %1 : !s32i, !cir.ptr<!s32i> loc(#loc9)
+// CIR:     %4 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i loc(#loc10)
+// CIR:     %5 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i loc(#loc8)
+// CIR:     %6 = cir.binop(add, %4, %5) nsw : !s32i loc(#loc10)
+// CIR:     cir.store{{.*}} %6, %3 : !s32i, !cir.ptr<!s32i> loc(#loc23)
+// CIR:     cir.scope {
+// CIR:       %9 = cir.load{{.*}} %3 : !cir.ptr<!s32i>, !s32i loc(#loc13)
+// CIR:       %10 = cir.const #cir.int<0> : !s32i loc(#loc14)
+// CIR:       %11 = cir.cmp(gt, %9, %10) : !s32i, !cir.bool loc(#loc25)
+// CIR:       cir.if %11 {
+// CIR:         %12 = cir.const #cir.int<0> : !s32i loc(#loc16)
+// CIR:         cir.store{{.*}} %12, %3 : !s32i, !cir.ptr<!s32i> loc(#loc27)
+// CIR:       } else {
+// CIR:         %12 = cir.const #cir.int<1> : !s32i loc(#loc12)
+// CIR:         cir.store{{.*}} %12, %3 : !s32i, !cir.ptr<!s32i> loc(#loc28)
+// CIR:       } loc(#loc26)
+// CIR:     } loc(#loc24)
+// CIR:     %7 = cir.load{{.*}} %3 : !cir.ptr<!s32i>, !s32i loc(#loc18)
+// CIR:     cir.store{{.*}} %7, %2 : !s32i, !cir.ptr<!s32i> loc(#loc29)
+// CIR:     %8 = cir.load{{.*}} %2 : !cir.ptr<!s32i>, !s32i loc(#loc29)
+// CIR:     cir.return %8 : !s32i loc(#loc29)
+// CIR:   } loc(#loc20)
+// CIR: } loc(#loc)
+// CIR: #loc = loc("{{.*}}sourcelocation.cpp":0:0)
+// CIR: #loc1 = loc("{{.*}}sourcelocation.cpp":6:1)
+// CIR: #loc2 = loc("{{.*}}sourcelocation.cpp":13:1)
+// CIR: #loc7 = loc("{{.*}}sourcelocation.cpp":7:3)
+// CIR: #loc8 = loc("{{.*}}sourcelocation.cpp":7:15)
+// CIR: #loc9 = loc("{{.*}}sourcelocation.cpp":6:22)
+// CIR: #loc10 = loc("{{.*}}sourcelocation.cpp":7:11)
+// CIR: #loc11 = loc("{{.*}}sourcelocation.cpp":8:3)
+// CIR: #loc12 = loc("{{.*}}sourcelocation.cpp":11:9)
+// CIR: #loc13 = loc("{{.*}}sourcelocation.cpp":8:7)
+// CIR: #loc14 = loc("{{.*}}sourcelocation.cpp":8:11)
+// CIR: #loc15 = loc("{{.*}}sourcelocation.cpp":9:5)
+// CIR: #loc16 = loc("{{.*}}sourcelocation.cpp":9:9)
+// CIR: #loc17 = loc("{{.*}}sourcelocation.cpp":11:5)
+// CIR: #loc18 = loc("{{.*}}sourcelocation.cpp":12:10)
+// CIR: #loc19 = loc("{{.*}}sourcelocation.cpp":12:3)
+// CIR: #loc20 = loc(fused[#loc1, #loc2])
+// CIR: #loc23 = loc(fused[#loc7, #loc8])
+// CIR: #loc24 = loc(fused[#loc11, #loc12])
+// CIR: #loc25 = loc(fused[#loc13, #loc14])
+// CIR: #loc26 = loc(fused[#loc15, #loc16, #loc17, #loc12])
+// CIR: #loc27 = loc(fused[#loc15, #loc16])
+// CIR: #loc28 = loc(fused[#loc17, #loc12])
+// CIR: #loc29 = loc(fused[#loc19, #loc18])
+
+
+// LLVM: ModuleID = '{{.*}}sourcelocation.cpp'
+// LLVM: source_filename = "{{.*}}sourcelocation.cpp"
+// LLVM: define dso_local i32 @_Z2s0ii(i32 %0, i32 %1) #[[#]] !dbg ![[#SP:]]
+// LLVM:  %3 = alloca i32, i64 1, align 4, !dbg ![[#LOC1:]]
+
+
+// LLVM: !llvm.dbg.cu = !{!0}
+// LLVM: !llvm.module.flags = !{!2}
+
+// LLVM: !0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "MLIR", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly)
+// LLVM: !1 = !DIFile(filename: "sourcelocation.cpp", directory: {{.*}}CodeGen{{.*}})
+// LLVM: !2 = !{i32 2, !"Debug Info Version", i32 3}
+// LLVM: !3 = distinct !DISubprogram(name: "_Z2s0ii", linkageName: "_Z2s0ii", scope: !1, file: !1, line: 6, type: !4, scopeLine: 6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+// LLVM: !4 = !DISubroutineType(cc: DW_CC_normal, types: !5)
diff --git a/clang/test/CIR/Incubator/CodeGen/special-virtual-func.cpp b/clang/test/CIR/Incubator/CodeGen/special-virtual-func.cpp
new file mode 100644
index 0000000000000..8917d25dffb1f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/special-virtual-func.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Check that pure and deleted virtual functions are correctly emitted in the
+// vtable.
+class A {
+  A();
+  virtual void pure() = 0;
+  virtual void deleted() = delete;
+};
+
+A::A() = default;
+
+// CHECK: @_ZTV1A = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1A> : !cir.ptr<!u8i>, #cir.global_view<@__cxa_pure_virtual> : !cir.ptr<!u8i>, #cir.global_view<@__cxa_deleted_virtual> : !cir.ptr<!u8i>]>
+// CHECK: cir.func {{.*}} @__cxa_pure_virtual()
+// CHECK: cir.func {{.*}} @__cxa_deleted_virtual()
diff --git a/clang/test/CIR/Incubator/CodeGen/spelling-locations.cpp b/clang/test/CIR/Incubator/CodeGen/spelling-locations.cpp
new file mode 100644
index 0000000000000..66c09c88a029c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/spelling-locations.cpp
@@ -0,0 +1,100 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+#define multiline_if_macro(c, t) \
+if (c) { \
+  return t; \
+}
+
+int testMacroLocations(void) {
+
+  // Expanded macros will use the location of the expansion site.
+  multiline_if_macro(1, 3);
+  // CHECK: cir.scope {
+  // CHECK:   cir.if %{{.+}} {
+  // CHECK:     cir.return %{{.+}} : !s32i loc(#loc[[#LOC:]])
+  // CHECK:   } loc(#loc[[#LOC]])
+  // CHECK: } loc(#loc[[#LOC]])
+
+  // Regular if statements should use different locations.
+  if (1) {
+    return 3;
+  }
+  //     CHECK: cir.scope {
+  //     CHECK:   cir.if %{{.+}} {
+  //     CHECK:     cir.return %{{.+}} : !s32i loc(#loc[[#LOC:]])
+  // CHECK-NOT:   } loc(#loc[[#LOC]])
+  // CHECK-NOT: } loc(#loc[[#LOC]])
+
+  return 0;
+}
+
+void testIfStmtLocations(int f) {
+  if (f)
+    ;
+  else
+    ;
+
+  if (f)
+    ++f;
+  else
+    ;
+
+  if (f)
+    ;
+  else
+    --f;
+
+  if (f)
+    ++f;
+  else
+    --f;
+}
+
+// CHECK: cir.if %{{.+}} {
+// CHECK: } else {
+// CHECK: } loc(#loc[[#LOC1:]])
+
+// CHECK: cir.if %{{.+}} {
+// CHECK:   %{{.+}} = cir.load
+// CHECK:   %{{.+}} = cir.unary(inc
+// CHECK:   cir.store
+// CHECK: } else {
+// CHECK: } loc(#loc[[#LOC2:]])
+
+// CHECK: cir.if %{{.+}} {
+// CHECK: } else {
+// CHECK:   %{{.+}} = cir.load
+// CHECK:   %{{.+}} = cir.unary(dec
+// CHECK:   cir.store
+// CHECK: } loc(#loc[[#LOC3:]])
+
+// CHECK: cir.if %{{.+}} {
+// CHECK:   %{{.+}} = cir.load
+// CHECK:   %{{.+}} = cir.unary(inc
+// CHECK:   cir.store
+// CHECK: } else {
+// CHECK:   %{{.+}} = cir.load
+// CHECK:   %{{.+}} = cir.unary(dec
+// CHECK:   cir.store
+// CHECK: } loc(#loc[[#LOC4:]])
+
+// CHECK: #loc[[#LOC12:]] = loc({{.+}}:35:5)
+// CHECK: #loc[[#LOC11:]] = loc({{.+}}:33:5)
+
+// CHECK: #loc[[#LOC23:]] = loc({{.+}}:40:5)
+// CHECK: #loc[[#LOC21:]] = loc({{.+}}:38:5)
+// CHECK: #loc[[#LOC22:]] = loc({{.+}}:38:7)
+
+// CHECK: #loc[[#LOC33:]] = loc({{.+}}:45:7)
+// CHECK: #loc[[#LOC31:]] = loc({{.+}}:43:5)
+// CHECK: #loc[[#LOC32:]] = loc({{.+}}:45:5)
+
+// CHECK: #loc[[#LOC44:]] = loc({{.+}}:50:7)
+// CHECK: #loc[[#LOC41:]] = loc({{.+}}:48:5)
+// CHECK: #loc[[#LOC42:]] = loc({{.+}}:48:7)
+// CHECK: #loc[[#LOC43:]] = loc({{.+}}:50:5)
+
+// CHECK: #loc[[#LOC1]] = loc(fused[#loc[[#LOC11]], #loc[[#LOC12]]])
+// CHECK: #loc[[#LOC2]] = loc(fused[#loc[[#LOC21]], #loc[[#LOC22]], #loc[[#LOC23]]])
+// CHECK: #loc[[#LOC3]] = loc(fused[#loc[[#LOC31]], #loc[[#LOC32]], #loc[[#LOC33]]])
+// CHECK: #loc[[#LOC4]] = loc(fused[#loc[[#LOC41]], #loc[[#LOC42]], #loc[[#LOC43]], #loc[[#LOC44]]])
diff --git a/clang/test/CIR/Incubator/CodeGen/static-vars.c b/clang/test/CIR/Incubator/CodeGen/static-vars.c
new file mode 100644
index 0000000000000..edd168810af57
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/static-vars.c
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+void func1(void) {
+  // Should lower default-initialized static vars.
+  static int i;
+  // CHECK-DAG: cir.global "private" internal dso_local @func1.i = #cir.int<0> : !s32i
+
+  // Should lower constant-initialized static vars.
+  static int j = 1;
+  // CHECK-DAG: cir.global "private" internal dso_local @func1.j = #cir.int<1> : !s32i
+
+  // Should properly shadow static vars in nested scopes.
+  {
+    static int j = 2;
+    // CHECK-DAG: cir.global "private" internal dso_local @func1.j.1 = #cir.int<2> : !s32i
+  }
+  {
+    static int j = 3;
+    // CHECK-DAG: cir.global "private" internal dso_local @func1.j.2 = #cir.int<3> : !s32i
+  }
+
+  // Should lower basic arithmetic on static vars.
+  j++;
+  // CHECK-DAG: %[[#V2:]] = cir.get_global @func1.j : !cir.ptr<!s32i>
+  // CHECK-DAG: %[[#V3:]] = cir.load{{.*}} %[[#V2]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-DAG: %[[#V4:]] = cir.unary(inc, %[[#V3]]) nsw : !s32i, !s32i
+  // CHECK-DAG: cir.store{{.*}} %[[#V4]], %[[#V2]] : !s32i, !cir.ptr<!s32i>
+}
+
+// Should shadow static vars on different functions.
+void func2(void) {
+  static char i;
+  // CHECK-DAG: cir.global "private" internal dso_local @func2.i = #cir.int<0> : !s8i
+  static float j;
+  // CHECK-DAG: cir.global "private" internal dso_local @func2.j = #cir.fp<0.000000e+00> : !cir.float
+}
+
+// Should const initialize static vars with constant addresses.
+void func3(void) {
+  static int var;
+  static int *constAddr = &var;
+  // CHECK-DAG: cir.global "private" internal dso_local @func3.constAddr = #cir.global_view<@func3.var> : !cir.ptr<!s32i>
+}
+
+// The variable's type size in bytes should match the initializer's.
+void func4(void) {
+  static char string[] = "Hello";
+  // CHECK-DAG: cir.global "private" internal dso_local @func4.string = #cir.const_array<"Hello\00" : !cir.array<!s8i x 6>> : !cir.array<!s8i x 6>
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/static-vars.cpp b/clang/test/CIR/Incubator/CodeGen/static-vars.cpp
new file mode 100644
index 0000000000000..d949936f6bff9
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/static-vars.cpp
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t1.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t1.ll %s
+
+void func1(void) {
+  // Should lower default-initialized static vars.
+  static int i;
+  // CHECK-DAG: cir.global "private" internal dso_local @_ZZ5func1vE1i = #cir.int<0> : !s32i
+
+  // Should lower constant-initialized static vars.
+  static int j = 1;
+  // CHECK-DAG: cir.global "private" internal dso_local @_ZZ5func1vE1j = #cir.int<1> : !s32i
+
+  // Should properly shadow static vars in nested scopes.
+  {
+    static int j = 2;
+    // CHECK-DAG: cir.global "private" internal dso_local @_ZZ5func1vE1j_0 = #cir.int<2> : !s32i
+  }
+  {
+    static int j = 3;
+    // CHECK-DAG: cir.global "private" internal dso_local @_ZZ5func1vE1j_1 = #cir.int<3> : !s32i
+  }
+
+  // Should lower basic arithmetic on static vars.
+  j++;
+  // CHECK-DAG: %[[#V2:]] = cir.get_global @_ZZ5func1vE1j : !cir.ptr<!s32i>
+  // CHECK-DAG: %[[#V3:]] = cir.load{{.*}} %[[#V2]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-DAG: %[[#V4:]] = cir.unary(inc, %[[#V3]]) nsw : !s32i, !s32i
+  // CHECK-DAG: cir.store{{.*}} %[[#V4]], %[[#V2]] : !s32i, !cir.ptr<!s32i>
+}
+
+// Should shadow static vars on different functions.
+void func2(void) {
+  static char i;
+  // CHECK-DAG: cir.global "private" internal dso_local @_ZZ5func2vE1i = #cir.int<0> : !s8i
+  static float j;
+  // CHECK-DAG: cir.global "private" internal dso_local @_ZZ5func2vE1j = #cir.fp<0.000000e+00> : !cir.float
+}
+
+// CHECK-DAG: cir.global linkonce_odr comdat @_ZZ4testvE1c = #cir.int<0> : !s32i
+
+// LLVM-DAG: $_ZZ4testvE1c = comdat any
+// LLVM-DAG: @_ZZ4testvE1c = linkonce_odr global i32 0, comdat, align 4
+
+inline void test() { static int c; }
+// CHECK-LABEL: @_Z4testv
+// CHECK: {{%.*}} = cir.get_global @_ZZ4testvE1c : !cir.ptr<!s32i>
+void foo() { test(); }
diff --git a/clang/test/CIR/Incubator/CodeGen/static.cpp b/clang/test/CIR/Incubator/CodeGen/static.cpp
new file mode 100644
index 0000000000000..92e962dc07ae0
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/static.cpp
@@ -0,0 +1,90 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=BEFORE
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=AFTER
+// RUN: cir-opt %t.cir -o - | FileCheck %s -check-prefix=AFTER
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
+
+class Init {
+
+public:
+  Init(bool a) ;
+  ~Init();
+private:
+  static bool _S_synced_with_stdio;
+};
+
+
+static Init __ioinit(true);
+static Init __ioinit2(false);
+
+// BEFORE:      module {{.*}} {
+// BEFORE-NEXT:   cir.func {{.*}} @_ZN4InitC1Eb(!cir.ptr<!rec_Init>, !cir.bool)
+// BEFORE-NEXT:   cir.func {{.*}} @_ZN4InitD1Ev(!cir.ptr<!rec_Init>) special_member<#cir.cxx_dtor<!rec_Init>>
+// BEFORE-NEXT:   cir.global "private" internal dso_local @_ZL8__ioinit = ctor : !rec_Init {
+// BEFORE-NEXT:     %0 = cir.get_global @_ZL8__ioinit : !cir.ptr<!rec_Init>
+// BEFORE-NEXT:     %1 = cir.const #true
+// BEFORE-NEXT:     cir.call @_ZN4InitC1Eb(%0, %1) : (!cir.ptr<!rec_Init>, !cir.bool) -> ()
+// BEFORE-NEXT:   } dtor {
+// BEFORE-NEXT:      %0 = cir.get_global @_ZL8__ioinit : !cir.ptr<!rec_Init>
+// BEFORE-NEXT:      cir.call @_ZN4InitD1Ev(%0) : (!cir.ptr<!rec_Init>) -> ()
+// BEFORE-NEXT:   } {alignment = 1 : i64, ast = #cir.var.decl.ast}
+// BEFORE:        cir.global "private" internal dso_local @_ZL9__ioinit2 = ctor : !rec_Init {
+// BEFORE-NEXT:     %0 = cir.get_global @_ZL9__ioinit2 : !cir.ptr<!rec_Init>
+// BEFORE-NEXT:     %1 = cir.const #false
+// BEFORE-NEXT:     cir.call @_ZN4InitC1Eb(%0, %1) : (!cir.ptr<!rec_Init>, !cir.bool) -> ()
+// BEFORE-NEXT:   } dtor {
+// BEFORE-NEXT:     %0 = cir.get_global @_ZL9__ioinit2 : !cir.ptr<!rec_Init>
+// BEFORE-NEXT:     cir.call @_ZN4InitD1Ev(%0) : (!cir.ptr<!rec_Init>) -> ()
+// BEFORE-NEXT:   } {alignment = 1 : i64, ast = #cir.var.decl.ast}
+// BEFORE-NEXT: }
+
+
+// AFTER:      module {{.*}} attributes {{.*}}cir.global_ctors = [#cir.global_ctor<"__cxx_global_var_init", 65535>, #cir.global_ctor<"__cxx_global_var_init.1", 65535>]
+// AFTER-NEXT:   cir.global "private" external @__dso_handle : i8
+// AFTER-NEXT:   cir.func {{.*}} @__cxa_atexit(!cir.ptr<!cir.func<(!cir.ptr<!void>)>>, !cir.ptr<!void>, !cir.ptr<i8>)
+// AFTER-NEXT:   cir.func {{.*}} @_ZN4InitC1Eb(!cir.ptr<!rec_Init>, !cir.bool)
+// AFTER-NEXT:   cir.func {{.*}} @_ZN4InitD1Ev(!cir.ptr<!rec_Init>) special_member<#cir.cxx_dtor<!rec_Init>>
+// AFTER-NEXT:   cir.global "private" internal dso_local @_ZL8__ioinit =  #cir.zero : !rec_Init {alignment = 1 : i64, ast = #cir.var.decl.ast}
+// AFTER-NEXT:   cir.func internal private @__cxx_global_var_init()
+// AFTER-NEXT:     %0 = cir.get_global @_ZL8__ioinit : !cir.ptr<!rec_Init>
+// AFTER-NEXT:     %1 = cir.const #true
+// AFTER-NEXT:     cir.call @_ZN4InitC1Eb(%0, %1) : (!cir.ptr<!rec_Init>, !cir.bool) -> ()
+// AFTER-NEXT:     %2 = cir.get_global @_ZL8__ioinit : !cir.ptr<!rec_Init>
+// AFTER-NEXT:     %3 = cir.get_global @_ZN4InitD1Ev : !cir.ptr<!cir.func<(!cir.ptr<!rec_Init>)>>
+// AFTER-NEXT:     %4 = cir.cast bitcast %3 : !cir.ptr<!cir.func<(!cir.ptr<!rec_Init>)>> -> !cir.ptr<!cir.func<(!cir.ptr<!void>)>>
+// AFTER-NEXT:     %5 = cir.cast bitcast %2 : !cir.ptr<!rec_Init> -> !cir.ptr<!void>
+// AFTER-NEXT:     %6 = cir.get_global @__dso_handle : !cir.ptr<i8>
+// AFTER-NEXT:     cir.call @__cxa_atexit(%4, %5, %6) : (!cir.ptr<!cir.func<(!cir.ptr<!void>)>>, !cir.ptr<!void>, !cir.ptr<i8>) -> ()
+// AFTER-NEXT:     cir.return
+// AFTER:        cir.global "private" internal dso_local @_ZL9__ioinit2 =  #cir.zero : !rec_Init {alignment = 1 : i64, ast = #cir.var.decl.ast}
+// AFTER-NEXT:   cir.func internal private @__cxx_global_var_init.1()
+// AFTER-NEXT:     %0 = cir.get_global @_ZL9__ioinit2 : !cir.ptr<!rec_Init>
+// AFTER-NEXT:     %1 = cir.const #false
+// AFTER-NEXT:     cir.call @_ZN4InitC1Eb(%0, %1) : (!cir.ptr<!rec_Init>, !cir.bool) -> ()
+// AFTER-NEXT:     %2 = cir.get_global @_ZL9__ioinit2 : !cir.ptr<!rec_Init>
+// AFTER-NEXT:     %3 = cir.get_global @_ZN4InitD1Ev : !cir.ptr<!cir.func<(!cir.ptr<!rec_Init>)>>
+// AFTER-NEXT:     %4 = cir.cast bitcast %3 : !cir.ptr<!cir.func<(!cir.ptr<!rec_Init>)>> -> !cir.ptr<!cir.func<(!cir.ptr<!void>)>>
+// AFTER-NEXT:     %5 = cir.cast bitcast %2 : !cir.ptr<!rec_Init> -> !cir.ptr<!void>
+// AFTER-NEXT:     %6 = cir.get_global @__dso_handle : !cir.ptr<i8>
+// AFTER-NEXT:     cir.call @__cxa_atexit(%4, %5, %6) : (!cir.ptr<!cir.func<(!cir.ptr<!void>)>>, !cir.ptr<!void>, !cir.ptr<i8>) -> ()
+// AFTER-NEXT:     cir.return
+// AFTER:        cir.func {{.*}} @_GLOBAL__sub_I_static.cpp()
+// AFTER-NEXT:     cir.call @__cxx_global_var_init() : () -> ()
+// AFTER-NEXT:     cir.call @__cxx_global_var_init.1() : () -> ()
+// AFTER-NEXT:     cir.return
+
+// LLVM:      @__dso_handle = external global i8
+// LLVM:      @_ZL8__ioinit = internal global %class.Init zeroinitializer
+// LLVM:      @_ZL9__ioinit2 = internal global %class.Init zeroinitializer
+// LLVM:      @llvm.global_ctors = appending constant [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__cxx_global_var_init, ptr null }, { i32, ptr, ptr } { i32 65535, ptr @__cxx_global_var_init.1, ptr null }]
+// LLVM:      define internal void @__cxx_global_var_init()
+// LLVM-NEXT:   call void @_ZN4InitC1Eb(ptr @_ZL8__ioinit, i1 true)
+// LLVM-NEXT:   call void @__cxa_atexit(ptr @_ZN4InitD1Ev, ptr @_ZL8__ioinit, ptr @__dso_handle)
+// LLVM-NEXT:   ret void
+// LLVM:      define internal void @__cxx_global_var_init.1()
+// LLVM-NEXT:   call void @_ZN4InitC1Eb(ptr @_ZL9__ioinit2, i1 false)
+// LLVM-NEXT:   call void @__cxa_atexit(ptr @_ZN4InitD1Ev, ptr @_ZL9__ioinit2, ptr @__dso_handle)
+// LLVM-NEXT:   ret void
+// LLVM:      define void @_GLOBAL__sub_I_static.cpp()
+// LLVM-NEXT:  call void @__cxx_global_var_init()
+// LLVM-NEXT:  call void @__cxx_global_var_init.1()
+// LLVM-NEXT:  ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/static_class_ref.cpp b/clang/test/CIR/Incubator/CodeGen/static_class_ref.cpp
new file mode 100644
index 0000000000000..64cc802a97783
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/static_class_ref.cpp
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-cir -o - %s | FileCheck %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-llvm -o - %s | FileCheck %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -emit-llvm -o - %s | FileCheck %s -check-prefix=OGCG
+
+// CIR: !rec_A = !cir.record<class "A" {!s32i} #cir.record.decl.ast>
+// CIR: cir.global "private" constant external @_ZN1B1AE : !cir.ptr<!rec_A> {alignment = 8 : i64}
+
+// LLVM: @_ZN1B1AE = external local_unnamed_addr constant ptr, align 8
+// OGCG: @_ZN1B1AE = external local_unnamed_addr constant ptr, align 8
+class A { int p = 1;};
+class B {
+public:
+  static A &A;
+};
+A& ref() {
+  // CIR-LABEL: _Z3refv
+  // CIR: [[GLOBAL:%.*]] = cir.get_global @_ZN1B1AE : !cir.ptr<!cir.ptr<!rec_A>>
+  // CIR: [[LD1:%.*]] = cir.load [[GLOBAL]] : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A>
+  // CIR: cir.store{{.*}} [[LD1]], [[ALLOCA:%.*]] : !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>
+  // CIR: [[LD2:%.*]] = cir.load{{.*}} [[ALLOCA:%.*]]: !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A>
+  // CIR: cir.return [[LD2]] : !cir.ptr<!rec_A>
+
+  // LLVM-LABEL: _Z3refv
+  // LLVM: [[LD:%.*]] = load ptr, ptr @_ZN1B1AE
+  // LLVM-NEXT: ret ptr [[LD]]
+
+  // OGCG-LABEL: _Z3refv
+  // OGCG: [[LD:%.*]] = load ptr, ptr @_ZN1B1AE
+  // OGCG-NEXT: ret ptr [[LD]]
+  return B::A;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/std-array.cpp b/clang/test/CIR/Incubator/CodeGen/std-array.cpp
new file mode 100644
index 0000000000000..cd9cc37a93367
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/std-array.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+#include "std-cxx.h"
+
+void t() {
+  std::array<unsigned char, 9> v = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+  (void)v.end();
+}
+
+// CHECK: ![[array:.*]] = !cir.record<struct "std::array<unsigned char, 9U>"
+
+// CHECK: cir.call @_ZNSt5arrayIhLj9EE3endEv
diff --git a/clang/test/CIR/Incubator/CodeGen/std-find.cpp b/clang/test/CIR/Incubator/CodeGen/std-find.cpp
new file mode 100644
index 0000000000000..6e754185f1182
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/std-find.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+#include "std-cxx.h"
+
+// CHECK: ![[array:.*]] = !cir.record<struct "std::array<unsigned char, 9U>"
+
+int test_find(unsigned char n = 3)
+{
+    // CHECK: cir.func {{.*}} @_Z9test_findh(%arg0: !u8i
+    unsigned num_found = 0;
+    std::array<unsigned char, 9> v = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    // CHECK: %[[array_addr:.*]] = cir.alloca ![[array]], !cir.ptr<![[array]]>, ["v", init]
+
+    auto f = std::find(v.begin(), v.end(), n);
+    // CHECK: {{.*}} cir.call @_ZNSt5arrayIhLj9EE5beginEv(%[[array_addr]])
+    // CHECK: {{.*}} cir.call @_ZNSt5arrayIhLj9EE3endEv(%[[array_addr]])
+    // CHECK: {{.*}} cir.call @_ZSt4findIPhhET_S1_S1_RKT0_(
+
+    if (f != v.end())
+        num_found++;
+    // CHECK: cir.call @_ZNSt5arrayIhLj9EE3endEv(%[[array_addr]]
+    // CHECK: %[[neq_cmp:.*]] = cir.cmp
+    // CHECK: cir.if %[[neq_cmp]]
+
+    return num_found;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/stmt-expr.c b/clang/test/CIR/Incubator/CodeGen/stmt-expr.c
new file mode 100644
index 0000000000000..cad7fc7eb1d6f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/stmt-expr.c
@@ -0,0 +1,42 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Yields void.
+void test1() { ({ }); }
+// CHECK: @test1
+//     CHECK-NEXT: cir.return
+
+
+// Yields an out-of-scope scalar.
+void test2() { ({int x = 3; x; }); }
+// CHECK: @test2
+// CHECK: %[[#RETVAL:]] = cir.alloca !s32i, !cir.ptr<!s32i>
+// CHECK: cir.scope {
+// CHECK:   %[[#VAR:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init]
+//          [...]
+// CHECK:   %[[#TMP:]] = cir.load{{.*}} %[[#VAR]] : !cir.ptr<!s32i>, !s32i
+// CHECK:   cir.store{{.*}} %[[#TMP]], %[[#RETVAL]] : !s32i, !cir.ptr<!s32i>
+// CHECK: }
+// CHECK: %{{.+}} = cir.load{{.*}} %[[#RETVAL]] : !cir.ptr<!s32i>, !s32i
+
+// Yields an aggregate.
+struct S { int x; };
+int test3() { return ({ struct S s = {1}; s; }).x; }
+// CHECK: @test3
+// CHECK: cir.scope {
+// CHECK: %[[#REF_TMP:]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["ref.tmp0"]
+// CHECK:   cir.scope {
+// CHECK:     %[[#VAR:]] = cir.alloca !rec_S, !cir.ptr<!rec_S>
+//            [...]
+// CHECK:     cir.copy %[[#VAR]] to %[[#REF_TMP]] : !cir.ptr<!rec_S>
+// CHECK:   }
+// CHECK: %[[#RETADDR:]] = cir.get_member %[[#REF_TMP]][0] {name = "x"} : !cir.ptr<!rec_S> -> !cir.ptr<!s32i>
+// CHECK: %{{.+}} = cir.load{{.*}} %[[#RETADDR]] : !cir.ptr<!s32i>, !s32i
+
+// Expression is wrapped in an expression attribute (just ensure it does not crash).
+void test4(int x) { ({[[gsl::suppress("foo")]] x;}); }
+// CHECK: @test4
+
+// TODO(cir): Missing label support.
+// // Expression is wrapped in a label.
+// // void test5(int x) { x = ({ label: x; }); }
diff --git a/clang/test/CIR/Incubator/CodeGen/stmt-expr.cpp b/clang/test/CIR/Incubator/CodeGen/stmt-expr.cpp
new file mode 100644
index 0000000000000..70ae3f3a1fe5c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/stmt-expr.cpp
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+class A {
+public:
+  A(): x(0) {}
+  A(A &a) : x(a.x) {}
+  // TODO(cir): Ensure dtors are properly called. The dtor below crashes.
+  // ~A() {}
+  int x;
+  void Foo() {}
+};
+
+void test1() {
+  ({
+    A a;
+    a;
+  }).Foo();
+}
+// CHECK: @_Z5test1v
+// CHECK: cir.scope {
+// CHECK:   %[[#RETVAL:]] = cir.alloca !rec_A, !cir.ptr<!rec_A>
+// CHECK:   cir.scope {
+// CHECK:     %[[#VAR:]] = cir.alloca !rec_A, !cir.ptr<!rec_A>, ["a", init] {alignment = 4 : i64}
+// CHECK:     cir.call @_ZN1AC1Ev(%[[#VAR]]) : (!cir.ptr<!rec_A>) -> ()
+// CHECK:     cir.copy %[[#VAR]] to %[[#RETVAL]] : !cir.ptr<!rec_A>
+//            TODO(cir): the local VAR should be destroyed here.
+// CHECK:   }
+// CHECK:   cir.call @_ZN1A3FooEv(%[[#RETVAL]]) : (!cir.ptr<!rec_A>) -> ()
+//          TODO(cir): the temporary RETVAL should be destroyed here.
+// CHECK: }
diff --git a/clang/test/CIR/Incubator/CodeGen/stmtexpr-init.c b/clang/test/CIR/Incubator/CodeGen/stmtexpr-init.c
new file mode 100644
index 0000000000000..0e91ce5e79b65
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/stmtexpr-init.c
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// CIR: ![[annon_struct:.*]] = !cir.record<struct  {!s32i, !cir.array<!s32i x 2>}>
+// CIR: ![[sized_array:.*]] = !cir.record<struct "sized_array" {!s32i, !cir.array<!s32i x 0>}
+
+void escape(const void *);
+
+// CIR-DAG: cir.global "private" internal dso_local @T1._x = #cir.int<99> : !s8i
+// LLVM-DAG: internal global i8 99
+
+void T1(void) {
+  const char *x[1] = {({static char _x = 99; &_x; })};
+  escape(x);
+}
+
+struct sized_array {
+  int count;
+  int entries[];
+};
+
+#define N_ARGS(...) (sizeof((int[]){__VA_ARGS__}) / sizeof(int))
+
+#define ARRAY_PTR(...) ({                                                    \
+  static const struct sized_array _a = {N_ARGS(__VA_ARGS__), {__VA_ARGS__}}; \
+  &_a;                                                                       \
+})
+
+struct outer {
+  const struct sized_array *a;
+};
+
+void T2(void) {
+  // CIR-DAG: cir.global "private" constant internal dso_local @T2._a = #cir.const_record<{#cir.int<2> : !s32i, #cir.const_array<[#cir.int<50> : !s32i, #cir.int<60> : !s32i]> : !cir.array<!s32i x 2>}>
+  // LLVM-DAG: internal constant { i32, [2 x i32] } { i32 2, [2 x i32] [i32 50, i32 60] }
+  const struct sized_array *A = ARRAY_PTR(50, 60);
+
+  // CIR-DAG: cir.global "private" constant internal dso_local @T2._a.1 = #cir.const_record<{#cir.int<3> : !s32i, #cir.const_array<[#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.int<30> : !s32i]> : !cir.array<!s32i x 3>}>
+  // LLVM-DAG: internal constant { i32, [3 x i32] } { i32 3, [3 x i32] [i32 10, i32 20, i32 30] }
+  struct outer X = {ARRAY_PTR(10, 20, 30)};
+
+  // CIR-DAG: %[[T2A:.*]] = cir.get_global @T2._a : !cir.ptr<![[annon_struct]]>
+  // CIR-DAG: cir.cast bitcast %[[T2A]] : !cir.ptr<![[annon_struct]]> -> !cir.ptr<![[sized_array]]>
+  escape(A);
+  escape(&X);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/store.c b/clang/test/CIR/Incubator/CodeGen/store.c
new file mode 100644
index 0000000000000..47f2e201087ff
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/store.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+void foo(void) {
+  int a = 0;
+  a = 1;
+}
+
+//      CHECK: cir.func {{.*}} @foo()
+// CHECK-NEXT:   %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+// CHECK-NEXT:   %1 = cir.const #cir.int<0> : !s32i
+// CHECK-NEXT:   cir.store{{.*}} %1, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:   %2 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:   cir.store{{.*}} %2, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:   cir.return
+// CHECK-NEXT: }
+
+typedef int (*fn_t)();
+int get42() { return 42; }
+
+void storeNoArgsFn() {
+  fn_t f = get42;
+}
+
+// CHECK:  cir.func {{.*@storeNoArgsFn}}
+// CHECK:    %0 = cir.alloca
+// CHECK:    %1 = cir.get_global @get42 : !cir.ptr<!cir.func<() -> !s32i>>
+// CHECK:    %2 = cir.cast bitcast %1 : !cir.ptr<!cir.func<() -> !s32i>> -> !cir.ptr<!cir.func<(...) -> !s32i>>
+// CHECK:    cir.store{{.*}} %2, %0 : !cir.ptr<!cir.func<(...) -> !s32i>>, !cir.ptr<!cir.ptr<!cir.func<(...) -> !s32i>>>
diff --git a/clang/test/CIR/Incubator/CodeGen/string-literals.c b/clang/test/CIR/Incubator/CodeGen/string-literals.c
new file mode 100644
index 0000000000000..13a866b665581
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/string-literals.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+struct {
+  char x[10];
+  char y[10];
+  char z[10];
+} literals = {"1", "", "\00"};
+
+// CIR-LABEL: @literals
+// CIR:  #cir.const_record<{
+// CIR:     #cir.const_array<"1" : !cir.array<!s8i x 1>, trailing_zeros> : !cir.array<!s8i x 10>,
+// CIR:     #cir.zero : !cir.array<!s8i x 10>,
+// CIR:     #cir.zero : !cir.array<!s8i x 10>
+// CIR:  }>
+
+// LLVM-LABEL: @literals
+// LLVM:  global %struct.anon.0 {
+// LLVM:    [10 x i8] c"1\00\00\00\00\00\00\00\00\00",
+// LLVM:    [10 x i8] zeroinitializer,
+// LLVM:    [10 x i8] zeroinitializer
+// LLVM:  }
diff --git a/clang/test/CIR/Incubator/CodeGen/struct-comma.c b/clang/test/CIR/Incubator/CodeGen/struct-comma.c
new file mode 100644
index 0000000000000..cab2e9aa31ff6
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/struct-comma.c
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+struct AA {int a, b;} x;
+extern int r(void);
+void a(struct AA* b) {*b = (r(), x);}
+
+// CHECK-LABEL: @a
+// CHECK: %[[ADDR:.*]] = cir.alloca {{.*}} ["b"
+// CHECK: cir.store {{.*}}, %[[ADDR]]
+// CHECK: %[[LOAD:.*]] = cir.load deref{{.*}} %[[ADDR]]
+// CHECK: cir.call @r
+// CHECK: %[[GADDR:.*]] = cir.get_global @x
+// CHECK: cir.copy %[[GADDR]] to %[[LOAD]]
diff --git a/clang/test/CIR/Incubator/CodeGen/struct-empty.c b/clang/test/CIR/Incubator/CodeGen/struct-empty.c
new file mode 100644
index 0000000000000..e36efdee19632
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/struct-empty.c
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// CIR: ![[lock:.*]] = !cir.record<struct "rwlock_t" {}>
+// CIR: ![[fs_struct:.*]] = !cir.record<struct "fs_struct" {![[lock]], !s32i}
+
+typedef struct { } rwlock_t;
+struct fs_struct { rwlock_t lock; int umask; };
+void __copy_fs_struct(struct fs_struct *fs) { fs->lock = (rwlock_t) { }; }
+
+// CIR-LABEL: __copy_fs_struct
+// CIR:   %[[VAL_1:.*]] = cir.alloca !cir.ptr<![[fs_struct]]>, !cir.ptr<!cir.ptr<![[fs_struct]]>>, ["fs", init] {alignment = 8 : i64}
+// CIR:   %[[VAL_2:.*]] = cir.alloca ![[lock]], !cir.ptr<![[lock]]>, [".compoundliteral"] {alignment = 1 : i64}
+// CIR:   cir.store {{.*}}, %[[VAL_1]] : !cir.ptr<![[fs_struct]]>, !cir.ptr<!cir.ptr<![[fs_struct]]>>
+// CIR:   %[[VAL_3:.*]] = cir.load{{.*}} %[[VAL_1]] : !cir.ptr<!cir.ptr<![[fs_struct]]>>, !cir.ptr<![[fs_struct]]>
+// CIR:   %[[VAL_4:.*]] = cir.get_member %[[VAL_3]][0] {name = "lock"} : !cir.ptr<![[fs_struct]]> -> !cir.ptr<![[lock]]>
+// CIR:   cir.copy %[[VAL_2]] to %[[VAL_4]] : !cir.ptr<![[lock]]>
+
+// LLVM-LABEL: __copy_fs_struct
+// LLVM:  %[[VAL_5:.*]] = getelementptr {{.*}}, {{.*}}, i32 0, i32 0
+// LLVM:  call void @llvm.memcpy.p0.p0.i32(ptr %[[VAL_5]], ptr {{.*}}, i32 0, i1 false)
diff --git a/clang/test/CIR/Incubator/CodeGen/struct.c b/clang/test/CIR/Incubator/CodeGen/struct.c
new file mode 100644
index 0000000000000..274c1246a36e6
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/struct.c
@@ -0,0 +1,119 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+struct Bar {
+  int a;
+  char b;
+} bar;
+
+struct Foo {
+  int a;
+  char b;
+  struct Bar z;
+};
+
+// Recursive type
+typedef struct Node {
+  struct Node* next;
+} NodeStru;
+
+void baz(void) {
+  struct Bar b;
+  struct Foo f;
+}
+
+// CHECK-DAG: !rec_Node = !cir.record<struct "Node" {!cir.ptr<!cir.record<struct "Node">>} #cir.record.decl.ast>
+// CHECK-DAG: !rec_Bar = !cir.record<struct "Bar" {!s32i, !s8i}>
+// CHECK-DAG: !rec_Foo = !cir.record<struct "Foo" {!s32i, !s8i, !rec_Bar}>
+// CHECK-DAG: !rec_SLocal = !cir.record<struct "SLocal" {!s32i}>
+// CHECK-DAG: !rec_SLocal2E0 = !cir.record<struct "SLocal.0" {!cir.float}>
+//  CHECK-DAG: module {{.*}} {
+     // CHECK:   cir.func {{.*}} @baz()
+// CHECK-NEXT:     %0 = cir.alloca !rec_Bar, !cir.ptr<!rec_Bar>, ["b"] {alignment = 4 : i64}
+// CHECK-NEXT:     %1 = cir.alloca !rec_Foo, !cir.ptr<!rec_Foo>, ["f"] {alignment = 4 : i64}
+// CHECK-NEXT:     cir.return
+// CHECK-NEXT:   }
+
+void shouldConstInitStructs(void) {
+// CHECK: cir.func {{.*}} @shouldConstInitStructs
+  struct Foo f = {1, 2, {3, 4}};
+  // CHECK: %[[#V0:]] = cir.alloca !rec_Foo, !cir.ptr<!rec_Foo>, ["f"] {alignment = 4 : i64}
+  // CHECK: %[[#V1:]] = cir.cast bitcast %[[#V0]] : !cir.ptr<!rec_Foo> -> !cir.ptr<!rec_anon_struct1>
+  // CHECK: %[[#V2:]] = cir.const #cir.const_record<{#cir.int<1> : !s32i, #cir.int<2> : !s8i,
+  // CHECK-SAME:        #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 3>,
+  // CHECK-SAME:        #cir.const_record<{#cir.int<3> : !s32i, #cir.int<4> : !s8i,
+  // CHECK-SAME:        #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 3>}>
+  // CHECK-SAME:        : !rec_anon_struct}> : !rec_anon_struct1
+  // CHECK: cir.store{{.*}} %[[#V2]], %[[#V1]] : !rec_anon_struct1, !cir.ptr<!rec_anon_struct1>
+}
+
+// Should zero-initialize uninitialized global structs.
+struct S {
+  int a,b;
+} s;
+// CHECK-DAG: cir.global external @s = #cir.zero : !rec_S
+
+// Should initialize basic global structs.
+struct S1 {
+  int a;
+  float f;
+  int *p;
+} s1 = {1, .1, 0};
+// CHECK-DAG: cir.global external @s1 = #cir.const_record<{#cir.int<1> : !s32i, #cir.fp<1.000000e-01> : !cir.float, #cir.ptr<null> : !cir.ptr<!s32i>}> : !rec_S1
+
+// Should initialize global nested structs.
+struct S2 {
+  struct S2A {
+    int a;
+  } s2a;
+} s2 = {{1}};
+// CHECK-DAG: cir.global external @s2 = #cir.const_record<{#cir.const_record<{#cir.int<1> : !s32i}> : !rec_S2A}> : !rec_S2
+
+// Should initialize global arrays of structs.
+struct S3 {
+  int a;
+} s3[3] = {{1}, {2}, {3}};
+// CHECK-DAG: cir.global external @s3 = #cir.const_array<[#cir.const_record<{#cir.int<1> : !s32i}> : !rec_S3, #cir.const_record<{#cir.int<2> : !s32i}> : !rec_S3, #cir.const_record<{#cir.int<3> : !s32i}> : !rec_S3]> : !cir.array<!rec_S3 x 3>
+
+void shouldCopyStructAsCallArg(struct S1 s) {
+// CHECK-DAG: cir.func {{.*}} @shouldCopyStructAsCallArg
+  shouldCopyStructAsCallArg(s);
+  // CHECK-DAG: %[[#LV:]] = cir.load{{.*}} %{{.+}} : !cir.ptr<!rec_S1>, !rec_S1
+  // CHECK-DAG: cir.call @shouldCopyStructAsCallArg(%[[#LV]]) : (!rec_S1) -> ()
+}
+
+struct Bar shouldGenerateAndAccessStructArrays(void) {
+  struct Bar s[1] = {{3, 4}};
+  return s[0];
+}
+// CHECK-DAG: cir.func {{.*}} @shouldGenerateAndAccessStructArrays
+// CHECK-DAG: %[[#STRIDE:]] = cir.const #cir.int<0> : !s32i
+// CHECK-DAG: %[[#ELT:]] = cir.get_element %{{.+}}[%[[#STRIDE]]] : (!cir.ptr<!cir.array<!rec_Bar x 1>>, !s32i) -> !cir.ptr<!rec_Bar>
+// CHECK-DAG: cir.copy %[[#ELT]] to %{{.+}} : !cir.ptr<!rec_Bar>
+
+// CHECK-DAG: cir.func {{.*}} @local_decl
+// CHECK-DAG: {{%.}} = cir.alloca !rec_Local, !cir.ptr<!rec_Local>, ["a"]
+void local_decl(void) {
+  struct Local {
+    int i;
+  };
+  struct Local a;
+}
+
+// CHECK-DAG: cir.func {{.*}} @useRecursiveType
+// CHECK-DAG: cir.get_member {{%.}}[0] {name = "next"} : !cir.ptr<!rec_Node> -> !cir.ptr<!cir.ptr<!rec_Node>>
+void useRecursiveType(NodeStru* a) {
+  a->next = 0;
+}
+
+// CHECK-DAG: cir.alloca !rec_SLocal, !cir.ptr<!rec_SLocal>, ["loc", init] {alignment = 4 : i64}
+// CHECK-DAG: cir.scope {
+// CHECK-DAG:   cir.alloca !rec_SLocal2E0, !cir.ptr<!rec_SLocal2E0>, ["loc", init] {alignment = 4 : i64}
+void local_structs(int a, float b) {
+  struct SLocal { int x; };
+  struct SLocal loc = {a};
+  {
+    struct SLocal { float y; };
+    struct SLocal loc = {b};
+  }
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/struct.cpp b/clang/test/CIR/Incubator/CodeGen/struct.cpp
new file mode 100644
index 0000000000000..deb4d174c6fb6
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/struct.cpp
@@ -0,0 +1,230 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+struct Bar {
+  int a;
+  char b;
+  void method() {}
+  void method2(int a) {}
+  int method3(int a) { return a; }
+};
+
+struct Foo {
+  int a;
+  char b;
+  Bar z;
+};
+
+void baz() {
+  Bar b;
+  b.method();
+  b.method2(4);
+  int result = b.method3(4);
+  Foo f;
+}
+
+struct incomplete;
+void yoyo(incomplete *i) {}
+
+//  CHECK-DAG: !rec_incomplete = !cir.record<struct "incomplete" incomplete
+//  CHECK-DAG: !rec_Bar = !cir.record<struct "Bar" {!s32i, !s8i}>
+
+//  CHECK-DAG: !rec_Foo = !cir.record<struct "Foo" {!s32i, !s8i, !rec_Bar}>
+//  CHECK-DAG: !rec_Mandalore = !cir.record<struct "Mandalore" {!u32i, !cir.ptr<!void>, !s32i} #cir.record.decl.ast>
+//  CHECK-DAG: !rec_Adv = !cir.record<class "Adv" {!rec_Mandalore}>
+//  CHECK-DAG: !rec_Entry = !cir.record<struct "Entry" {!cir.ptr<!cir.func<(!s32i, !cir.ptr<!s8i>, !cir.ptr<!void>) -> !u32i>>}>
+
+//      CHECK: cir.func {{.*}} @_ZN3Bar6methodEv(%arg0: !cir.ptr<!rec_Bar>
+// CHECK-NEXT:   %0 = cir.alloca !cir.ptr<!rec_Bar>, !cir.ptr<!cir.ptr<!rec_Bar>>, ["this", init] {alignment = 8 : i64}
+// CHECK-NEXT:   cir.store{{.*}} %arg0, %0 : !cir.ptr<!rec_Bar>, !cir.ptr<!cir.ptr<!rec_Bar>>
+// CHECK-NEXT:   %1 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!rec_Bar>>, !cir.ptr<!rec_Bar>
+// CHECK-NEXT:   cir.return
+// CHECK-NEXT: }
+
+//      CHECK: cir.func {{.*}} @_ZN3Bar7method2Ei(%arg0: !cir.ptr<!rec_Bar> {{.*}}, %arg1: !s32i
+// CHECK-NEXT:   %0 = cir.alloca !cir.ptr<!rec_Bar>, !cir.ptr<!cir.ptr<!rec_Bar>>, ["this", init] {alignment = 8 : i64}
+// CHECK-NEXT:   %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+// CHECK-NEXT:   cir.store{{.*}} %arg0, %0 : !cir.ptr<!rec_Bar>, !cir.ptr<!cir.ptr<!rec_Bar>>
+// CHECK-NEXT:   cir.store{{.*}} %arg1, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:   %2 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!rec_Bar>>, !cir.ptr<!rec_Bar>
+// CHECK-NEXT:   cir.return
+// CHECK-NEXT: }
+
+//      CHECK: cir.func {{.*}} @_ZN3Bar7method3Ei(%arg0: !cir.ptr<!rec_Bar> {{.*}}, %arg1: !s32i
+// CHECK-NEXT:   %0 = cir.alloca !cir.ptr<!rec_Bar>, !cir.ptr<!cir.ptr<!rec_Bar>>, ["this", init] {alignment = 8 : i64}
+// CHECK-NEXT:   %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+// CHECK-NEXT:   %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CHECK-NEXT:   cir.store{{.*}} %arg0, %0 : !cir.ptr<!rec_Bar>, !cir.ptr<!cir.ptr<!rec_Bar>>
+// CHECK-NEXT:   cir.store{{.*}} %arg1, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:   %3 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!rec_Bar>>, !cir.ptr<!rec_Bar>
+// CHECK-NEXT:   %4 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:   cir.store{{.*}} %4, %2 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:   %5 = cir.load{{.*}} %2 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:   cir.return %5
+// CHECK-NEXT: }
+
+//      CHECK: cir.func {{.*}} @_Z3bazv()
+// CHECK-NEXT:   %0 = cir.alloca !rec_Bar, !cir.ptr<!rec_Bar>, ["b"] {alignment = 4 : i64}
+// CHECK-NEXT:   %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["result", init] {alignment = 4 : i64}
+// CHECK-NEXT:   %2 = cir.alloca !rec_Foo, !cir.ptr<!rec_Foo>, ["f"] {alignment = 4 : i64}
+// CHECK-NEXT:   cir.call @_ZN3Bar6methodEv(%0) : (!cir.ptr<!rec_Bar>) -> ()
+// CHECK-NEXT:   %3 = cir.const #cir.int<4> : !s32i
+// CHECK-NEXT:   cir.call @_ZN3Bar7method2Ei(%0, %3) : (!cir.ptr<!rec_Bar>, !s32i) -> ()
+// CHECK-NEXT:   %4 = cir.const #cir.int<4> : !s32i
+// CHECK-NEXT:   %5 = cir.call @_ZN3Bar7method3Ei(%0, %4) : (!cir.ptr<!rec_Bar>, !s32i) -> !s32i
+// CHECK-NEXT:   cir.store{{.*}} %5, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:   cir.return
+// CHECK-NEXT: }
+
+typedef enum Ways {
+  ThisIsTheWay = 1000024001,
+} Ways;
+
+typedef struct Mandalore {
+    Ways             w;
+    const void*      n;
+    int              d;
+} Mandalore;
+
+class Adv {
+  Mandalore x{ThisIsTheWay};
+public:
+  Adv() {}
+};
+
+void m() { Adv C; }
+
+// CHECK: cir.func {{.*}} @_ZN3AdvC2Ev(%arg0: !cir.ptr<!rec_Adv>
+// CHECK:     %0 = cir.alloca !cir.ptr<!rec_Adv>, !cir.ptr<!cir.ptr<!rec_Adv>>, ["this", init] {alignment = 8 : i64}
+// CHECK:     cir.store{{.*}} %arg0, %0 : !cir.ptr<!rec_Adv>, !cir.ptr<!cir.ptr<!rec_Adv>>
+// CHECK:     %1 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!rec_Adv>>, !cir.ptr<!rec_Adv>
+// CHECK:     %2 = cir.get_member %1[0] {name = "x"} : !cir.ptr<!rec_Adv> -> !cir.ptr<!rec_Mandalore>
+// CHECK:     %3 = cir.get_member %2[0] {name = "w"} : !cir.ptr<!rec_Mandalore> -> !cir.ptr<!u32i>
+// CHECK:     %4 = cir.const #cir.int<1000024001> : !u32i
+// CHECK:     cir.store{{.*}} %4, %3 : !u32i, !cir.ptr<!u32i>
+// CHECK:     %5 = cir.get_member %2[1] {name = "n"} : !cir.ptr<!rec_Mandalore> -> !cir.ptr<!cir.ptr<!void>>
+// CHECK:     %6 = cir.const #cir.ptr<null> : !cir.ptr<!void>
+// CHECK:     cir.store{{.*}} %6, %5 : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// CHECK:     %7 = cir.get_member %2[2] {name = "d"} : !cir.ptr<!rec_Mandalore> -> !cir.ptr<!s32i>
+// CHECK:     %8 = cir.const #cir.int<0> : !s32i
+// CHECK:     cir.store{{.*}} %8, %7 : !s32i, !cir.ptr<!s32i>
+// CHECK:     cir.return
+// CHECK:   }
+
+struct A {
+  int a;
+};
+
+// Should globally const-initialize struct members.
+struct A simpleConstInit = {1};
+// CHECK: cir.global external @simpleConstInit = #cir.const_record<{#cir.int<1> : !s32i}> : !rec_A
+
+// Should globally const-initialize arrays with struct members.
+struct A arrConstInit[1] = {{1}};
+// CHECK: cir.global external @arrConstInit = #cir.const_array<[#cir.const_record<{#cir.int<1> : !s32i}> : !rec_A]> : !cir.array<!rec_A x 1>
+
+// Should globally const-initialize empty structs with a non-trivial constexpr
+// constructor (as undef, to match existing clang CodeGen behavior).
+struct NonTrivialConstexprConstructor {
+  constexpr NonTrivialConstexprConstructor() {}
+} nonTrivialConstexprConstructor;
+// CHECK: cir.global external @nonTrivialConstexprConstructor = #cir.undef : !rec_NonTrivialConstexprConstructor {alignment = 1 : i64}
+// CHECK-NOT: @__cxx_global_var_init
+
+// Should locally copy struct members.
+void shouldLocallyCopyStructAssignments(void) {
+  struct A a = { 3 };
+  // CHECK: %[[#SA:]] = cir.alloca !rec_A, !cir.ptr<!rec_A>, ["a", init] {alignment = 4 : i64}
+  struct A b = a;
+  // CHECK: %[[#SB:]] = cir.alloca !rec_A, !cir.ptr<!rec_A>, ["b", init] {alignment = 4 : i64}
+  // cir.copy %[[#SA]] to %[[SB]] : !cir.ptr<!rec_A>
+}
+
+A get_default() { return A{2}; }
+
+struct S {
+  S(A a = get_default());
+};
+
+void h() { S s; }
+
+// CHECK: cir.func {{.*}} @_Z1hv()
+// CHECK:   %0 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["s", init] {alignment = 1 : i64}
+// CHECK:   %1 = cir.alloca !rec_A, !cir.ptr<!rec_A>, ["agg.tmp0"] {alignment = 4 : i64}
+// CHECK:   %2 = cir.call @_Z11get_defaultv() : () -> !rec_A
+// CHECK:   cir.store{{.*}} %2, %1 : !rec_A, !cir.ptr<!rec_A>
+// CHECK:   %3 = cir.load{{.*}} %1 : !cir.ptr<!rec_A>, !rec_A
+// CHECK:   cir.call @_ZN1SC1E1A(%0, %3) : (!cir.ptr<!rec_S>, !rec_A) -> ()
+// CHECK:   cir.return
+// CHECK: }
+
+typedef enum enumy {
+  A = 1
+} enumy;
+
+typedef enumy (*fnPtr)(int instance, const char* name, void* function);
+
+struct Entry {
+  fnPtr procAddr = nullptr;
+};
+
+void ppp() { Entry x; }
+
+// CHECK: cir.func {{.*}} @_ZN5EntryC2Ev(%arg0: !cir.ptr<!rec_Entry>
+
+// CHECK: cir.get_member %1[0] {name = "procAddr"} : !cir.ptr<!rec_Entry> -> !cir.ptr<!cir.ptr<!cir.func<(!s32i, !cir.ptr<!s8i>, !cir.ptr<!void>) -> !u32i>>>
+
+struct CompleteS {
+  int a;
+  char b;
+};
+
+void designated_init_update_expr() {
+  CompleteS a;
+
+  struct Container {
+    CompleteS c;
+  } b = {a, .c.a = 1};
+}
+
+// CHECK: %[[A_ADDR:.*]] = cir.alloca !rec_CompleteS, !cir.ptr<!rec_CompleteS>, ["a"]
+// CHECK: %[[B_ADDR:.*]] = cir.alloca !rec_Container, !cir.ptr<!rec_Container>, ["b", init]
+// CHECK: %[[C_ADDR:.*]] = cir.get_member %[[B_ADDR]][0] {name = "c"} : !cir.ptr<!rec_Container> -> !cir.ptr<!rec_CompleteS>
+// CHECK: cir.copy %[[A_ADDR]] to %[[C_ADDR]] : !cir.ptr<!rec_CompleteS>
+// CHECK: %[[ELEM_0_PTR:.*]] = cir.get_member %[[C_ADDR]][0] {name = "a"} : !cir.ptr<!rec_CompleteS> -> !cir.ptr<!s32i>
+// CHECK: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s32i
+// CHECK: cir.store{{.*}} %[[CONST_1]], %[[ELEM_0_PTR]] : !s32i, !cir.ptr<!s32i>
+// CHECK: %[[ELEM_1_PTR:.*]] = cir.get_member %[[C_ADDR]][1] {name = "b"} : !cir.ptr<!rec_CompleteS> -> !cir.ptr<!s8i>
+
+void unary_extension() {
+  CompleteS a = __extension__ CompleteS();
+}
+
+// CHECK: %[[A_ADDR:.*]] = cir.alloca !rec_CompleteS, !cir.ptr<!rec_CompleteS>, ["a", init]
+// CHECK: %[[ZERO_INIT:.*]] = cir.const #cir.zero : !rec_CompleteS
+// CHECK: cir.store{{.*}} %[[ZERO_INIT]], %[[A_ADDR]] : !rec_CompleteS, !cir.ptr<!rec_CompleteS>
+
+void generic_selection() {
+  CompleteS a;
+  CompleteS b;
+  int c;
+  CompleteS d = _Generic(c, int : a, default: b);
+}
+
+// CHECK: %[[A_ADDR:.*]] = cir.alloca !rec_CompleteS, !cir.ptr<!rec_CompleteS>, ["a"]
+// CHECK: %[[B_ADDR:.*]] = cir.alloca !rec_CompleteS, !cir.ptr<!rec_CompleteS>, ["b"]
+// CHECK: %[[C_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["c"]
+// CHECK: %[[D_ADDR:.*]] = cir.alloca !rec_CompleteS, !cir.ptr<!rec_CompleteS>, ["d", init]
+// CHECK: cir.copy %[[A_ADDR]] to %[[D_ADDR]] : !cir.ptr<!rec_CompleteS>
+
+void choose_expr() {
+  CompleteS a;
+  CompleteS b;
+  CompleteS c = __builtin_choose_expr(true, a, b);
+}
+
+// CHECK: cir.func{{.*}} @_Z11choose_exprv()
+// CHECK:   %[[A_ADDR:.*]] = cir.alloca !rec_CompleteS, !cir.ptr<!rec_CompleteS>, ["a"]
+// CHECK:   %[[B_ADDR:.*]] = cir.alloca !rec_CompleteS, !cir.ptr<!rec_CompleteS>, ["b"]
+// CHECK:   %[[C_ADDR:.*]] = cir.alloca !rec_CompleteS, !cir.ptr<!rec_CompleteS>, ["c", init]
+// CHECK:   cir.copy %[[A_ADDR]] to %[[C_ADDR]] : !cir.ptr<!rec_CompleteS>
diff --git a/clang/test/CIR/Incubator/CodeGen/structural-binding.cpp b/clang/test/CIR/Incubator/CodeGen/structural-binding.cpp
new file mode 100644
index 0000000000000..eaf95ce34ce97
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/structural-binding.cpp
@@ -0,0 +1,112 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+struct B { B(); };
+
+struct A {
+  B a;
+  int b;
+  char c;
+};
+
+struct C {
+  C(int a, int b): a(a), b(b) {}
+  template <unsigned>
+  friend const int &get(const C&);
+ private:
+  int a;
+  int b;
+};
+
+template <>
+const int &get<0>(const C& c) { return c.a; }
+template <>
+const int &get<1>(const C& c) { return c.b; }
+
+namespace std {
+
+template <typename>
+struct tuple_size;
+
+template <>
+struct tuple_size<C> { constexpr inline static unsigned value = 2; };
+
+template <unsigned, typename>
+struct tuple_element;
+
+template <unsigned I>
+struct tuple_element<I, C> { using type = const int; };
+
+}
+
+
+// binding to data members
+void f(A &a) {
+  // CIR: @_Z1fR1A
+  // LLVM: @_Z1fR1A
+
+  auto &[x, y, z] = a;
+  (x, y, z);
+  // CIR: %[[a:.*]] = cir.load %1 : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A>
+  // CIR: {{.*}} = cir.get_member %[[a]][0] {name = "a"} : !cir.ptr<!rec_A> -> !cir.ptr<!rec_B>
+  // CIR: %[[a:.*]] = cir.load %1 : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A>
+  // CIR: {{.*}} = cir.get_member %[[a]][2] {name = "b"} : !cir.ptr<!rec_A> -> !cir.ptr<!s32i>
+  // CIR: %[[a:.*]] = cir.load %1 : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A>
+  // CIR: {{.*}} = cir.get_member %[[a]][3] {name = "c"} : !cir.ptr<!rec_A> -> !cir.ptr<!s8i>
+  // LLVM: {{.*}} = getelementptr %struct.A, ptr {{.*}}, i32 0, i32 0
+  // LLVM: {{.*}} = getelementptr %struct.A, ptr {{.*}}, i32 0, i32 2
+  // LLVM: {{.*}} = getelementptr %struct.A, ptr {{.*}}, i32 0, i32 3
+
+  auto [x2, y2, z2] = a;
+  (x2, y2, z2);
+  // CIR: cir.copy %[[a:.*]] to %2 : !cir.ptr<!rec_A>
+  // CIR: {{.*}} = cir.get_member %2[0] {name = "a"} : !cir.ptr<!rec_A> -> !cir.ptr<!rec_B>
+  // CIR: {{.*}} = cir.get_member %2[2] {name = "b"} : !cir.ptr<!rec_A> -> !cir.ptr<!s32i>
+  // CIR: {{.*}} = cir.get_member %2[3] {name = "c"} : !cir.ptr<!rec_A> -> !cir.ptr<!s8i>
+
+  // for the rest, just expect the codegen doesn't crash
+  auto &&[x3, y3, z3] = a;
+  (x3, y3, z3);
+
+  const auto &[x4, y4, z4] = a;
+  (x4, y4, z4);
+
+  const auto [x5, y5, z5] = a;
+  (x5, y5, z5);
+}
+
+// binding to a tuple-like type
+void g(C &c) {
+  // CIR: @_Z1gR1C
+  // LLVM: @_Z1gR1C
+
+  auto [x8, y8] = c;
+  (x8, y8);
+  // CIR: cir.copy %7 to %[[c:.*]] : !cir.ptr<!rec_C>
+  // CIR: %[[x8:.*]] = cir.call @_Z3getILj0EERKiRK1C(%[[c]]) : (!cir.ptr<!rec_C>) -> !cir.ptr<!s32i>
+  // CIR: cir.store{{.*}} %[[x8]], %[[x8p:.*]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+  // CIR: %[[x9:.*]] = cir.call @_Z3getILj1EERKiRK1C(%[[c]]) : (!cir.ptr<!rec_C>) -> !cir.ptr<!s32i>
+  // CIR: cir.store{{.*}} %[[x9]], %[[x9p:.*]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+  // CIR: {{.*}} = cir.load %[[x8p]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+  // CIR: {{.*}} = cir.load %[[x9p]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+  // LLVM: call void @llvm.memcpy.p0.p0.i32(ptr {{.*}}, ptr {{.*}}, i32 8, i1 false)
+  // LLVM: {{.*}} = call ptr @_Z3getILj0EERKiRK1C(ptr {{.*}})
+  // LLVM: {{.*}} = call ptr @_Z3getILj1EERKiRK1C(ptr {{.*}})
+
+  auto &[x9, y9] = c;
+  (x9, y9);
+  // CIR: cir.store{{.*}} %12, %[[cp:.*]] : !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>>
+  // CIR: %[[c:.*]] = cir.load %[[cp]] : !cir.ptr<!cir.ptr<!rec_C>>, !cir.ptr<!rec_C>
+  // CIR: %[[x8:.*]] = cir.call @_Z3getILj0EERKiRK1C(%[[c]]) : (!cir.ptr<!rec_C>) -> !cir.ptr<!s32i>
+  // CIR: cir.store{{.*}} %[[x8]], %[[x8p:.*]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+  // CIR: %[[c:.*]] = cir.load %[[cp]] : !cir.ptr<!cir.ptr<!rec_C>>, !cir.ptr<!rec_C>
+  // CIR: %[[x9:.*]] = cir.call @_Z3getILj1EERKiRK1C(%[[c]]) : (!cir.ptr<!rec_C>) -> !cir.ptr<!s32i>
+  // CIR: cir.store{{.*}} %[[x9]], %[[x9p:.*]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+  // CIR: {{.*}} = cir.load %[[x8p]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+  // CIR: {{.*}} = cir.load %[[x9p]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+}
+
+// TODO: add test case for binding to an array type
+// after ArrayInitLoopExpr is supported
diff --git a/clang/test/CIR/Incubator/CodeGen/switch-gnurange.cpp b/clang/test/CIR/Incubator/CodeGen/switch-gnurange.cpp
new file mode 100644
index 0000000000000..72f89488ef6c3
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/switch-gnurange.cpp
@@ -0,0 +1,348 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+enum letter {
+ A, B, C, D, E, F, G, H, I, J, L
+};
+
+int sw1(enum letter c) {
+  switch (c) {
+    case A ... C:
+    case D:
+    case E ... F:
+    case G ... L:
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+//      CIR:  cir.func {{.*}} @_Z3sw16letter
+//      CIR:    cir.scope {
+//      CIR:      cir.switch
+// CIR-NEXT:      cir.case(range, [#cir.int<0> : !s32i, #cir.int<2> : !s32i]) {
+// CIR-NEXT:        cir.case(equal, [#cir.int<3> : !s32i]) {
+// CIR-NEXT:          cir.case(range, [#cir.int<4> : !s32i, #cir.int<5> : !s32i]) {
+// CIR-NEXT:            cir.case(range, [#cir.int<6> : !s32i, #cir.int<10> : !s32i]) {
+//      CIR:              cir.int<1>
+//      CIR:              cir.return
+//      CIR:          cir.yield
+//      CIR:        cir.yield
+//      CIR:      cir.yield
+//      CIR:      cir.case(default, []) {
+// CIR-NEXT:        cir.int<0>
+//      CIR:        cir.return
+
+//      LLVM:  @_Z3sw16letter
+//      LLVM:    switch i32 %[[C:[0-9]+]], label %[[DEFAULT:[0-9]+]] [
+// LLVM-NEXT:      i32 3, label %[[CASE_3:[0-9]+]]
+// LLVM-NEXT:      i32 0, label %[[CASE_0_2:[0-9]+]]
+// LLVM-NEXT:      i32 1, label %[[CASE_0_2]]
+// LLVM-NEXT:      i32 2, label %[[CASE_0_2]]
+// LLVM-NEXT:      i32 4, label %[[CASE_4_5:[0-9]+]]
+// LLVM-NEXT:      i32 5, label %[[CASE_4_5]]
+// LLVM-NEXT:      i32 6, label %[[CASE_6_10:[0-9]+]]
+// LLVM-NEXT:      i32 7, label %[[CASE_6_10]]
+// LLVM-NEXT:      i32 8, label %[[CASE_6_10]]
+// LLVM-NEXT:      i32 9, label %[[CASE_6_10]]
+// LLVM-NEXT:      i32 10, label %[[CASE_6_10]]
+// LLVM-NEXT:    ]
+//      LLVM:  [[CASE_0_2]]:
+//      LLVM:    br label %[[CASE_3]]
+//      LLVM:  [[CASE_3]]:
+//      LLVM:    br label %[[CASE_4_5]]
+//      LLVM:  [[CASE_4_5]]:
+//      LLVM:    br label %[[CASE_6_10]]
+//      LLVM:  [[CASE_6_10]]:
+//      LLVM:    store i32 1
+//      LLVM:    ret
+//      LLVM:  [[DEFAULT]]:
+//      LLVM:    store i32 0
+//      LLVM:    ret
+
+
+int sw2(enum letter c) {
+  switch (c) {
+    case A ... C:
+    case L ... A:
+      return 1;
+    default:
+      return 0;
+  }
+}
+
+//      CIR:  cir.func {{.*}} @_Z3sw26letter
+//      CIR:    cir.scope {
+//      CIR:      cir.switch
+// CIR-NEXT:      cir.case(range, [#cir.int<0> : !s32i, #cir.int<2> : !s32i]) {
+//      CIR:          cir.case(range, [#cir.int<10> : !s32i, #cir.int<0> : !s32i]) {
+//      CIR:        cir.return
+// CIR-NEXT:      }
+//      CIR:      cir.case(default, []) {
+//      CIR:        cir.return
+// CIR-NEXT:      }
+
+//      LLVM:  @_Z3sw26letter
+//      LLVM:    switch i32 %[[C:[0-9]+]], label %[[DEFAULT:[0-9]+]] [
+// LLVM-NEXT:      i32 0, label %[[CASE:[0-9]+]]
+// LLVM-NEXT:      i32 1, label %[[CASE]]
+// LLVM-NEXT:      i32 2, label %[[CASE]]
+// LLVM-NEXT:    ]
+//      LLVM:  [[CASE]]:
+//      LLVM:    br label %[[IMPL:[0-9]+]]
+//      LLVM:  [[IMPL]]:
+//      LLVM:    store i32 1
+//      LLVM:    ret
+//      LLVM:  [[DEFAULT]]:
+//      LLVM:    store i32 0
+//      LLVM:    ret
+
+void sw3(enum letter c) {
+  int x = 0;
+  switch (c) {
+  case A ... C:
+    x = 1;
+    break;
+  case D ... F:
+    x = 2;
+    break;
+  case G ... I:
+    x = 3;
+    break;
+  case J ... L:
+    x = 4;
+    break;
+  }
+}
+
+//      CIR:  cir.func {{.*}} @_Z3sw36letter
+//      CIR:    cir.scope {
+//      CIR:      cir.switch
+// CIR-NEXT:      cir.case(range, [#cir.int<0> : !s32i, #cir.int<2> : !s32i]) {
+// CIR-NEXT:        cir.int<1>
+//      CIR:        cir.break
+// CIR-NEXT:      }
+//      CIR:      cir.case(range, [#cir.int<3> : !s32i, #cir.int<5> : !s32i]) {
+// CIR-NEXT:        cir.int<2>
+//      CIR:        cir.break
+// CIR-NEXT:      }
+//      CIR:      cir.case(range, [#cir.int<6> : !s32i, #cir.int<8> : !s32i]) {
+// CIR-NEXT:        cir.int<3>
+//      CIR:        cir.break
+// CIR-NEXT:      }
+//      CIR:      cir.case(range, [#cir.int<9> : !s32i, #cir.int<10> : !s32i]) {
+// CIR-NEXT:        cir.int<4>
+//      CIR:        cir.break
+// CIR-NEXT:      }
+
+//      LLVM:  @_Z3sw36letter
+//      LLVM:    switch i32 %[[C:[0-9]+]], label %[[DEFAULT:[0-9]+]] [
+// LLVM-NEXT:      i32 0, label %[[CASE_AC:[0-9]+]]
+// LLVM-NEXT:      i32 1, label %[[CASE_AC]]
+// LLVM-NEXT:      i32 2, label %[[CASE_AC]]
+// LLVM-NEXT:      i32 3, label %[[CASE_DF:[0-9]+]]
+// LLVM-NEXT:      i32 4, label %[[CASE_DF]]
+// LLVM-NEXT:      i32 5, label %[[CASE_DF]]
+// LLVM-NEXT:      i32 6, label %[[CASE_GI:[0-9]+]]
+// LLVM-NEXT:      i32 7, label %[[CASE_GI]]
+// LLVM-NEXT:      i32 8, label %[[CASE_GI]]
+// LLVM-NEXT:      i32 9, label %[[CASE_JL:[0-9]+]]
+// LLVM-NEXT:      i32 10, label %[[CASE_JL]]
+// LLVM-NEXT:    ]
+//      LLVM:  [[CASE_AC]]:
+//      LLVM:    store i32 1, ptr %[[X:[0-9]+]]
+//      LLVM:    br label %[[EPILOG:[0-9]+]]
+//      LLVM:  [[CASE_DF]]:
+//      LLVM:    store i32 2, ptr %[[X]]
+//      LLVM:    br label %[[EPILOG]]
+//      LLVM:  [[CASE_GI]]:
+//      LLVM:    store i32 3, ptr %[[X]]
+//      LLVM:    br label %[[EPILOG]]
+//      LLVM:  [[CASE_JL]]:
+//      LLVM:    store i32 4, ptr %[[X]]
+//      LLVM:    br label %[[EPILOG]]
+//      LLVM:  [[EPILOG]]:
+// LLVM-NEXT:    br label %[[EPILOG_END:[0-9]+]]
+//      LLVM:  [[EPILOG_END]]:
+// LLVM-NEXT:    ret void
+
+void sw4(int x) {
+  switch (x) {
+    case 66 ... 233:
+      break;
+    case -50 ... 50:
+      break;
+  }
+}
+
+//      CIR:  cir.func {{.*}} @_Z3sw4i
+//      CIR:    cir.scope {
+//      CIR:      cir.switch
+// CIR-NEXT:      cir.case(range, [#cir.int<66> : !s32i, #cir.int<233> : !s32i]) {
+// CIR-NEXT:        cir.break
+// CIR-NEXT:      }
+//      CIR:      cir.case(range, [#cir.int<-50> : !s32i, #cir.int<50> : !s32i]) {
+// CIR-NEXT:        cir.break
+// CIR-NEXT:      }
+
+
+//      LLVM:  @_Z3sw4i
+//      LLVM:    switch i32 %[[X:[0-9]+]], label %[[JUDGE_NEG50_50:[0-9]+]] [
+// LLVM-NEXT:    ]
+//      LLVM:  [[UNREACHABLE_BB:[0-9]+]]: {{.*}} No predecessors!
+// LLVM-NEXT:    br label
+//      LLVM:  [[CASE_66_233:[0-9]+]]:
+// LLVM-NEXT:    br label %[[EPILOG:[0-9]+]]
+//      LLVM:  [[CASE_NEG50_50:[0-9]+]]:
+// LLVM-NEXT:    br label %[[EPILOG]]
+//      LLVM:  [[JUDGE_NEG50_50]]:
+// LLVM-NEXT:    %[[DIFF:[0-9]+]] = sub i32 %[[X]], -50
+// LLVM-NEXT:    %[[DIFF_CMP:[0-9]+]] = icmp ule i32 %[[DIFF]], 100
+// LLVM-NEXT:    br i1 %[[DIFF_CMP]], label %[[CASE_NEG50_50]], label %[[JUDGE_66_233:[0-9]+]]
+//      LLVM:  [[JUDGE_66_233]]:
+// LLVM-NEXT:    %[[DIFF:[0-9]+]] = sub i32 %[[X]], 66
+// LLVM-NEXT:    %[[DIFF_CMP:[0-9]+]] = icmp ule i32 %[[DIFF]], 167
+//      LLVM:    br i1 %[[DIFF_CMP]], label %[[CASE_66_233]], label %[[EPILOG]]
+//      LLVM:  [[EPILOG]]:
+// LLVM-NEXT:    br label %[[EPILOG_END:[0-9]+]]
+//      LLVM:  [[EPILOG_END]]:
+// LLVM-NEXT:    ret void
+
+void sw5(int x) {
+  int y = 0;
+  switch (x) {
+    case 100 ... -100:
+      y = 1;
+  }
+}
+
+//      CIR:  cir.func {{.*}} @_Z3sw5i
+//      CIR:    cir.scope {
+//      CIR:      cir.switch
+// CIR-NEXT:      cir.case(range, [#cir.int<100> : !s32i, #cir.int<-100> : !s32i]) {
+// CIR-NEXT:        cir.int<1>
+//      CIR:        cir.yield
+// CIR-NEXT:      }
+
+//      LLVM:  @_Z3sw5i
+//      LLVM:    switch i32 %[[X:[0-9]+]], label %[[EPILOG:[0-9]+]] [
+// LLVM-NEXT:    ]
+//      LLVM:  [[UNREACHABLE_BB:[0-9]+]]: {{.*}} No predecessors!
+// LLVM-NEXT:    br label
+//      LLVM:  [[CASE_100_NEG100:[0-9]+]]:
+// LLVM-NEXT:    store i32 1, ptr %[[Y:[0-9]+]]
+// LLVM-NEXT:    br label %[[EPILOG_PRED:[0-9]+]]
+//      LLVM:  [[EPILOG_PRED]]:
+// LLVM-NEXT:    br label %[[EPILOG]]
+//      LLVM:  [[EPILOG]]:
+// LLVM-NEXT:    br label %[[EPILOG_END:[0-9]+]]
+//      LLVM:  [[EPILOG_END]]:
+// LLVM-NEXT:    ret void
+
+void sw6(int x) {
+  int y = 0;
+  switch (x) {
+    case -2147483648 ... 2147483647:
+      y = 1;
+  }
+}
+
+//      CIR:  cir.func {{.*}} @_Z3sw6i
+//      CIR:    cir.scope {
+//      CIR:      cir.switch
+// CIR-NEXT:      cir.case(range, [#cir.int<-2147483648> : !s32i, #cir.int<2147483647> : !s32i]) {
+// CIR-NEXT:        cir.int<1>
+//      CIR:        cir.yield
+// CIR-NEXT:      }
+
+//      LLVM:  @_Z3sw6i
+//      LLVM:    switch i32 %[[X:[0-9]+]], label %[[DEFAULT:[0-9]+]] [
+// LLVM-NEXT:    ]
+//      LLVM:  [[UNREACHABLE_BB:[0-9]+]]: {{.*}} No predecessors!
+// LLVM-NEXT:    br label
+//      LLVM:  [[CASE_MIN_MAX:[0-9]+]]:
+// LLVM-NEXT:    store i32 1, ptr %[[Y:[0-9]+]]
+// LLVM-NEXT:    br label
+//      LLVM:  [[DEFAULT]]:
+// LLVM-NEXT:    %[[DIFF:[0-9]+]] = sub i32 %[[X]], -2147483648
+// LLVM-NEXT:    %[[DIFF_CMP:[0-9]+]] = icmp ule i32 %[[DIFF]], -1
+// LLVM-NEXT:    br i1 %[[DIFF_CMP]], label %[[CASE_MIN_MAX]], label %[[EPILOG:[0-9]+]]
+//      LLVM:  [[EPILOG]]:
+// LLVM-NEXT:    br label %[[EPILOG_END:[0-9]+]]
+//      LLVM:  [[EPILOG_END]]:
+// LLVM-NEXT:    ret void
+
+void sw7(int x) {
+  switch(x) {
+  case 0:
+    break;
+  case 100 ... 200:
+    break;
+  case 1:
+    break;
+  case 300 ... 400:
+    break;
+  default:
+    break;
+  case 500 ... 600:
+    break;
+  }
+}
+
+//      CIR:  cir.func {{.*}} @_Z3sw7i
+//      CIR:    cir.scope {
+//      CIR:      cir.switch
+// CIR-NEXT:      cir.case(equal, [#cir.int<0> : !s32i]) {
+// CIR-NEXT:        cir.break
+// CIR-NEXT:      }
+// CIR-NEXT:      cir.case(range, [#cir.int<100> : !s32i, #cir.int<200> : !s32i]) {
+// CIR-NEXT:        cir.break
+// CIR-NEXT:      }
+// CIR-NEXT:      cir.case(equal, [#cir.int<1> : !s32i]) {
+// CIR-NEXT:        cir.break
+// CIR-NEXT:      }
+// CIR-NEXT:      cir.case(range, [#cir.int<300> : !s32i, #cir.int<400> : !s32i]) {
+// CIR-NEXT:        cir.break
+// CIR-NEXT:      }
+// CIR-NEXT:      cir.case(default, []) {
+// CIR-NEXT:        cir.break
+// CIR-NEXT:      }
+// CIR-NEXT:      cir.case(range, [#cir.int<500> : !s32i, #cir.int<600> : !s32i]) {
+// CIR-NEXT:        cir.break
+// CIR-NEXT:      }
+
+//      LLVM:  @_Z3sw7i
+//      LLVM:    switch i32 %[[X:[0-9]+]], label %[[JUDGE_RANGE_500_600:[0-9]+]] [
+// LLVM-NEXT:      i32 0, label %[[CASE_0:[0-9]+]]
+// LLVM-NEXT:      i32 1, label %[[CASE_1:[0-9]+]]
+// LLVM-NEXT:    ]
+//      LLVM:  [[CASE_0]]:
+// LLVM-NEXT:    br label %[[EPILOG:[0-9]+]]
+//      LLVM:  [[CASE_100_200:[0-9]+]]:
+// LLVM-NEXT:    br label %[[EPILOG]]
+//      LLVM:  [[CASE_1]]:
+// LLVM-NEXT:    br label %[[EPILOG]]
+//      LLVM:  [[CASE_300_400:[0-9]+]]:
+// LLVM-NEXT:    br label %[[EPILOG]]
+//      LLVM:  [[JUDGE_RANGE_500_600]]:
+// LLVM-NEXT:    %[[DIFF:[0-9]+]] = sub i32 %[[X]], 500
+// LLVM-NEXT:    %[[DIFF_CMP:[0-9]+]] = icmp ule i32 %[[DIFF]], 100
+// LLVM-NEXT:    br i1 %[[DIFF_CMP]], label %[[CASE_500_600:[0-9]+]], label %[[JUDGE_RANGE_300_400:[0-9]+]]
+//      LLVM:  [[JUDGE_RANGE_300_400]]:
+// LLVM-NEXT:    %[[DIFF:[0-9]+]] = sub i32 %[[X]], 300
+// LLVM-NEXT:    %[[DIFF_CMP:[0-9]+]] = icmp ule i32 %[[DIFF]], 100
+// LLVM-NEXT:    br i1 %[[DIFF_CMP]], label %[[CASE_300_400]], label %[[JUDGE_RANGE_100_200:[0-9]+]]
+//      LLVM:  [[JUDGE_RANGE_100_200]]:
+// LLVM-NEXT:    %[[DIFF:[0-9]+]] = sub i32 %[[X]], 100
+// LLVM-NEXT:    %[[DIFF_CMP:[0-9]+]] = icmp ule i32 %[[DIFF]], 100
+// LLVM-NEXT:    br i1 %[[DIFF_CMP]], label %[[CASE_100_200]], label %[[DEFAULT:[0-9]+]]
+//      LLVM:  [[DEFAULT]]:
+// LLVM-NEXT:    br label %[[EPILOG]]
+//      LLVM:  [[CASE_500_600]]:
+// LLVM-NEXT:    br label %[[EPILOG]]
+//      LLVM:  [[EPILOG]]:
+// LLVM-NEXT:    br label %[[EPILOG_END:[0-9]+]]
+//      LLVM:  [[EPILOG_END]]:
+// LLVM-NEXT:    ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/switch-unreachable-after-break.cpp b/clang/test/CIR/Incubator/CodeGen/switch-unreachable-after-break.cpp
new file mode 100644
index 0000000000000..5b72b3a02b9b5
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/switch-unreachable-after-break.cpp
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// CHECK: _Z23unreachable_after_breaki
+void unreachable_after_break(int a) {
+  switch(a) {
+  case 0:
+    break;
+    break;
+    int x = 1;
+  }
+  // cir.switch
+  //   cir.case(equal, [#cir.int<0> : !s32i]) {
+  //     cir.break
+  //   ^bb{{.*}}:  // no predecessors
+  //     cir.break
+  //   ^bb{{.*}}:  // no predecessors
+  //     %[[CONST:.*]] = cir.const #cir.int<1> : !s32i
+  //     cir.store align(4) {{.*}}, %[[CONST]]
+  //     cir.yield
+}
+
+// CHECK: _Z24unreachable_after_returni
+int unreachable_after_return(int a) {
+  switch (a) {
+  case 0:
+    return 0;
+    return 1;
+    int x = 3;
+  }
+  return 2;
+  // cir.switch
+  //   cir.case(equal, [#cir.int<0> : !s32i]) {
+  //     %[[CONST_ZERO:.*]] = cir.const #cir.int<0> : !s32i
+  //     cir.store {{.*}}, %[[CONST_ZERO]]
+  //     cir.br ^bb1
+  //   ^bb1:  // 2 preds: ^bb0, ^bb2
+  //     cir.load
+  //     cir.return
+  //   ^bb2:  // no predecessors
+  //     %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !s32i
+  //     cir.store %[[CONST_ONE]]
+  //     cir.br ^bb1
+  //   ^bb3:  // no predecessors
+  //     %[[CONST_THREE:.*]] = cir.const #cir.int<3> : !s32i
+  //     cir.store align(4) %[[CONST_THREE]]
+  //     cir.yield
+  //   }
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/switch.cpp b/clang/test/CIR/Incubator/CodeGen/switch.cpp
new file mode 100644
index 0000000000000..7cb3516937b04
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/switch.cpp
@@ -0,0 +1,381 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+void sw1(int a) {
+  switch (int b = 1; a) {
+  case 0:
+    b = b + 1;
+    break;
+  case 1:
+    break;
+  case 2: {
+    b = b + 1;
+    int yolo = 100;
+    break;
+  }
+  }
+}
+// CHECK: cir.func {{.*}} @_Z3sw1i
+// CHECK: cir.switch (%3 : !s32i) {
+// CHECK-NEXT: cir.case(equal, [#cir.int<0> : !s32i]) {
+// CHECK: cir.break
+// CHECK: cir.case(equal, [#cir.int<1> : !s32i]) {
+// CHECK-NEXT: cir.break
+// CHECK: cir.case(equal, [#cir.int<2> : !s32i]) {
+// CHECK: cir.scope {
+// CHECK: cir.alloca !s32i, !cir.ptr<!s32i>, ["yolo", init]
+// CHECK: cir.break
+
+void sw2(int a) {
+  switch (int yolo = 2; a) {
+  case 3:
+    // "fomo" has the same lifetime as "yolo"
+    int fomo = 0;
+    yolo = yolo + fomo;
+    break;
+  }
+}
+
+// CHECK: cir.func {{.*}} @_Z3sw2i
+// CHECK: cir.scope {
+// CHECK-NEXT:   %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["yolo", init]
+// CHECK-NEXT:   %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["fomo", init]
+// CHECK:        cir.switch (%4 : !s32i) {
+// CHECK-NEXT:   cir.case(equal, [#cir.int<3> : !s32i]) {
+// CHECK-NEXT:     %5 = cir.const #cir.int<0> : !s32i
+// CHECK-NEXT:     cir.store{{.*}} %5, %2 : !s32i, !cir.ptr<!s32i>
+
+void sw3(int a) {
+  switch (a) {
+  default:
+    break;
+  }
+}
+
+// CHECK: cir.func {{.*}} @_Z3sw3i
+// CHECK: cir.scope {
+// CHECK-NEXT:   %1 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:   cir.switch (%1 : !s32i) {
+// CHECK-NEXT:   cir.case(default, []) {
+// CHECK-NEXT:     cir.break
+// CHECK-NEXT:   }
+// CHECK-NEXT:   cir.yield
+// CHECK-NEXT:   }
+
+int sw4(int a) {
+  switch (a) {
+  case 42: {
+    return 3;
+  }
+  default:
+    return 2;
+  }
+  return 0;
+}
+
+// CHECK: cir.func {{.*}} @_Z3sw4i
+// CHECK:       cir.switch (%4 : !s32i) {
+// CHECK-NEXT:       cir.case(equal, [#cir.int<42> : !s32i]) {
+// CHECK-NEXT:         cir.scope {
+// CHECK-NEXT:           %5 = cir.const #cir.int<3> : !s32i
+// CHECK-NEXT:           cir.store{{.*}} %5, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:           %6 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:           cir.return %6 : !s32i
+// CHECK-NEXT:         }
+// CHECK-NEXT:         cir.yield
+// CHECK-NEXT:       }
+// CHECK-NEXT:       cir.case(default, []) {
+// CHECK-NEXT:         %5 = cir.const #cir.int<2> : !s32i
+// CHECK-NEXT:         cir.store{{.*}} %5, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:         %6 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:         cir.return %6 : !s32i
+// CHECK-NEXT:       }
+// CHECK-NEXT:       cir.yield
+// CHECK-NEXT:       }
+
+void sw5(int a) {
+  switch (a) {
+  case 1:;
+  }
+}
+
+// CHECK: cir.func {{.*}} @_Z3sw5i
+// CHECK: cir.switch (%1 : !s32i) {
+// CHECK-NEXT:   cir.case(equal, [#cir.int<1> : !s32i]) {
+// CHECK-NEXT:     cir.yield
+// CHECK-NEXT:   }
+// CHECK-NEXT:   cir.yield
+// CHECK-NEXT:   }
+
+void sw6(int a) {
+  switch (a) {
+  case 0:
+  case 1:
+  case 2:
+    break;
+  case 3:
+  case 4:
+  case 5:
+    break;
+  }
+}
+
+// CHECK: cir.func {{.*}} @_Z3sw6i
+// CHECK: cir.switch (%1 : !s32i) {
+// CHECK-NEXT: cir.case(anyof, [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i]) {
+// CHECK-NEXT:   cir.break
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.case(anyof, [#cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i]) {
+// CHECK-NEXT:   cir.break
+// CHECK-NEXT: }
+
+void sw7(int a) {
+  switch (a) {
+  case 0:
+  case 1:
+  case 2:
+    int x;
+  case 3:
+  case 4:
+  case 5:
+    break;
+  }
+}
+
+// CHECK: cir.func {{.*}} @_Z3sw7i
+// CHECK: cir.case(anyof, [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i]) {
+// CHECK-NEXT:   cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.case(anyof, [#cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i]) {
+// CHECK-NEXT:   cir.break
+// CHECK-NEXT: }
+
+void sw8(int a) {
+  switch (a)
+  {
+  case 3:
+    break;
+  case 4:
+  default:
+    break;
+  }
+}
+
+//CHECK:    cir.func {{.*}} @_Z3sw8i
+//CHECK:      cir.case(equal, [#cir.int<3> : !s32i]) {
+//CHECK-NEXT:   cir.break
+//CHECK-NEXT: }
+//CHECK-NEXT: cir.case(equal, [#cir.int<4> : !s32i]) {
+//CHECK-NEXT:   cir.yield
+//CHECK-NEXT: }
+//CHECK-NEXT: cir.case(default, []) {
+//CHECK-NEXT:   cir.break
+//CHECK-NEXT: }
+
+void sw9(int a) {
+  switch (a)
+  {
+  case 3:
+    break;
+  default:
+  case 4:
+    break;
+  }
+}
+
+//CHECK:    cir.func {{.*}} @_Z3sw9i
+//CHECK:      cir.case(equal, [#cir.int<3> : !s32i]) {
+//CHECK-NEXT:   cir.break
+//CHECK-NEXT: }
+//CHECK-NEXT: cir.case(default, []) {
+//CHECK-NEXT:   cir.yield
+//CHECK-NEXT: }
+//CHECK-NEXT: cir.case(equal, [#cir.int<4> : !s32i]) {
+//CHECK-NEXT:   cir.break
+//CHECK-NEXT: }
+
+void sw10(int a) {
+  switch (a)
+  {
+  case 3:
+    break;
+  case 4:
+  default:
+  case 5:
+    break;
+  }
+}
+
+//CHECK:    cir.func {{.*}} @_Z4sw10i
+//CHECK:      cir.case(equal, [#cir.int<3> : !s32i]) {
+//CHECK-NEXT:   cir.break
+//CHECK-NEXT: }
+//CHECK-NEXT: cir.case(equal, [#cir.int<4> : !s32i]) {
+//CHECK-NEXT:   cir.yield
+//CHECK-NEXT: }
+//CHECK-NEXT: cir.case(default, []) {
+//CHECK-NEXT:   cir.yield
+//CHECK-NEXT: }
+//CHECK-NEXT: cir.case(equal, [#cir.int<5> : !s32i]) {
+//CHECK-NEXT:   cir.break
+//CHECK-NEXT: }
+
+void sw11(int a) {
+  switch (a)
+  {
+  case 3:
+    break;
+  case 4:
+  case 5:
+  default:
+  case 6:
+  case 7:
+    break;
+  }
+}
+
+//CHECK:    cir.func {{.*}} @_Z4sw11i
+//CHECK:      cir.case(equal, [#cir.int<3> : !s32i]) {
+//CHECK-NEXT:   cir.break
+//CHECK-NEXT: }
+//CHECK-NEXT: cir.case(anyof, [#cir.int<4> : !s32i, #cir.int<5> : !s32i]) {
+//CHECK-NEXT:   cir.yield
+//CHECK-NEXT: }
+//CHECK-NEXT: cir.case(default, []) {
+//CHECK-NEXT:   cir.yield
+//CHECK-NEXT: }
+//CHECK-NEXT: cir.case(anyof, [#cir.int<6> : !s32i, #cir.int<7> : !s32i]) {
+//CHECK-NEXT:   cir.break
+//CHECK-NEXT: }
+
+void sw12(int a) {
+  switch (a)
+  {
+  case 3:
+    return;
+    break;
+  }
+}
+
+//      CHECK: cir.func {{.*}} @_Z4sw12i
+//      CHECK:   cir.scope {
+//      CHECK:     cir.switch
+// CHECK-NEXT:     cir.case(equal, [#cir.int<3> : !s32i]) {
+// CHECK-NEXT:       cir.return
+// CHECK-NEXT:     ^bb1:  // no predecessors
+// CHECK-NEXT:       cir.break
+// CHECK-NEXT:     }
+
+void sw13(int a, int b) {
+  switch (a) {
+  case 1:
+    switch (b) {
+    case 2:
+      break;
+    }
+  }
+}
+
+//      CHECK:  cir.func {{.*}} @_Z4sw13ii
+//      CHECK:    cir.scope {
+//      CHECK:      cir.switch
+// CHECK-NEXT:      cir.case(equal, [#cir.int<1> : !s32i]) {
+// CHECK-NEXT:        cir.scope {
+//      CHECK:          cir.switch
+// CHECK-NEXT:          cir.case(equal, [#cir.int<2> : !s32i]) {
+// CHECK-NEXT:            cir.break
+// CHECK-NEXT:          }
+// CHECK-NEXT:          cir.yield
+// CHECK-NEXT:        }
+// CHECK-NEXT:      }
+// CHECK:         cir.yield
+//      CHECK:    }
+//      CHECK:    cir.return
+
+void fallthrough(int x) {
+  switch (x) {
+    case 1:
+      __attribute__((fallthrough));
+    case 2:
+      break;
+    default:
+      break;
+  }
+}
+
+//      CHECK:  cir.func {{.*}} @_Z11fallthroughi
+//      CHECK:    cir.scope {
+//      CHECK:      cir.switch (%1 : !s32i) {
+// CHECK-NEXT:      cir.case(equal, [#cir.int<1> : !s32i]) {
+// CHECK-NEXT:        cir.yield
+// CHECK-NEXT:      }
+// CHECK-NEXT:      cir.case(equal, [#cir.int<2> : !s32i]) {
+// CHECK-NEXT:        cir.break
+// CHECK-NEXT:      }
+// CHECK-NEXT:      cir.case(default, []) {
+// CHECK-NEXT:        cir.break
+// CHECK-NEXT:      }
+// CHECK-NEXT:      cir.yield
+// CHECK-NEXT:      }
+// CHECK-NEXT:    }
+
+int unreachable_after_break_1(int a) {
+  switch (a) {
+    case(42):
+      break;
+      goto exit;
+    default:
+      return 0;
+  };
+
+exit:
+  return -1;
+
+}
+// CHECK: cir.func {{.*}} @_Z25unreachable_after_break_1i
+// CHECK:   cir.case(equal, [#cir.int<42> : !s32i]) {
+// CHECK:     cir.break
+// CHECK:   ^bb1:  // no predecessors
+// CHECK:     cir.goto "exit"
+// CHECK:   }
+
+int nested_switch(int a) {
+  switch (int b = 1; a) {
+  case 0:
+    b = b + 1;
+  case 1:
+    return b;
+  case 2: {
+    b = b + 1;
+    if (a > 1000) {
+        case 9:
+          b += a;
+    }
+    if (a > 500) {
+        case 7:
+          return a + b;
+    }
+    break;
+  }
+  }
+
+  return 0;
+}
+
+// CHECK: cir.switch (%6 : !s32i) {
+// CHECK:   cir.case(equal, [#cir.int<0> : !s32i]) {
+// CHECK:     cir.yield
+// CHECK:   }
+// CHECK:   cir.case(equal, [#cir.int<1> : !s32i]) {
+// CHECK:     cir.return
+// CHECK:   }
+// CHECK:   cir.case(equal, [#cir.int<2> : !s32i]) {
+// CHECK:     cir.scope {
+// CHECK:     cir.scope {
+// CHECK:       cir.if
+// CHECK:         cir.case(equal, [#cir.int<9> : !s32i]) {
+// CHECK:         cir.yield
+// CHECK:     cir.scope {
+// CHECK:         cir.if
+// CHECK:           cir.case(equal, [#cir.int<7> : !s32i]) {
+// CHECK:           cir.return
diff --git a/clang/test/CIR/Incubator/CodeGen/synthetic-try-resume.cpp b/clang/test/CIR/Incubator/CodeGen/synthetic-try-resume.cpp
new file mode 100644
index 0000000000000..2d35958c58480
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/synthetic-try-resume.cpp
@@ -0,0 +1,89 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -I%S/../Inputs -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -I%S/../Inputs -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+#include "std-cxx.h"
+
+// CIR-LABEL:  @_Z3fooPKc
+// LLVM-LABEL: @_Z3fooPKc
+
+void foo(const char* path) {
+  std::string str = path;
+  str = path;
+  str = path;
+}
+
+// CIR: cir.try synthetic cleanup {
+// CIR:   cir.call exception @_ZNSbIcEC1EPKcRKNS_9AllocatorE({{.*}}, {{.*}}, {{.*}}) : (!cir.ptr<!rec_std3A3Abasic_string3Cchar3E>, !cir.ptr<!s8i>, !cir.ptr<!rec_std3A3Abasic_string3Cchar3E3A3AAllocator>) -> () cleanup {
+// CIR:     cir.call @_ZNSbIcED1Ev({{.*}}) : (!cir.ptr<!rec_std3A3Abasic_string3Cchar3E>) -> ()
+// CIR:     cir.yield
+// CIR:   }
+// CIR:   cir.yield
+// CIR: } catch [#cir.unwind {
+// CIR:   cir.resume
+// CIR: }]
+// CIR: cir.try synthetic cleanup {
+// CIR:   {{.*}} = cir.call exception @_ZNSbIcEaSERKS_({{.*}}, {{.*}}) : (!cir.ptr<!rec_std3A3Abasic_string3Cchar3E>, !cir.ptr<!rec_std3A3Abasic_string3Cchar3E>) -> !cir.ptr<!rec_std3A3Abasic_string3Cchar3E> cleanup {
+// CIR:     cir.call @_ZNSbIcED1Ev({{.*}}) : (!cir.ptr<!rec_std3A3Abasic_string3Cchar3E>) -> ()
+// CIR:     cir.yield
+// CIR:   }
+// CIR:   cir.store {{.*}}, {{.*}} : !cir.ptr<!rec_std3A3Abasic_string3Cchar3E>, !cir.ptr<!cir.ptr<!rec_std3A3Abasic_string3Cchar3E>>
+// CIR:   cir.yield
+// CIR: } catch [#cir.unwind {
+// CIR:   cir.resume
+// CIR: }]
+// CIR: {{.*}} = cir.load {{.*}} : !cir.ptr<!cir.ptr<!rec_std3A3Abasic_string3Cchar3E>>, !cir.ptr<!rec_std3A3Abasic_string3Cchar3E>
+// CIR: cir.call @_ZNSbIcED1Ev({{.*}}) : (!cir.ptr<!rec_std3A3Abasic_string3Cchar3E>) -> ()
+// CIR: cir.try synthetic cleanup {
+// CIR:   cir.call exception @_ZNSbIcEC1EPKcRKNS_9AllocatorE({{.*}}, {{.*}}, {{.*}}) : (!cir.ptr<!rec_std3A3Abasic_string3Cchar3E>, !cir.ptr<!s8i>, !cir.ptr<!rec_std3A3Abasic_string3Cchar3E3A3AAllocator>) -> ()
+// CIR:   cir.yield
+// CIR: } catch [#cir.unwind {
+// CIR:   cir.resume
+// CIR: }]
+// CIR: cir.try synthetic cleanup {
+// CIR:   {{.*}} = cir.call exception @_ZNSbIcEaSERKS_({{.*}}, {{.*}}) : (!cir.ptr<!rec_std3A3Abasic_string3Cchar3E>, !cir.ptr<!rec_std3A3Abasic_string3Cchar3E>) -> !cir.ptr<!rec_std3A3Abasic_string3Cchar3E> cleanup {
+// CIR:     cir.call @_ZNSbIcED1Ev({{.*}}) : (!cir.ptr<!rec_std3A3Abasic_string3Cchar3E>) -> ()
+// CIR:     cir.call @_ZNSbIcED1Ev({{.*}}) : (!cir.ptr<!rec_std3A3Abasic_string3Cchar3E>) -> ()
+// CIR:     cir.yield
+// CIR:   }
+// CIR:   cir.store {{.*}}, {{.*}} : !cir.ptr<!rec_std3A3Abasic_string3Cchar3E>, !cir.ptr<!cir.ptr<!rec_std3A3Abasic_string3Cchar3E>>
+// CIR:   cir.yield
+// CIR: } catch [#cir.unwind {
+// CIR:   cir.resume
+// CIR: }]
+
+// LLVM:  invoke void @_ZNSbIcEC1EPKcRKNS_9AllocatorE(ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+// LLVM:           to label {{.*}} unwind label %[[B18:.*]]
+// LLVM: [[B18]]
+// LLVM:   call void @_ZNSbIcED1Ev(ptr {{.*}})
+// LLVM:   br label %[[B22:.*]]
+// LLVM: [[B22]]
+// LLVM:   resume { ptr, i32 } {{.*}}
+// LLVM: {{.*}}:
+// LLVM:   {{.*}} = invoke ptr @_ZNSbIcEaSERKS_(ptr {{.*}}, ptr {{.*}})
+// LLVM:           to label {{.*}} unwind label %[[B31:.*]]
+// LLVM: [[B31]]
+// LLVM:   call void @_ZNSbIcED1Ev(ptr {{.*}})
+// LLVM:   br label %[[B35:.*]]
+// LLVM: [[B35]]
+// LLVM:   resume { ptr, i32 } {{.*}}
+// LLVM: {{.*}}:
+// LLVM:   call void @_ZNSbIcED1Ev(ptr {{.*}})
+// LLVM:   br label {{.*}}
+// LLVM: {{.*}}:
+// LLVM:   invoke void @_ZNSbIcEC1EPKcRKNS_9AllocatorE(ptr {{.*}}, ptr {{.*}}, ptr {{.*}})
+// LLVM:           to label {{.*}} unwind label %[[B46:.*]]
+// LLVM: [[B46]]
+// LLVM:   br label %[[B50:.*]]
+// LLVM: [[B50]]
+// LLVM:   resume { ptr, i32 } {{.*}}
+// LLVM: {{.*}}:
+// LLVM:   {{.*}} = invoke ptr @_ZNSbIcEaSERKS_(ptr {{.*}}, ptr {{.*}})
+// LLVM:           to label {{.*}} unwind label %[[B59:.*]]
+// LLVM: [[B59]]
+// LLVM:   call void @_ZNSbIcED1Ev(ptr {{.*}})
+// LLVM:   call void @_ZNSbIcED1Ev(ptr {{.*}})
+// LLVM:   br label %[[B63:.*]]
+// LLVM: [[B63]]
+// LLVM:   resume { ptr, i32 } {{.*}}
diff --git a/clang/test/CIR/Incubator/CodeGen/tbaa-bitinit.c b/clang/test/CIR/Incubator/CodeGen/tbaa-bitinit.c
new file mode 100644
index 0000000000000..ca90bfc6e96ea
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/tbaa-bitinit.c
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -O1
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -disable-llvm-passes
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// CIR: #tbaa[[BitInt33:.*]] = #cir.tbaa_scalar<id = "_BitInt(33)", type = !cir.int<s, 33>>
+// CIR: #tbaa[[BitInt31:.*]] = #cir.tbaa_scalar<id = "_BitInt(31)", type = !cir.int<s, 31>>
+
+_BitInt(33) a;
+_BitInt(31) b;
+void c() {
+  // CIR-LABEL: cir.func {{.*}} @c()
+  // CIR: %{{.*}} = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.int<s, 33>>, !cir.int<s, 33> tbaa(#tbaa[[BitInt33]])
+  // CIR: cir.store{{.*}} %{{.*}}, %{{.*}} : !cir.int<s, 31>, !cir.ptr<!cir.int<s, 31>> tbaa(#tbaa[[BitInt31]])
+
+  // LLVM-LABEL: define {{.*}} void @c()
+  // LLVM: %{{.*}} = load i33, ptr @a, align 8, !tbaa [[tbaa_tag_bitint_33:!.*]]
+  // LLVM: store i31 %{{.*}}, ptr @b, align 4, !tbaa [[tbaa_tag_bitint_31:!.*]]
+  b = a;
+}
+// LLVM: [[tbaa_tag_bitint_33]] = !{[[TYPE_bitint_33:!.*]], [[TYPE_bitint_33]], i64 0}
+// LLVM: [[TYPE_bitint_33]] = !{!"_BitInt(33)", [[TYPE_char:!.*]], i64 0}
+// LLVM: [[TYPE_char]] = !{!"omnipotent char", [[TAG_c_tbaa:!.*]], i64 0}
+// LLVM: [[TAG_c_tbaa]] = !{!"Simple C/C++ TBAA"}
+// LLVM: [[tbaa_tag_bitint_31]] = !{[[TYPE_bitint_31:!.*]], [[TYPE_bitint_31]], i64 0}
+// LLVM: [[TYPE_bitint_31]] = !{!"_BitInt(31)", [[TYPE_char:!.*]], i64 0}
diff --git a/clang/test/CIR/Incubator/CodeGen/tbaa-enum.c b/clang/test/CIR/Incubator/CodeGen/tbaa-enum.c
new file mode 100644
index 0000000000000..f094cddc93c5b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/tbaa-enum.c
@@ -0,0 +1,148 @@
+// This is inspired by clang/test/CodeGen/tbaa.c, with both CIR and LLVM checks.
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -O1 -no-pointer-tbaa
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -disable-llvm-passes -no-pointer-tbaa
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -disable-llvm-passes -relaxed-aliasing -no-pointer-tbaa
+// RUN: FileCheck --check-prefix=NO-TBAA --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O0 -disable-llvm-passes -no-pointer-tbaa
+// RUN: FileCheck --check-prefix=NO-TBAA --input-file=%t.ll %s
+
+// NO-TBAA-NOT: !tbaa
+
+// CIR: #tbaa[[CHAR:.*]] = #cir.tbaa_omnipotent_char
+// CIR: #tbaa[[INT:.*]] = #cir.tbaa_scalar<id = "int", type = !s32i>
+// CIR: #tbaa[[LONG_LONG:.*]] = #cir.tbaa_scalar<id = "long long", type = !s64i>
+// CIR: #tbaa[[LONG:.*]] = #cir.tbaa_scalar<id = "long", type = !s64i>
+// CIR: #tbaa[[SHORT:.*]] = #cir.tbaa_scalar<id = "short", type = !s16i>
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned long long uint64_t;
+
+typedef enum {
+  RED_AUTO_32,
+  GREEN_AUTO_32,
+  BLUE_AUTO_32
+} EnumAuto32;
+
+typedef enum {
+  RED_AUTO_64,
+  GREEN_AUTO_64,
+  BLUE_AUTO_64 = 0x100000000ull
+} EnumAuto64;
+
+typedef enum : uint16_t {
+  RED_16,
+  GREEN_16,
+  BLUE_16
+} Enum16;
+
+typedef enum : uint8_t {
+  RED_8,
+  GREEN_8,
+  BLUE_8
+} Enum8;
+
+uint32_t g0(EnumAuto32 *E, uint32_t *val) {
+  // CIR-LABEL: cir.func {{.*}} @g0
+  // CIR: %[[C5:.*]] = cir.const #cir.int<5> : !s32i
+  // CIR: %[[U_C5:.*]] = cir.cast integral %[[C5]] : !s32i -> !u32i
+  // CIR: %[[VAL_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u32i>>, !cir.ptr<!u32i>
+  // CIR: cir.store{{.*}} %[[U_C5]], %[[VAL_PTR]] : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[INT]])
+  // CIR: %[[C0:.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: %[[U_C0:.*]] = cir.cast integral %[[C0]] : !s32i -> !u32i
+  // CIR: %[[E_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u32i>>, !cir.ptr<!u32i>
+  // CIR: cir.store{{.*}} %[[U_C0]], %[[E_PTR]] : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[INT]])
+  // CIR: %[[RET_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u32i>>, !cir.ptr<!u32i>
+  // CIR: %[[RET:.*]] = cir.load{{.*}} %[[RET_PTR]] : !cir.ptr<!u32i>, !u32i tbaa(#tbaa[[INT]])
+  // CIR: cir.store{{.*}} %[[RET]], %{{.*}} : !u32i, !cir.ptr<!u32i>
+
+  // LLVM-LABEL: define{{.*}} i32 @g0(
+  // LLVM: store i32 5, ptr %{{.*}}, align 4, !tbaa [[TAG_i32:!.*]]
+  // LLVM: store i32 0, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // LLVM: load i32, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  *val = 5;
+  *E = RED_AUTO_32;
+  return *val;
+}
+
+uint64_t g1(EnumAuto64 *E, uint64_t *val) {
+  // CIR-LABEL: cir.func {{.*}} @g1
+  // CIR: %[[C5:.*]] = cir.const #cir.int<5> : !s32i
+  // CIR: %[[U_C5:.*]] = cir.cast integral %[[C5]] : !s32i -> !u64i
+  // CIR: %[[VAL_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+  // CIR: cir.store{{.*}} %[[U_C5]], %[[VAL_PTR]] : !u64i, !cir.ptr<!u64i> tbaa(#tbaa[[LONG_LONG]])
+  // CIR: %[[C0:.*]] = cir.const #cir.int<0> : !u64i
+  // CIR: %[[E_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+  // CIR: cir.store{{.*}} %[[C0]], %[[E_PTR]] : !u64i, !cir.ptr<!u64i> tbaa(#tbaa[[LONG]])
+  // CIR: %[[RET_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+  // CIR: %[[RET:.*]] = cir.load{{.*}} %[[RET_PTR]] : !cir.ptr<!u64i>, !u64i tbaa(#tbaa[[LONG_LONG]])
+  // CIR: cir.store{{.*}} %[[RET]], %{{.*}} : !u64i, !cir.ptr<!u64i>
+
+  // LLVM-LABEL: define{{.*}} i64 @g1(
+  // LLVM: store i64 5, ptr %{{.*}}, align 8, !tbaa [[TAG_i64:!.*]]
+  // LLVM: store i64 0, ptr %{{.*}}, align 8, !tbaa [[TAG_long:!.*]]
+  // LLVM: load i64, ptr %{{.*}}, align 8, !tbaa [[TAG_i64]]
+  *val = 5;
+  *E = RED_AUTO_64;
+  return *val;
+}
+
+uint16_t g2(Enum16 *E, uint16_t *val) {
+  // CIR-LABEL: cir.func {{.*}} @g2
+  // CIR: %[[C5:.*]] = cir.const #cir.int<5> : !s32i
+  // CIR: %[[U_C5:.*]] = cir.cast integral %[[C5]] : !s32i -> !u16i
+  // CIR: %[[VAL_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u16i>>, !cir.ptr<!u16i>
+  // CIR: cir.store{{.*}} %[[U_C5]], %[[VAL_PTR]] : !u16i, !cir.ptr<!u16i> tbaa(#tbaa[[SHORT]])
+  // CIR: %[[C0:.*]] = cir.const #cir.int<0> : !u16i
+  // CIR: %[[E_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u16i>>, !cir.ptr<!u16i>
+  // CIR: cir.store{{.*}} %[[C0]], %[[E_PTR]] : !u16i, !cir.ptr<!u16i> tbaa(#tbaa[[SHORT]])
+  // CIR: %[[RET_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u16i>>, !cir.ptr<!u16i>
+  // CIR: %[[RET:.*]] = cir.load{{.*}} %[[RET_PTR]] : !cir.ptr<!u16i>, !u16i tbaa(#tbaa[[SHORT]])
+  // CIR: cir.store{{.*}} %[[RET]], %{{.*}} : !u16i, !cir.ptr<!u16i>
+
+  // LLVM-LABEL: define{{.*}} i16 @g2(
+  // LLVM: store i16 5, ptr %{{.*}}, align 2, !tbaa [[TAG_i16:!.*]]
+  // LLVM: store i16 0, ptr %{{.*}}, align 2, !tbaa [[TAG_i16]]
+  // LLVM: load i16, ptr %{{.*}}, align 2, !tbaa [[TAG_i16]]
+  *val = 5;
+  *E = RED_16;
+  return *val;
+}
+
+uint8_t g3(Enum8 *E, uint8_t *val) {
+  // CIR-LABEL: cir.func {{.*}} @g3
+  // CIR: %[[C5:.*]] = cir.const #cir.int<5> : !s32i
+  // CIR: %[[U_C5:.*]] = cir.cast integral %[[C5]] : !s32i -> !u8i
+  // CIR: %[[VAL_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u8i>>, !cir.ptr<!u8i>
+  // CIR: cir.store{{.*}} %[[U_C5]], %[[VAL_PTR]] : !u8i, !cir.ptr<!u8i> tbaa(#tbaa[[CHAR]])
+  // CIR: %[[C0:.*]] = cir.const #cir.int<0> : !u8i
+  // CIR: %[[E_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u8i>>, !cir.ptr<!u8i>
+  // CIR: cir.store{{.*}} %[[C0]], %[[E_PTR]] : !u8i, !cir.ptr<!u8i> tbaa(#tbaa[[CHAR]])
+  // CIR: %[[RET_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u8i>>, !cir.ptr<!u8i>
+  // CIR: %[[RET:.*]] = cir.load{{.*}} %[[RET_PTR]] : !cir.ptr<!u8i>, !u8i tbaa(#tbaa[[CHAR]])
+  // CIR: cir.store{{.*}} %[[RET]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+
+
+  // LLVM-LABEL: define{{.*}} i8 @g3(
+  // LLVM: store i8 5, ptr %{{.*}}, align 1, !tbaa [[TAG_i8:!.*]]
+  // LLVM: store i8 0, ptr %{{.*}}, align 1, !tbaa [[TAG_i8]]
+  // LLVM: load i8, ptr %{{.*}}, align 1, !tbaa [[TAG_i8]]
+  *val = 5;
+  *E = RED_8;
+  return *val;
+}
+
+// LLVM: [[TYPE_char:!.*]] = !{!"omnipotent char", [[TAG_c_tbaa:!.*]],
+// LLVM: [[TAG_c_tbaa]] = !{!"Simple C/C++ TBAA"}
+// LLVM: [[TAG_i32]] = !{[[TYPE_i32:!.*]], [[TYPE_i32]], i64 0}
+// LLVM: [[TYPE_i32]] = !{!"int", [[TYPE_char]],
+// LLVM: [[TAG_i64]] = !{[[TYPE_i64:!.*]], [[TYPE_i64]], i64 0}
+// LLVM: [[TYPE_i64]] = !{!"long long", [[TYPE_char]],
+// LLVM: [[TAG_long]] = !{[[TYPE_long:!.*]], [[TYPE_long]], i64 0}
+// LLVM: [[TYPE_long]] = !{!"long", [[TYPE_char]], i64 0}
+// LLVM: [[TAG_i16]] = !{[[TYPE_i16:!.*]], [[TYPE_i16]], i64 0}
+// LLVM: [[TYPE_i16]] = !{!"short", [[TYPE_char]],
+// LLVM: [[TAG_i8]] = !{[[TYPE_i8:!.*]], [[TYPE_char]], i64 0}
diff --git a/clang/test/CIR/Incubator/CodeGen/tbaa-enum.cpp b/clang/test/CIR/Incubator/CodeGen/tbaa-enum.cpp
new file mode 100644
index 0000000000000..56e5ce6e49eb0
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/tbaa-enum.cpp
@@ -0,0 +1,156 @@
+// This is inspired by clang/test/CodeGen/tbaa.c, with both CIR and LLVM checks.
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -O1 -no-pointer-tbaa
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -disable-llvm-passes -no-pointer-tbaa
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -disable-llvm-passes -relaxed-aliasing -no-pointer-tbaa
+// RUN: FileCheck --check-prefix=NO-TBAA --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O0 -disable-llvm-passes -no-pointer-tbaa
+// RUN: FileCheck --check-prefix=NO-TBAA --input-file=%t.ll %s
+
+// NO-TBAA-NOT: !tbaa
+
+// CIR: #tbaa[[CHAR:.*]] = #cir.tbaa_omnipotent_char
+// CIR: #tbaa[[INT:.*]] = #cir.tbaa_scalar<id = "int", type = !s32i>
+// CIR: #tbaa[[EnumAuto32:.*]] = #cir.tbaa_scalar<id = "_ZTS10EnumAuto32", type = !u32i>
+// CIR: #tbaa[[LONG_LONG:.*]] = #cir.tbaa_scalar<id = "long long", type = !s64i>
+// CIR: #tbaa[[EnumAuto64:.*]] = #cir.tbaa_scalar<id = "_ZTS10EnumAuto64", type = !u64i>
+// CIR: #tbaa[[SHORT:.*]] = #cir.tbaa_scalar<id = "short", type = !s16i>
+// CIR: #tbaa[[Enum16:.*]] = #cir.tbaa_scalar<id = "_ZTS6Enum16", type = !u16i>
+// CIR: #tbaa[[Enum8:.*]] = #cir.tbaa_scalar<id = "_ZTS5Enum8", type = !u8i>
+
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned long long uint64_t;
+
+typedef enum {
+  RED_AUTO_32,
+  GREEN_AUTO_32,
+  BLUE_AUTO_32
+} EnumAuto32;
+
+typedef enum {
+  RED_AUTO_64,
+  GREEN_AUTO_64,
+  BLUE_AUTO_64 = 0x100000000ull
+} EnumAuto64;
+
+typedef enum : uint16_t {
+  RED_16,
+  GREEN_16,
+  BLUE_16
+} Enum16;
+
+typedef enum : uint8_t {
+  RED_8,
+  GREEN_8,
+  BLUE_8
+} Enum8;
+
+uint32_t g0(EnumAuto32 *E, uint32_t *val) {
+  // CIR-LABEL: cir.func {{.*}} @_Z2g0
+  // CIR: %[[C5:.*]] = cir.const #cir.int<5> : !s32i
+  // CIR: %[[U_C5:.*]] = cir.cast integral %[[C5]] : !s32i -> !u32i
+  // CIR: %[[VAL_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u32i>>, !cir.ptr<!u32i>
+  // CIR: cir.store{{.*}} %[[U_C5]], %[[VAL_PTR]] : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[INT]])
+  // CIR: %[[C0:.*]] = cir.const #cir.int<0> : !u32i
+  // CIR: %[[E_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u32i>>, !cir.ptr<!u32i>
+  // CIR: cir.store{{.*}} %[[C0]], %[[E_PTR]] : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[EnumAuto32]])
+  // CIR: %[[RET_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u32i>>, !cir.ptr<!u32i>
+  // CIR: %[[RET:.*]] = cir.load{{.*}} %[[RET_PTR]] : !cir.ptr<!u32i>, !u32i tbaa(#tbaa[[INT]])
+  // CIR: cir.store{{.*}} %[[RET]], %{{.*}} : !u32i, !cir.ptr<!u32i>
+
+  // LLVM-LABEL: define{{.*}} i32 @_Z2g0
+  // LLVM: store i32 5, ptr %{{.*}}, align 4, !tbaa [[TAG_i32:!.*]]
+  // LLVM: store i32 0, ptr %{{.*}}, align 4, !tbaa [[TAG_EnumAuto32:!.*]]
+  // LLVM: load i32, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  *val = 5;
+  *E = RED_AUTO_32;
+  return *val;
+}
+
+uint64_t g1(EnumAuto64 *E, uint64_t *val) {
+  // CIR-LABEL: cir.func {{.*}} @_Z2g1
+  // CIR: %[[C5:.*]] = cir.const #cir.int<5> : !s32i
+  // CIR: %[[U_C5:.*]] = cir.cast integral %[[C5]] : !s32i -> !u64i
+  // CIR: %[[VAL_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+  // CIR: cir.store{{.*}} %[[U_C5]], %[[VAL_PTR]] : !u64i, !cir.ptr<!u64i> tbaa(#tbaa[[LONG_LONG]])
+  // CIR: %[[C0:.*]] = cir.const #cir.int<0> : !u64i
+  // CIR: %[[E_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+  // CIR: cir.store{{.*}} %[[C0]], %[[E_PTR]] : !u64i, !cir.ptr<!u64i> tbaa(#tbaa[[EnumAuto64]])
+  // CIR: %[[RET_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u64i>>, !cir.ptr<!u64i>
+  // CIR: %[[RET:.*]] = cir.load{{.*}} %[[RET_PTR]] : !cir.ptr<!u64i>, !u64i tbaa(#tbaa[[LONG_LONG]])
+  // CIR: cir.store{{.*}} %[[RET]], %{{.*}} : !u64i, !cir.ptr<!u64i>
+
+  // LLVM-LABEL: define{{.*}} i64 @_Z2g1
+  // LLVM: store i64 5, ptr %{{.*}}, align 8, !tbaa [[TAG_i64:!.*]]
+  // LLVM: store i64 0, ptr %{{.*}}, align 8, !tbaa [[TAG_EnumAuto64:!.*]]
+  // LLVM: load i64, ptr %{{.*}}, align 8, !tbaa [[TAG_i64]]
+  *val = 5;
+  *E = RED_AUTO_64;
+  return *val;
+}
+
+uint16_t g2(Enum16 *E, uint16_t *val) {
+  // CIR-LABEL: cir.func {{.*}} @_Z2g2
+  // CIR: %[[C5:.*]] = cir.const #cir.int<5> : !s32i
+  // CIR: %[[U_C5:.*]] = cir.cast integral %[[C5]] : !s32i -> !u16i
+  // CIR: %[[VAL_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u16i>>, !cir.ptr<!u16i>
+  // CIR: cir.store{{.*}} %[[U_C5]], %[[VAL_PTR]] : !u16i, !cir.ptr<!u16i> tbaa(#tbaa[[SHORT]])
+  // CIR: %[[C0:.*]] = cir.const #cir.int<0> : !u16i
+  // CIR: %[[E_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u16i>>, !cir.ptr<!u16i>
+  // CIR: cir.store{{.*}} %[[C0]], %[[E_PTR]] : !u16i, !cir.ptr<!u16i> tbaa(#tbaa[[Enum16]])
+  // CIR: %[[RET_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u16i>>, !cir.ptr<!u16i>
+  // CIR: %[[RET:.*]] = cir.load{{.*}} %[[RET_PTR]] : !cir.ptr<!u16i>, !u16i tbaa(#tbaa[[SHORT]])
+  // CIR: cir.store{{.*}} %[[RET]], %{{.*}} : !u16i, !cir.ptr<!u16i>
+
+  // LLVM-LABEL: define{{.*}} i16 @_Z2g2
+  // LLVM: store i16 5, ptr %{{.*}}, align 2, !tbaa [[TAG_i16:!.*]]
+  // LLVM: store i16 0, ptr %{{.*}}, align 2, !tbaa [[TAG_Enum16:!.*]]
+  // LLVM: load i16, ptr %{{.*}}, align 2, !tbaa [[TAG_i16]]
+  *val = 5;
+  *E = RED_16;
+  return *val;
+}
+
+uint8_t g3(Enum8 *E, uint8_t *val) {
+  // CIR-LABEL: cir.func {{.*}} @_Z2g3
+  // CIR: %[[C5:.*]] = cir.const #cir.int<5> : !s32i
+  // CIR: %[[U_C5:.*]] = cir.cast integral %[[C5]] : !s32i -> !u8i
+  // CIR: %[[VAL_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u8i>>, !cir.ptr<!u8i>
+  // CIR: cir.store{{.*}} %[[U_C5]], %[[VAL_PTR]] : !u8i, !cir.ptr<!u8i> tbaa(#tbaa[[CHAR]])
+  // CIR: %[[C0:.*]] = cir.const #cir.int<0> : !u8i
+  // CIR: %[[E_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u8i>>, !cir.ptr<!u8i>
+  // CIR: cir.store{{.*}} %[[C0]], %[[E_PTR]] : !u8i, !cir.ptr<!u8i> tbaa(#tbaa[[Enum8]])
+  // CIR: %[[RET_PTR:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!u8i>>, !cir.ptr<!u8i>
+  // CIR: %[[RET:.*]] = cir.load{{.*}} %[[RET_PTR]] : !cir.ptr<!u8i>, !u8i tbaa(#tbaa[[CHAR]])
+  // CIR: cir.store{{.*}} %[[RET]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+
+
+  // LLVM-LABEL: define{{.*}} i8 @_Z2g3
+  // LLVM: store i8 5, ptr %{{.*}}, align 1, !tbaa [[TAG_i8:!.*]]
+  // LLVM: store i8 0, ptr %{{.*}}, align 1, !tbaa [[TAG_Enum8:!.*]]
+  // LLVM: load i8, ptr %{{.*}}, align 1, !tbaa [[TAG_i8]]
+  *val = 5;
+  *E = RED_8;
+  return *val;
+}
+
+// LLVM: [[TYPE_char:!.*]] = !{!"omnipotent char", [[TAG_c_tbaa:!.*]],
+// LLVM: [[TAG_c_tbaa]] = !{!"Simple C++ TBAA"}
+// LLVM: [[TAG_i32]] = !{[[TYPE_i32:!.*]], [[TYPE_i32]], i64 0}
+// LLVM: [[TYPE_i32]] = !{!"int", [[TYPE_char]],
+// LLVM: [[TAG_EnumAuto32]] = !{[[TYPE_EnumAuto32:!.*]], [[TYPE_EnumAuto32]], i64 0}
+// LLVM: [[TYPE_EnumAuto32]] = !{!"_ZTS10EnumAuto32", [[TYPE_char]],
+// LLVM: [[TAG_i64]] = !{[[TYPE_i64:!.*]], [[TYPE_i64]], i64 0}
+// LLVM: [[TYPE_i64]] = !{!"long long", [[TYPE_char]],
+// LLVM: [[TAG_EnumAuto64]] = !{[[TYPE_EnumAuto64:!.*]], [[TYPE_EnumAuto64]], i64 0}
+// LLVM: [[TYPE_EnumAuto64]] = !{!"_ZTS10EnumAuto64", [[TYPE_char]],
+// LLVM: [[TAG_i16]] = !{[[TYPE_i16:!.*]], [[TYPE_i16]], i64 0}
+// LLVM: [[TYPE_i16]] = !{!"short", [[TYPE_char]],
+// LLVM: [[TAG_Enum16]] = !{[[TYPE_Enum16:!.*]], [[TYPE_Enum16]], i64 0}
+// LLVM: [[TYPE_Enum16]] = !{!"_ZTS6Enum16", [[TYPE_char]],
+// LLVM: [[TAG_Enum8]] = !{[[TYPE_Enum8:!.*]], [[TYPE_Enum8]], i64 0}
+// LLVM: [[TYPE_Enum8]] = !{!"_ZTS5Enum8", [[TYPE_char]],
diff --git a/clang/test/CIR/Incubator/CodeGen/tbaa-pointer.cpp b/clang/test/CIR/Incubator/CodeGen/tbaa-pointer.cpp
new file mode 100644
index 0000000000000..605038a737054
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/tbaa-pointer.cpp
@@ -0,0 +1,126 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -O1 -no-pointer-tbaa
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -no-pointer-tbaa
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -O1 -pointer-tbaa
+// RUN: FileCheck --check-prefix=CIR-POINTER-TBAA --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -pointer-tbaa
+// RUN: FileCheck --check-prefix=LLVM-POINTER-TBAA --input-file=%t.ll %s
+
+// CIR: #tbaa[[CHAR:.*]] = #cir.tbaa_omnipotent_char
+// CIR: #tbaa[[INT:.*]] = #cir.tbaa_scalar<id = "int", type = !s32i>
+// CIR: #tbaa[[PTR_TO_A:.*]] = #cir.tbaa_scalar<id = "any pointer", type = !cir.ptr<!rec_A>>
+// CIR: #tbaa[[STRUCT_A:.*]] = #cir.tbaa_struct<id = "_ZTS1A", members = {<#tbaa[[INT]], 0>, <#tbaa[[INT]], 4>}>
+// CIR: #tbaa[[TAG_STRUCT_A_a:.*]] = #cir.tbaa_tag<base = #tbaa[[STRUCT_A]], access = #tbaa[[INT]], offset = 0>
+
+// CIR-POINTER-TBAA: #tbaa[[CHAR:.*]] = #cir.tbaa_omnipotent_char
+// CIR-POINTER-TBAA: #tbaa[[INT:.*]] = #cir.tbaa_scalar<id = "int", type = !s32i>
+// CIR-POINTER-TBAA-DAG: #tbaa[[p1_INT:.*]] = #cir.tbaa_scalar<id = "p1 int", type = !cir.ptr<!s32i>
+// CIR-POINTER-TBAA-DAG: #tbaa[[p2_INT:.*]] = #cir.tbaa_scalar<id = "p2 int", type = !cir.ptr<!cir.ptr<!s32i>>
+// CIR-POINTER-TBAA-DAG: #tbaa[[p3_INT:.*]] = #cir.tbaa_scalar<id = "p3 int", type = !cir.ptr<!cir.ptr<!cir.ptr<!s32i>>>
+// CIR-POINTER-TBAA-DAG: #tbaa[[STRUCT_A:.*]] = #cir.tbaa_struct<id = "_ZTS1A", members = {<#tbaa[[INT]], 0>, <#tbaa[[INT]], 4>}>
+// CIR-POINTER-TBAA-DAG: #tbaa[[p1_STRUCT_A:.*]] = #cir.tbaa_scalar<id = "p1 _ZTS1A", type = !cir.ptr<!rec_A>
+// CIR-POINTER-TBAA-DAG: #tbaa[[p2_STRUCT_A:.*]] = #cir.tbaa_scalar<id = "p2 _ZTS1A", type = !cir.ptr<!cir.ptr<!rec_A>>
+// CIR-POINTER-TBAA-DAG: #tbaa[[p3_STRUCT_A:.*]] = #cir.tbaa_scalar<id = "p3 _ZTS1A", type = !cir.ptr<!cir.ptr<!cir.ptr<!rec_A>>>
+
+int test_scalar_pointer(int*** p3) {
+    int* p1;
+    int** p2;
+    p2 = *p3;
+    p1 = *p2;
+    int t = *p1;
+
+    // CIR-POINTER-TBAA-LABEL: _Z19test_scalar_pointerPPPi
+    // CIR-POINTER-TBAA: %{{.*}} = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!s32i>>>>, !cir.ptr<!cir.ptr<!cir.ptr<!s32i>>> tbaa(#tbaa[[p3_INT]])
+    // CIR-POINTER-TBAA: %{{.*}} = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!cir.ptr<!s32i>>>, !cir.ptr<!cir.ptr<!s32i>> tbaa(#tbaa[[p2_INT]])
+    // CIR-POINTER-TBAA: %{{.*}} = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i> tbaa(#tbaa[[p1_INT]])
+
+    // LLVM-LABEL: _Z19test_scalar_pointerPPPi
+    // LLVM: %[[p2:.*]] = load ptr, ptr %{{.*}}, align 8, !tbaa ![[TBAA_ANY_PTR:.*]]
+    // LLVM: %[[p1:.*]] = load ptr, ptr %[[p2]], align 8, !tbaa ![[TBAA_ANY_PTR]]
+    // LLVM: %[[t:.*]] = load i32, ptr %[[p1]], align 4, !tbaa ![[TBAA_INT:.*]]
+
+    // LLVM-POINTER-TBAA-LABEL: _Z19test_scalar_pointerPPPi
+    // LLVM-POINTER-TBAA: %[[p2:.*]] = load ptr, ptr %{{.*}}, align 8, !tbaa ![[TBAA_p2_INT:.*]]
+    // LLVM-POINTER-TBAA: %[[p1:.*]] = load ptr, ptr %[[p2]], align 8, !tbaa ![[TBAA_p1_INT:.*]]
+    // LLVM-POINTER-TBAA: %[[t:.*]] = load i32, ptr %[[p1]], align 4, !tbaa ![[TBAA_INT:.*]]
+    return t;
+}
+
+struct A {
+    int a;
+    int b;
+};
+
+int test_struct_pointer(A*** p3, int A::***m3) {
+    A* p1;
+    A** p2;
+    p2 = *p3;
+    p1 = *p2;
+
+    // CIR-POINTER-TBAA-LABEL: _Z19test_struct_pointerPPP1APPMS_i
+    // CIR-POINTER-TBAA: %{{.*}} = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!cir.ptr<!cir.ptr<!rec_A>>>>, !cir.ptr<!cir.ptr<!cir.ptr<!rec_A>>> tbaa(#tbaa[[p3_STRUCT_A]])
+    // CIR-POINTER-TBAA: %{{.*}} = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!cir.ptr<!rec_A>>>, !cir.ptr<!cir.ptr<!rec_A>> tbaa(#tbaa[[p2_STRUCT_A]])
+    // CIR-POINTER-TBAA: %{{.*}} = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A> tbaa(#tbaa[[p1_STRUCT_A]])
+
+    // LLVM-LABEL: _Z19test_struct_pointerPPP1APPMS_i
+    // LLVM: %[[p2:.*]] = load ptr, ptr %{{.*}}, align 8, !tbaa ![[TBAA_ANY_PTR]]
+    // LLVM: %[[p1:.*]] = load ptr, ptr %[[p2]], align 8, !tbaa ![[TBAA_ANY_PTR]]
+    // LLVM: %[[t:.*]] = load i32, ptr %[[p1]], align 4, !tbaa ![[TBAA_STRUCT_A_a:.*]]
+
+    // LLVM-POINTER-TBAA-LABEL: _Z19test_struct_pointerPPP1APPMS_i
+    // LLVM-POINTER-TBAA: %[[p2:.*]] = load ptr, ptr %{{.*}}, align 8, !tbaa ![[TBAA_p2_STRUCT_A:.*]]
+    // LLVM-POINTER-TBAA: %[[p1:.*]] = load ptr, ptr %[[p2]], align 8, !tbaa ![[TBAA_p1_STRUCT_A:.*]]
+    // LLVM-POINTER-TBAA: %[[t:.*]] = load i32, ptr %[[p1]], align 4, !tbaa ![[TBAA_STRUCT_A_a:.*]]
+    return p1->a;
+}
+
+void test_member_pointer(A& a, int A::***m3, int val) {
+
+    // CIR-LABEL: _Z19test_member_pointerR1APPMS_ii
+    // CIR: %{{.*}} = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.data_member<!s32i in !rec_A>>, !cir.data_member<!s32i in !rec_A> tbaa(#tbaa[[CHAR]])
+
+    // CIR-POINTER-TBAA-LABEL: _Z19test_member_pointerR1APPMS_ii
+    // CIR-POINTER-TBAA: %{{.*}} = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.data_member<!s32i in !rec_A>>, !cir.data_member<!s32i in !rec_A> tbaa(#tbaa[[CHAR]])
+
+    // LLVM-LABEL: _Z19test_member_pointerR1APPMS_ii
+    // LLVM: %[[m2:.*]] = load ptr, ptr %{{.*}}, align 8, !tbaa ![[TBAA_ANY_PTR:.*]]
+    // LLVM: %[[m1:.*]] = load i64, ptr %[[m2]], align 8, !tbaa ![[TBAA_member_ptr:.*]]
+    // LLVM: %[[A_a:.*]] = getelementptr i8, ptr %{{.*}}, i64 %[[m1]]
+    // LLVM: store i32 %{{.*}}, ptr %[[A_a]], align 4, !tbaa ![[TBAA_INT]]
+
+    // LLVM-POINTER-TBAA-LABEL: _Z19test_member_pointerR1APPMS_ii
+    // LLVM-POINTER-TBAA: %[[m2:.*]] = load ptr, ptr %{{.*}}, align 8, !tbaa ![[TBAA_ANY_PTR:.*]]
+    // LLVM-POINTER-TBAA: %[[m1:.*]] = load i64, ptr %[[m2]], align 8, !tbaa ![[TBAA_member_ptr:.*]]
+    // LLVM-POINTER-TBAA: %[[A_a:.*]] = getelementptr i8, ptr %{{.*}}, i64 %[[m1]]
+    // LLVM-POINTER-TBAA: store i32 %{{.*}}, ptr %[[A_a]], align 4, !tbaa ![[TBAA_INT]]
+    a.***m3 = val;
+}
+
+// LLVM: ![[TBAA_ANY_PTR]] = !{![[TBAA_ANY_PTR_PARENT:.*]], ![[TBAA_ANY_PTR_PARENT]], i64 0}
+// LLVM: ![[TBAA_ANY_PTR_PARENT]] = !{!"any pointer", ![[CHAR:.*]], i64 0}
+// LLVM: ![[CHAR]] = !{!"omnipotent char", ![[ROOT:.*]], i64 0}
+// LLVM: ![[ROOT]] = !{!"Simple C++ TBAA"}
+// LLVM: ![[TBAA_INT]] = !{![[TBAA_INT_PARENT:.*]], ![[TBAA_INT_PARENT]], i64 0}
+// LLVM: ![[TBAA_INT_PARENT]] = !{!"int", ![[CHAR]], i64 0}
+// LLVM: ![[TBAA_STRUCT_A_a]] = !{![[TBAA_STRUCT_A:.*]], ![[TBAA_INT_PARENT]], i64 0}
+// LLVM: ![[TBAA_STRUCT_A]] = !{!"_ZTS1A", ![[TBAA_INT_PARENT]], i64 0, ![[TBAA_INT_PARENT]], i64 4}
+// LLVM: ![[TBAA_member_ptr]] = !{![[CHAR]], ![[CHAR]], i64 0}
+
+// LLVM-POINTER-TBAA: ![[TBAA_p2_INT]] = !{![[TBAA_p2_INT_PARENT:.*]], ![[TBAA_p2_INT_PARENT]], i64 0}
+// LLVM-POINTER-TBAA: ![[TBAA_p2_INT_PARENT]] = !{!"p2 int", ![[TBAA_ANY_PTR_PARENT:.*]], i64 0}
+// LLVM-POINTER-TBAA: ![[TBAA_ANY_PTR_PARENT]] = !{!"any pointer", ![[CHAR:.*]], i64 0}
+// LLVM-POINTER-TBAA: ![[CHAR]] = !{!"omnipotent char", ![[ROOT:.*]], i64 0}
+// LLVM-POINTER-TBAA: ![[ROOT]] = !{!"Simple C++ TBAA"}
+// LLVM-POINTER-TBAA: ![[TBAA_p1_INT]] = !{![[TBAA_p1_INT_PARENT:.*]], ![[TBAA_p1_INT_PARENT]], i64 0}
+// LLVM-POINTER-TBAA: ![[TBAA_p1_INT_PARENT]] = !{!"p1 int", ![[TBAA_ANY_PTR_PARENT]], i64 0}
+// LLVM-POINTER-TBAA: ![[TBAA_INT]] = !{![[TBAA_INT_PARENT:.*]], ![[TBAA_INT_PARENT]], i64 0}
+// LLVM-POINTER-TBAA: ![[TBAA_INT_PARENT]] = !{!"int", ![[CHAR]], i64 0}
+// LLVM-POINTER-TBAA: ![[TBAA_p2_STRUCT_A]] = !{![[TBAA_p2_STRUCT_A_PARENT:.*]], ![[TBAA_p2_STRUCT_A_PARENT]], i64 0}
+// LLVM-POINTER-TBAA: ![[TBAA_p2_STRUCT_A_PARENT]] = !{!"p2 _ZTS1A", ![[TBAA_ANY_PTR_PARENT]], i64 0}
+// LLVM-POINTER-TBAA: ![[TBAA_p1_STRUCT_A]] = !{![[TBAA_p1_STRUCT_A_PARENT:.*]], ![[TBAA_p1_STRUCT_A_PARENT]], i64 0}
+// LLVM-POINTER-TBAA: ![[TBAA_p1_STRUCT_A_PARENT]] = !{!"p1 _ZTS1A", ![[TBAA_ANY_PTR_PARENT]], i64 0}
+// LLVM-POINTER-TBAA: ![[TBAA_STRUCT_A_a]] = !{![[TBAA_STRUCT_A:.*]], ![[TBAA_INT_PARENT]], i64 0}
+// LLVM-POINTER-TBAA: ![[TBAA_STRUCT_A]] = !{!"_ZTS1A", ![[TBAA_INT_PARENT]], i64 0, ![[TBAA_INT_PARENT]], i64 4}
+// LLVM-POINTER-TBAA: ![[TBAA_ANY_PTR]] = !{![[TBAA_ANY_PTR_PARENT]], ![[TBAA_ANY_PTR_PARENT]], i64 0}
+// LLVM-POINTER-TBAA: ![[TBAA_member_ptr]] = !{![[CHAR]], ![[CHAR]], i64 0}
diff --git a/clang/test/CIR/Incubator/CodeGen/tbaa-scalar.c b/clang/test/CIR/Incubator/CodeGen/tbaa-scalar.c
new file mode 100644
index 0000000000000..721e93b7e8243
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/tbaa-scalar.c
@@ -0,0 +1,143 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -O1
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -relaxed-aliasing
+// RUN: FileCheck --check-prefix=NO-TBAA --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O0
+// RUN: FileCheck --check-prefix=NO-TBAA --input-file=%t.ll %s
+
+// NO-TBAA-NOT: !tbaa
+
+// CIR: #tbaa[[CHAR:.*]] = #cir.tbaa_omnipotent_char
+// CIR: #tbaa[[FLOAT:.*]] = #cir.tbaa_scalar<id = "float", type = !cir.float>
+// CIR: #tbaa[[DOUBLE:.*]] = #cir.tbaa_scalar<id = "double", type = !cir.double>
+// CIR: #tbaa[[LONG_DOUBLE:.*]] = #cir.tbaa_scalar<id = "long double", type = !cir.long_double<!cir.f80>>
+// CIR: #tbaa[[INT:.*]] = #cir.tbaa_scalar<id = "int", type = !s32i>
+// CIR: #tbaa[[LONG:.*]] = #cir.tbaa_scalar<id = "long", type = !s64i>
+// CIR: #tbaa[[LONG_LONG:.*]] = #cir.tbaa_scalar<id = "long long", type = !s64i>
+
+void test_int_and_float(int *a, float *b) {
+  // CIR-LABEL: cir.func {{.*}} @test_int_and_float
+  // CIR: cir.scope
+  // CIR: %[[TMP1:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+  // CIR: %[[TMP2:.*]] = cir.load{{.*}} %[[TMP1]] : !cir.ptr<!s32i>, !s32i tbaa(#tbaa[[INT]])
+  // CIR: cir.if
+  // CIR: %[[C2:.*]] = cir.const #cir.fp<2
+  // CIR: %[[TMP3:.*]] = cir.load deref{{.*}} %[[ARG_b:.*]] : !cir.ptr<!cir.ptr<!cir.float>>, !cir.ptr<!cir.float>
+  // CIR: cir.store{{.*}} %[[C2]], %[[TMP3]] : !cir.float, !cir.ptr<!cir.float> tbaa(#tbaa[[FLOAT]])
+  // CIR: else
+  // CIR: %[[C3:.*]] = cir.const #cir.fp<3
+  // CIR: %[[TMP4:.*]] = cir.load deref{{.*}} %[[ARG_b]] : !cir.ptr<!cir.ptr<!cir.float>>, !cir.ptr<!cir.float>
+  // CIR: cir.store{{.*}} %[[C3]], %[[TMP4]] : !cir.float, !cir.ptr<!cir.float> tbaa(#tbaa[[FLOAT]])
+
+  // LLVM-LABEL: void @test_int_and_float
+  // LLVM: %[[ARG_a:.*]] = load i32, ptr %{{.*}}, align 4, !tbaa ![[TBAA_INT:.*]]
+  // LLVM: %[[COND:.*]] = icmp eq i32 %[[ARG_a]], 1
+  // LLVM: %[[RET:.*]] = select i1 %[[COND]], float 2.000000e+00, float 3.000000e+00
+  // LLVM: store float %[[RET]], ptr %{{.*}}, align 4, !tbaa ![[TBAA_FLOAT:.*]]
+  // LLVM: ret void
+  if (*a == 1) {
+    *b = 2.0f;
+  } else {
+    *b = 3.0f;
+  }
+}
+
+void test_long_and_double(long *a, double *b) {
+  // CIR-LABEL: cir.func {{.*}} @test_long_and_double
+  // CIR: cir.scope
+  // CIR: %[[TMP1:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!s64i>>, !cir.ptr<!s64i>
+  // CIR: %[[TMP2:.*]] = cir.load{{.*}} %[[TMP1]] : !cir.ptr<!s64i>, !s64i tbaa(#tbaa[[LONG]])
+  // CIR: cir.if
+  // CIR: %[[C2:.*]] = cir.const #cir.fp<2
+  // CIR: %[[TMP3:.*]] = cir.load deref{{.*}} %[[ARG_b:.*]] : !cir.ptr<!cir.ptr<!cir.double>>, !cir.ptr<!cir.double>
+  // CIR: cir.store{{.*}} %[[C2]], %[[TMP3]] : !cir.double, !cir.ptr<!cir.double> tbaa(#tbaa[[DOUBLE]])
+  // CIR: else
+  // CIR: %[[C3:.*]] = cir.const #cir.fp<3
+  // CIR: %[[TMP4:.*]] = cir.load deref{{.*}} %[[ARG_b]] : !cir.ptr<!cir.ptr<!cir.double>>, !cir.ptr<!cir.double>
+  // CIR: cir.store{{.*}} %[[C3]], %[[TMP4]] : !cir.double, !cir.ptr<!cir.double> tbaa(#tbaa[[DOUBLE]])
+
+  // LLVM-LABEL: void @test_long_and_double
+  // LLVM: %[[ARG_a:.*]] = load i64, ptr %{{.*}}, align 8, !tbaa ![[TBAA_LONG:.*]]
+  // LLVM: %[[COND:.*]] = icmp eq i64 %[[ARG_a]], 1
+  // LLVM: %[[RET:.*]] = select i1 %[[COND]], double 2.000000e+00, double 3.000000e+00
+  // LLVM: store double %[[RET]], ptr %{{.*}}, align 8, !tbaa ![[TBAA_DOUBLE:.*]]
+  // LLVM: ret void
+  if (*a == 1L) {
+    *b = 2.0;
+  } else {
+    *b = 3.0;
+  }
+}
+void test_long_long_and_long_double(long long *a, long double *b) {
+  // CIR-LABEL: cir.func {{.*}} @test_long_long_and_long_double
+  // CIR: cir.scope
+  // CIR: %[[TMP1:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!s64i>>, !cir.ptr<!s64i>
+  // CIR: %[[TMP2:.*]] = cir.load{{.*}} %[[TMP1]] : !cir.ptr<!s64i>, !s64i tbaa(#tbaa[[LONG_LONG]])
+  // CIR: cir.if
+  // CIR: %[[C2:.*]] = cir.const #cir.fp<2
+  // CIR: %[[TMP3:.*]] = cir.load deref{{.*}} %[[ARG_b:.*]] : !cir.ptr<!cir.ptr<!cir.long_double<!cir.f80>>>, !cir.ptr<!cir.long_double<!cir.f80>>
+  // CIR: cir.store{{.*}} %[[C2]], %[[TMP3]] : !cir.long_double<!cir.f80>, !cir.ptr<!cir.long_double<!cir.f80>> tbaa(#tbaa[[LONG_DOUBLE]])
+  // CIR: else
+  // CIR: %[[C3:.*]] = cir.const #cir.fp<3
+  // CIR: %[[TMP4:.*]] = cir.load deref{{.*}} %[[ARG_b]] : !cir.ptr<!cir.ptr<!cir.long_double<!cir.f80>>>, !cir.ptr<!cir.long_double<!cir.f80>>
+  // CIR: cir.store{{.*}} %[[C3]], %[[TMP4]] : !cir.long_double<!cir.f80>, !cir.ptr<!cir.long_double<!cir.f80>> tbaa(#tbaa[[LONG_DOUBLE]])
+
+  // LLVM-LABEL: void @test_long_long_and_long_double
+  // LLVM: %[[ARG_a:.*]] = load i64, ptr %{{.*}}, align 8, !tbaa ![[TBAA_LONG_LONG:.*]]
+  // LLVM: %[[COND:.*]] = icmp eq i64 %[[ARG_a]], 1
+  // LLVM: %[[RET:.*]] = select i1 %[[COND]], x86_fp80 0xK40008000000000000000, x86_fp80 0xK4000C000000000000000
+  // LLVM: store x86_fp80 %[[RET]], ptr %{{.*}}, align 16, !tbaa ![[TBAA_LONG_DOUBLE:.*]]
+  // LLVM: ret void
+  if (*a == 1L) {
+    *b = 2.0L;
+  } else {
+    *b = 3.0L;
+  }
+}
+
+void test_char(char *a, char* b) {
+  // CIR-LABEL: cir.func {{.*}} @test_char
+  // CIR: cir.scope
+  // CIR: %[[TMP1:.*]] = cir.load deref{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
+  // CIR: %[[TMP2:.*]] = cir.load{{.*}} %[[TMP1]] : !cir.ptr<!s8i>, !s8i tbaa(#tbaa[[CHAR]])
+  // CIR: cir.if
+  // CIR: %[[C2:.*]] = cir.const #cir.int<98> : !s32i
+  // CIR: %[[C2_CHAR:.*]] = cir.cast integral %[[C2]] : !s32i -> !s8i
+  // CIR: %[[TMP3:.*]] = cir.load deref{{.*}} %[[ARG_b:.*]] : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
+  // CIR: cir.store{{.*}} %[[C2_CHAR]], %[[TMP3]] : !s8i, !cir.ptr<!s8i> tbaa(#tbaa[[CHAR]])
+  // CIR: else
+  // CIR: %[[C3:.*]] = cir.const #cir.int<0> : !s32i
+  // CIR: %[[C3_CHAR:.*]] = cir.cast integral %[[C3]] : !s32i -> !s8i
+  // CIR: %[[TMP4:.*]] = cir.load deref{{.*}} %[[ARG_b]] : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
+  // CIR: cir.store{{.*}} %[[C3_CHAR]], %[[TMP4]] : !s8i, !cir.ptr<!s8i> tbaa(#tbaa[[CHAR]])
+
+
+  // LLVM-LABEL: void @test_char
+  // LLVM: %[[ARG_a:.*]] = load i8, ptr %{{.*}}, align 1, !tbaa ![[TBAA_CHAR:.*]]
+  // LLVM: %[[COND:.*]] = icmp eq i8 %[[ARG_a]], 97
+  // LLVM: %[[RET:.*]] = select i1 %[[COND]], i8 98, i8 0
+  // LLVM: store i8 %[[RET]], ptr %{{.*}}, align 1, !tbaa ![[TBAA_CHAR]]
+  // LLVM: ret void
+  if (*a == 'a') {
+    *b = 'b';
+  }
+  else {
+    *b = '\0';
+  }
+}
+
+// LLVM: ![[TBAA_INT]] = !{![[TBAA_INT_PARENT:.*]], ![[TBAA_INT_PARENT]], i64 0}
+// LLVM: ![[TBAA_INT_PARENT]] = !{!"int", ![[CHAR:.*]], i64 0}
+// LLVM: ![[CHAR]] = !{!"omnipotent char", ![[ROOT:.*]], i64 0}
+// LLVM: ![[ROOT]] = !{!"Simple C/C++ TBAA"}
+// LLVM: ![[TBAA_FLOAT]] = !{![[TBAA_FLOAT_PARENT:.*]], ![[TBAA_FLOAT_PARENT]], i64 0}
+// LLVM: ![[TBAA_FLOAT_PARENT]] = !{!"float", ![[CHAR]], i64 0}
+// LLVM: ![[TBAA_LONG]] = !{![[TBAA_LONG_PARENT:.*]], ![[TBAA_LONG_PARENT]], i64 0}
+// LLVM: ![[TBAA_LONG_PARENT]] = !{!"long", ![[CHAR]], i64 0}
+// LLVM: ![[TBAA_DOUBLE]] = !{![[TBAA_DOUBLE_PARENT:.*]], ![[TBAA_DOUBLE_PARENT]], i64 0}
+// LLVM: ![[TBAA_DOUBLE_PARENT]] = !{!"double", ![[CHAR]], i64 0}
+// LLVM: ![[TBAA_LONG_DOUBLE]] = !{![[TBAA_LONG_DOUBLE_PARENT:.*]], ![[TBAA_LONG_DOUBLE_PARENT]], i64 0}
+// LLVM: ![[TBAA_LONG_DOUBLE_PARENT]] = !{!"long double", ![[CHAR]], i64 0}
+// LLVM: ![[TBAA_CHAR]] = !{![[CHAR]], ![[CHAR]], i64 0}
diff --git a/clang/test/CIR/Incubator/CodeGen/tbaa-struct.cpp b/clang/test/CIR/Incubator/CodeGen/tbaa-struct.cpp
new file mode 100644
index 0000000000000..68e918dc66258
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/tbaa-struct.cpp
@@ -0,0 +1,403 @@
+// This is inspired from clang/test/CodeGen/tbaa.cpp, with both CIR and LLVM checks.
+// g13 is not supported because DiscreteBitFieldABI is NYI.
+// see clang/lib/CIR/CodeGen/CIRRecordLayoutBuilder.cpp CIRRecordLowering::accumulateBitFields
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -O1 -no-pointer-tbaa
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -disable-llvm-passes -no-struct-path-tbaa -no-pointer-tbaa
+// RUN: FileCheck --check-prefix=CHECK --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -disable-llvm-passes -no-pointer-tbaa
+// RUN: FileCheck --check-prefixes=PATH,OLD-PATH --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -disable-llvm-passes -relaxed-aliasing -no-pointer-tbaa
+// RUN: FileCheck --check-prefix=NO-TBAA --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O0 -disable-llvm-passes -no-pointer-tbaa
+// RUN: FileCheck --check-prefix=NO-TBAA --input-file=%t.ll %s
+
+// NO-TBAA-NOT: !tbaa
+// CIR: #tbaa[[CHAR:.*]] = #cir.tbaa_omnipotent_char
+// CIR: #tbaa[[INT:.*]] = #cir.tbaa_scalar<id = "int", type = !s32i>
+// CIR: #tbaa[[SHORT:.*]] = #cir.tbaa_scalar<id = "short", type = !s16i>
+// CIR: #tbaa[[STRUCT_six:.*]] = #cir.tbaa_struct<id = "_ZTS3six", members = {<#tbaa[[CHAR]], 0>, <#tbaa[[CHAR]], 4>, <#tbaa[[CHAR]], 5>}>
+// CIR: #tbaa[[STRUCT_StructA:.*]] = #cir.tbaa_struct<id = "_ZTS7StructA", members = {<#tbaa[[SHORT]], 0>, <#tbaa[[INT]], 4>, <#tbaa[[SHORT]], 8>, <#tbaa[[INT]], 12>}>
+// CIR: #tbaa[[STRUCT_StructS:.*]] = #cir.tbaa_struct<id = "_ZTS7StructS", members = {<#tbaa[[SHORT]], 0>, <#tbaa[[INT]], 4>}>
+// CIR: #tbaa[[STRUCT_StructS2:.*]] = #cir.tbaa_struct<id = "_ZTS8StructS2", members = {<#tbaa[[SHORT]], 0>, <#tbaa[[INT]], 4>}>
+// CIR: #tbaa[[TAG_six_b:.*]] = #cir.tbaa_tag<base = #tbaa[[STRUCT_six]], access = #tbaa[[CHAR]], offset = 4>
+// CIR: #tbaa[[TAG_StructA_f32:.*]] = #cir.tbaa_tag<base = #tbaa[[STRUCT_StructA]], access = #tbaa[[INT]], offset = 4>
+// CIR: #tbaa[[TAG_StructA_f16:.*]] = #cir.tbaa_tag<base = #tbaa[[STRUCT_StructA]], access = #tbaa[[SHORT]], offset = 0>
+// CIR: #tbaa[[TAG_StructS_f32:.*]] = #cir.tbaa_tag<base = #tbaa[[STRUCT_StructS]], access = #tbaa[[INT]], offset = 4>
+// CIR: #tbaa[[TAG_StructS_f16:.*]] = #cir.tbaa_tag<base = #tbaa[[STRUCT_StructS]], access = #tbaa[[SHORT]], offset = 0>
+// CIR: #tbaa[[TAG_StructS2_f32:.*]] = #cir.tbaa_tag<base = #tbaa[[STRUCT_StructS2]], access = #tbaa[[INT]], offset = 4>
+// CIR: #tbaa[[TAG_StructS2_f16:.*]] = #cir.tbaa_tag<base = #tbaa[[STRUCT_StructS2]], access = #tbaa[[SHORT]], offset = 0>
+// CIR: #tbaa[[STRUCT_StructB:.*]] = #cir.tbaa_struct<id = "_ZTS7StructB", members = {<#tbaa[[SHORT]], 0>, <#tbaa[[STRUCT_StructA]], 4>, <#tbaa[[INT]], 20>}>
+// CIR: #tbaa[[TAG_StructB_a_f32:.*]] = #cir.tbaa_tag<base = #tbaa[[STRUCT_StructB]], access = #tbaa[[INT]], offset = 8>
+// CIR: #tbaa[[TAG_StructB_a_f16:.*]] = #cir.tbaa_tag<base = #tbaa[[STRUCT_StructB]], access = #tbaa[[SHORT]], offset = 4>
+// CIR: #tbaa[[TAG_StructB_f32:.*]] = #cir.tbaa_tag<base = #tbaa[[STRUCT_StructB]], access = #tbaa[[INT]], offset = 20>
+// CIR: #tbaa[[TAG_StructB_a_f32_2:.*]] = #cir.tbaa_tag<base = #tbaa[[STRUCT_StructB]], access = #tbaa[[INT]], offset = 16>
+// CIR: #tbaa[[STRUCT_StructC:.*]] = #cir.tbaa_struct<id = "_ZTS7StructC", members = {<#tbaa[[SHORT]], 0>, <#tbaa[[STRUCT_StructB]], 4>, <#tbaa[[INT]], 28>}>
+// CIR: #tbaa[[STRUCT_StructD:.*]] = #cir.tbaa_struct<id = "_ZTS7StructD", members = {<#tbaa[[SHORT]], 0>, <#tbaa[[STRUCT_StructB]], 4>, <#tbaa[[INT]], 28>, <#tbaa[[CHAR]], 32>}>
+// CIR: #tbaa[[TAG_StructC_b_a_f32:.*]] = #cir.tbaa_tag<base = #tbaa[[STRUCT_StructC]], access = #tbaa[[INT]], offset = 12>
+// CIR: #tbaa[[TAG_StructD_b_a_f32:.*]] = #cir.tbaa_tag<base = #tbaa[[STRUCT_StructD]], access = #tbaa[[INT]], offset = 12>
+
+
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned long long uint64_t;
+typedef struct
+{
+   uint16_t f16;
+   uint32_t f32;
+   uint16_t f16_2;
+   uint32_t f32_2;
+} StructA;
+typedef struct
+{
+   uint16_t f16;
+   StructA a;
+   uint32_t f32;
+} StructB;
+typedef struct
+{
+   uint16_t f16;
+   StructB b;
+   uint32_t f32;
+} StructC;
+typedef struct
+{
+   uint16_t f16;
+   StructB b;
+   uint32_t f32;
+   uint8_t f8;
+} StructD;
+
+typedef struct
+{
+   uint16_t f16;
+   uint32_t f32;
+} StructS;
+typedef struct
+{
+   uint16_t f16;
+   uint32_t f32;
+} StructS2;
+
+uint32_t g(uint32_t *s, StructA *A, uint64_t count) {
+  // CIR-LABEL: cir.func {{.*}} @_Z1g
+  // CIR: %[[INT_1:.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: %[[UINT_1:.*]] = cir.cast integral %[[INT_1]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_1]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[INT]])
+  // CIR: %[[INT_4:.*]] = cir.const #cir.int<4> : !s32i
+  // CIR: %[[UINT_4:.*]] = cir.cast integral %[[INT_4]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_4]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructA_f32]])
+
+
+  // CHECK-LABEL: define{{.*}} i32 @_Z1g
+  // CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32:!.*]]
+  // CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // PATH-LABEL: define{{.*}} i32 @_Z1g
+  // PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32:!.*]]
+  // PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_A_f32:!.*]]
+  *s = 1;
+  A->f32 = 4;
+  return *s;
+}
+
+uint32_t g2(uint32_t *s, StructA *A, uint64_t count) {
+  // CIR-LABEL: cir.func {{.*}} @_Z2g2
+  // CIR: %[[INT_1:.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: %[[UINT_1:.*]] = cir.cast integral %[[INT_1]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_1]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[INT]])
+  // CIR: %[[INT_4:.*]] = cir.const #cir.int<4> : !s32i
+  // CIR: %[[UINT_4:.*]] = cir.cast integral %[[INT_4]] : !s32i -> !u16i
+  // CIR: cir.store{{.*}} %[[UINT_4]], %{{.*}} : !u16i, !cir.ptr<!u16i> tbaa(#tbaa[[TAG_StructA_f16]])
+
+  // CHECK-LABEL: define{{.*}} i32 @_Z2g2
+  // CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // CHECK: store i16 4, ptr %{{.*}}, align {{4|2}}, !tbaa [[TAG_i16:!.*]]
+  // PATH-LABEL: define{{.*}} i32 @_Z2g2
+  // PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // PATH: store i16 4, ptr %{{.*}}, align {{4|2}}, !tbaa [[TAG_A_f16:!.*]]
+  *s = 1;
+  A->f16 = 4;
+  return *s;
+}
+
+uint32_t g3(StructA *A, StructB *B, uint64_t count) {
+  // CIR-LABEL: cir.func {{.*}} @_Z2g3
+  // CIR: %[[INT_1:.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: %[[UINT_1:.*]] = cir.cast integral %[[INT_1]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_1]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructA_f32]])
+  // CIR: %[[INT_4:.*]] = cir.const #cir.int<4> : !s32i
+  // CIR: %[[UINT_4:.*]] = cir.cast integral %[[INT_4]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_4]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructB_a_f32]])
+
+  // CHECK-LABEL: define{{.*}} i32 @_Z2g3
+  // CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // PATH-LABEL: define{{.*}} i32 @_Z2g3
+  // PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_A_f32]]
+  // PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_B_a_f32:!.*]]
+  A->f32 = 1;
+  B->a.f32 = 4;
+  return A->f32;
+}
+
+uint32_t g4(StructA *A, StructB *B, uint64_t count) {
+  // CIR-LABEL: cir.func {{.*}} @_Z2g4
+  // CIR: %[[INT_1:.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: %[[UINT_1:.*]] = cir.cast integral %[[INT_1]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_1]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructA_f32]])
+  // CIR: %[[INT_4:.*]] = cir.const #cir.int<4> : !s32i
+  // CIR: %[[UINT_4:.*]] = cir.cast integral %[[INT_4]] : !s32i -> !u16i
+  // CIR: cir.store{{.*}} %[[UINT_4]], %{{.*}} : !u16i, !cir.ptr<!u16i> tbaa(#tbaa[[TAG_StructB_a_f16]])
+
+  // CHECK-LABEL: define{{.*}} i32 @_Z2g4
+  // CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // CHECK: store i16 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i16]]
+  // PATH-LABEL: define{{.*}} i32 @_Z2g4
+  // PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_A_f32]]
+  // PATH: store i16 4, ptr %{{.*}}, align {{4|2}}, !tbaa [[TAG_B_a_f16:!.*]]
+  A->f32 = 1;
+  B->a.f16 = 4;
+  return A->f32;
+}
+
+uint32_t g5(StructA *A, StructB *B, uint64_t count) {
+  // CIR-LABEL: cir.func {{.*}} @_Z2g5
+  // CIR: %[[INT_1:.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: %[[UINT_1:.*]] = cir.cast integral %[[INT_1]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_1]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructA_f32]])
+  // CIR: %[[INT_4:.*]] = cir.const #cir.int<4> : !s32i
+  // CIR: %[[UINT_4:.*]] = cir.cast integral %[[INT_4]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_4]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructB_f32]])
+
+  // CHECK-LABEL: define{{.*}} i32 @_Z2g5
+  // CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // PATH-LABEL: define{{.*}} i32 @_Z2g5
+  // PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_A_f32]]
+  // PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_B_f32:!.*]]
+  A->f32 = 1;
+  B->f32 = 4;
+  return A->f32;
+}
+
+uint32_t g6(StructA *A, StructB *B, uint64_t count) {
+  // CIR-LABEL: cir.func {{.*}} @_Z2g6
+  // CIR: %[[INT_1:.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: %[[UINT_1:.*]] = cir.cast integral %[[INT_1]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_1]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructA_f32]])
+  // CIR: %[[INT_4:.*]] = cir.const #cir.int<4> : !s32i
+  // CIR: %[[UINT_4:.*]] = cir.cast integral %[[INT_4]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_4]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructB_a_f32_2]])
+
+  // CHECK-LABEL: define{{.*}} i32 @_Z2g6
+  // CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // PATH-LABEL: define{{.*}} i32 @_Z2g6
+  // PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_A_f32]]
+  // PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_B_a_f32_2:!.*]]
+  A->f32 = 1;
+  B->a.f32_2 = 4;
+  return A->f32;
+}
+
+uint32_t g7(StructA *A, StructS *S, uint64_t count) {
+  // CIR-LABEL: cir.func {{.*}} @_Z2g7
+  // CIR: %[[INT_1:.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: %[[UINT_1:.*]] = cir.cast integral %[[INT_1]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_1]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructA_f32]])
+  // CIR: %[[INT_4:.*]] = cir.const #cir.int<4> : !s32i
+  // CIR: %[[UINT_4:.*]] = cir.cast integral %[[INT_4]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_4]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructS_f32]])
+
+  // CHECK-LABEL: define{{.*}} i32 @_Z2g7
+  // CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // PATH-LABEL: define{{.*}} i32 @_Z2g7
+  // PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_A_f32]]
+  // PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_S_f32:!.*]]
+  A->f32 = 1;
+  S->f32 = 4;
+  return A->f32;
+}
+
+uint32_t g8(StructA *A, StructS *S, uint64_t count) {
+  // CIR-LABEL: cir.func {{.*}} @_Z2g8
+  // CIR: %[[INT_1:.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: %[[UINT_1:.*]] = cir.cast integral %[[INT_1]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_1]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructA_f32]])
+  // CIR: %[[INT_4:.*]] = cir.const #cir.int<4> : !s32i
+  // CIR: %[[UINT_4:.*]] = cir.cast integral %[[INT_4]] : !s32i -> !u16i
+  // CIR: cir.store{{.*}} %[[UINT_4]], %{{.*}} : !u16i, !cir.ptr<!u16i> tbaa(#tbaa[[TAG_StructS_f16]])
+
+  // CHECK-LABEL: define{{.*}} i32 @_Z2g8
+  // CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // CHECK: store i16 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i16]]
+  // PATH-LABEL: define{{.*}} i32 @_Z2g8
+  // PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_A_f32]]
+  // PATH: store i16 4, ptr %{{.*}}, align {{4|2}}, !tbaa [[TAG_S_f16:!.*]]
+  A->f32 = 1;
+  S->f16 = 4;
+  return A->f32;
+}
+
+uint32_t g9(StructS *S, StructS2 *S2, uint64_t count) {
+  // CIR-LABEL: cir.func {{.*}} @_Z2g9
+  // CIR: %[[INT_1:.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: %[[UINT_1:.*]] = cir.cast integral %[[INT_1]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_1]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructS_f32]])
+  // CIR: %[[INT_4:.*]] = cir.const #cir.int<4> : !s32i
+  // CIR: %[[UINT_4:.*]] = cir.cast integral %[[INT_4]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_4]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructS2_f32]])
+
+  // CHECK-LABEL: define{{.*}} i32 @_Z2g9
+  // CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // PATH-LABEL: define{{.*}} i32 @_Z2g9
+  // PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_S_f32]]
+  // PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_S2_f32:!.*]]
+  S->f32 = 1;
+  S2->f32 = 4;
+  return S->f32;
+}
+
+uint32_t g10(StructS *S, StructS2 *S2, uint64_t count) {
+  // CIR-LABEL: cir.func {{.*}} @_Z3g10
+  // CIR: %[[INT_1:.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: %[[UINT_1:.*]] = cir.cast integral %[[INT_1]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_1]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructS_f32]])
+  // CIR: %[[INT_4:.*]] = cir.const #cir.int<4> : !s32i
+  // CIR: %[[UINT_4:.*]] = cir.cast integral %[[INT_4]] : !s32i -> !u16i
+  // CIR: cir.store{{.*}} %[[UINT_4]], %{{.*}} : !u16i, !cir.ptr<!u16i> tbaa(#tbaa[[TAG_StructS2_f16]])
+
+  // CHECK-LABEL: define{{.*}} i32 @_Z3g10
+  // CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // CHECK: store i16 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i16]]
+  // PATH-LABEL: define{{.*}} i32 @_Z3g10
+  // PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_S_f32]]
+  // PATH: store i16 4, ptr %{{.*}}, align {{4|2}}, !tbaa [[TAG_S2_f16:!.*]]
+  S->f32 = 1;
+  S2->f16 = 4;
+  return S->f32;
+}
+
+uint32_t g11(StructC *C, StructD *D, uint64_t count) {
+  // CIR-LABEL: cir.func {{.*}} @_Z3g11
+  // CIR: %[[INT_1:.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: %[[UINT_1:.*]] = cir.cast integral %[[INT_1]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_1]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructC_b_a_f32]])
+  // CIR: %[[INT_4:.*]] = cir.const #cir.int<4> : !s32i
+  // CIR: %[[UINT_4:.*]] = cir.cast integral %[[INT_4]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_4]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructD_b_a_f32]])
+
+  // CHECK-LABEL: define{{.*}} i32 @_Z3g11
+  // CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // PATH-LABEL: define{{.*}} i32 @_Z3g11
+  // PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_C_b_a_f32:!.*]]
+  // PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_D_b_a_f32:!.*]]
+  C->b.a.f32 = 1;
+  D->b.a.f32 = 4;
+  return C->b.a.f32;
+}
+
+uint32_t g12(StructC *C, StructD *D, uint64_t count) {
+  // CIR-LABEL: cir.func {{.*}} @_Z3g12
+  // CIR: %[[INT_1:.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: %[[UINT_1:.*]] = cir.cast integral %[[INT_1]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_1]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructB_a_f32]])
+  // CIR: %[[INT_4:.*]] = cir.const #cir.int<4> : !s32i
+  // CIR: %[[UINT_4:.*]] = cir.cast integral %[[INT_4]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_4]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructB_a_f32]])
+
+  // CHECK-LABEL: define{{.*}} i32 @_Z3g12
+  // CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // TODO(cir): differentiate the two accesses.
+  // PATH-LABEL: define{{.*}} i32 @_Z3g12
+  // PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_B_a_f32]]
+  // PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_B_a_f32]]
+  StructB *b1 = &(C->b);
+  StructB *b2 = &(D->b);
+  // b1 and b2 have different contexts.
+  b1->a.f32 = 1;
+  b2->a.f32 = 4;
+  return b1->a.f32;
+}
+
+struct six {
+  char a;
+  int :0;
+  char b;
+  char c;
+};
+char g14(struct six *a, struct six *b) {
+  // CIR-LABEL: cir.func {{.*}} @_Z3g14
+  // CIR: %[[TMP1:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!rec_six>>, !cir.ptr<!rec_six>
+  // CIR: %[[TMP2:.*]] = cir.get_member %[[TMP1]][2] {name = "b"} : !cir.ptr<!rec_six> -> !cir.ptr<!s8i>
+  // CIR: %[[TMP3:.*]] = cir.load{{.*}} %[[TMP2]] : !cir.ptr<!s8i>, !s8i tbaa(#tbaa[[TAG_six_b]])
+
+  // CHECK-LABEL: define{{.*}} i8 @_Z3g14
+  // CHECK: load i8, ptr %{{.*}}, align 1, !tbaa [[TAG_char:!.*]]
+  // PATH-LABEL: define{{.*}} i8 @_Z3g14
+  // PATH: load i8, ptr %{{.*}}, align 1, !tbaa [[TAG_six_b:!.*]]
+  return a->b;
+}
+
+// Types that differ only by name may alias.
+typedef StructS StructS3;
+uint32_t g15(StructS *S, StructS3 *S3, uint64_t count) {
+  // CIR-LABEL: cir.func {{.*}} @_Z3g15
+  // CIR: %[[INT_1:.*]] = cir.const #cir.int<1> : !s32i
+  // CIR: %[[UINT_1:.*]] = cir.cast integral %[[INT_1]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_1]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructS_f32]])
+  // CIR: %[[INT_4:.*]] = cir.const #cir.int<4> : !s32i
+  // CIR: %[[UINT_4:.*]] = cir.cast integral %[[INT_4]] : !s32i -> !u32i
+  // CIR: cir.store{{.*}} %[[UINT_4]], %{{.*}} : !u32i, !cir.ptr<!u32i> tbaa(#tbaa[[TAG_StructS_f32]])
+
+
+  // CHECK-LABEL: define{{.*}} i32 @_Z3g15
+  // CHECK: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // CHECK: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_i32]]
+  // PATH-LABEL: define{{.*}} i32 @_Z3g15
+  // PATH: store i32 1, ptr %{{.*}}, align 4, !tbaa [[TAG_S_f32]]
+  // PATH: store i32 4, ptr %{{.*}}, align 4, !tbaa [[TAG_S_f32]]
+  S->f32 = 1;
+  S3->f32 = 4;
+  return S->f32;
+}
+
+// CHECK: [[TYPE_char:!.*]] = !{!"omnipotent char", [[TAG_cxx_tbaa:!.*]],
+// CHECK: [[TAG_cxx_tbaa]] = !{!"Simple C++ TBAA"}
+// CHECK: [[TAG_i32]] = !{[[TYPE_i32:!.*]], [[TYPE_i32]], i64 0}
+// CHECK: [[TYPE_i32]] = !{!"int", [[TYPE_char]],
+// CHECK: [[TAG_i16]] = !{[[TYPE_i16:!.*]], [[TYPE_i16]], i64 0}
+// CHECK: [[TYPE_i16]] = !{!"short", [[TYPE_char]],
+// CHECK: [[TAG_char]] = !{[[TYPE_char]], [[TYPE_char]], i64 0}
+
+// OLD-PATH: [[TYPE_CHAR:!.*]] = !{!"omnipotent char", [[TAG_cxx_tbaa:!.*]],
+// OLD-PATH: [[TAG_cxx_tbaa]] = !{!"Simple C++ TBAA"}
+// OLD-PATH: [[TAG_i32]] = !{[[TYPE_INT:!.*]], [[TYPE_INT]], i64 0}
+// OLD-PATH: [[TYPE_INT]] = !{!"int", [[TYPE_CHAR]], i64 0}
+// OLD-PATH: [[TAG_A_f32]] = !{[[TYPE_A:!.*]], [[TYPE_INT]], i64 4}
+// OLD-PATH: [[TYPE_A]] = !{!"_ZTS7StructA", [[TYPE_SHORT:!.*]], i64 0, [[TYPE_INT]], i64 4, [[TYPE_SHORT]], i64 8, [[TYPE_INT]], i64 12}
+// OLD-PATH: [[TYPE_SHORT:!.*]] = !{!"short", [[TYPE_CHAR]]
+// OLD-PATH: [[TAG_A_f16]] = !{[[TYPE_A]], [[TYPE_SHORT]], i64 0}
+// OLD-PATH: [[TAG_B_a_f32]] = !{[[TYPE_B:!.*]], [[TYPE_INT]], i64 8}
+// OLD-PATH: [[TYPE_B]] = !{!"_ZTS7StructB", [[TYPE_SHORT]], i64 0, [[TYPE_A]], i64 4, [[TYPE_INT]], i64 20}
+// OLD-PATH: [[TAG_B_a_f16]] = !{[[TYPE_B]], [[TYPE_SHORT]], i64 4}
+// OLD-PATH: [[TAG_B_f32]] = !{[[TYPE_B]], [[TYPE_INT]], i64 20}
+// OLD-PATH: [[TAG_B_a_f32_2]] = !{[[TYPE_B]], [[TYPE_INT]], i64 16}
+// OLD-PATH: [[TAG_S_f32]] = !{[[TYPE_S:!.*]], [[TYPE_INT]], i64 4}
+// OLD-PATH: [[TYPE_S]] = !{!"_ZTS7StructS", [[TYPE_SHORT]], i64 0, [[TYPE_INT]], i64 4}
+// OLD-PATH: [[TAG_S_f16]] = !{[[TYPE_S]], [[TYPE_SHORT]], i64 0}
+// OLD-PATH: [[TAG_S2_f32]] = !{[[TYPE_S2:!.*]], [[TYPE_INT]], i64 4}
+// OLD-PATH: [[TYPE_S2]] = !{!"_ZTS8StructS2", [[TYPE_SHORT]], i64 0, [[TYPE_INT]], i64 4}
+// OLD-PATH: [[TAG_S2_f16]] = !{[[TYPE_S2]], [[TYPE_SHORT]], i64 0}
+// OLD-PATH: [[TAG_C_b_a_f32]] = !{[[TYPE_C:!.*]], [[TYPE_INT]], i64 12}
+// OLD-PATH: [[TYPE_C]] = !{!"_ZTS7StructC", [[TYPE_SHORT]], i64 0, [[TYPE_B]], i64 4, [[TYPE_INT]], i64 28}
+// OLD-PATH: [[TAG_D_b_a_f32]] = !{[[TYPE_D:!.*]], [[TYPE_INT]], i64 12}
+// OLD-PATH: [[TYPE_D]] = !{!"_ZTS7StructD", [[TYPE_SHORT]], i64 0, [[TYPE_B]], i64 4, [[TYPE_INT]], i64 28, [[TYPE_CHAR]], i64 32}
+// OLD-PATH: [[TAG_six_b]] = !{[[TYPE_six:!.*]], [[TYPE_CHAR]], i64 4}
+// OLD-PATH: [[TYPE_six]] = !{!"_ZTS3six", [[TYPE_CHAR]], i64 0, [[TYPE_CHAR]], i64 4, [[TYPE_CHAR]], i64 5}
diff --git a/clang/test/CIR/Incubator/CodeGen/tbaa-union.c b/clang/test/CIR/Incubator/CodeGen/tbaa-union.c
new file mode 100644
index 0000000000000..79f6df87caf98
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/tbaa-union.c
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -O1
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -relaxed-aliasing
+// RUN: FileCheck --check-prefix=NO-TBAA --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O0
+// RUN: FileCheck --check-prefix=NO-TBAA --input-file=%t.ll %s
+
+// NO-TBAA-NOT: !tbaa
+// CIR: #tbaa[[CHAR:.*]] = #cir.tbaa_omnipotent_char
+typedef struct {
+  union {
+    int a, b;
+  };
+  int c;
+} S;
+
+void foo(S *s) {
+  // CIR-LABEL: cir.func {{.*}} @foo
+  // CIR: %[[C1:.*]] = cir.const #cir.int<1> : !s32i loc(#loc6)
+  // CIR: %{{.*}} = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!rec_S>>, !cir.ptr<!rec_S>
+  // CIR: cir.store{{.*}} %[[C1]], %{{.*}} : !s32i, !cir.ptr<!s32i> tbaa(#tbaa[[CHAR]])
+
+  // LLVM-LABEL: void @foo
+  // LLVM: store i32 1, ptr %{{.*}}, align 4, !tbaa ![[TBAA_TAG:.*]]
+  s->a = 1;
+}
+
+// LLVM: ![[TBAA_TAG]] = !{![[CHAR:.*]], ![[CHAR]], i64 0}
+// LLVM: ![[CHAR]] = !{!"omnipotent char", ![[ROOT:.*]], i64 0}
+// LLVM: ![[ROOT]] = !{!"Simple C/C++ TBAA"}
diff --git a/clang/test/CIR/Incubator/CodeGen/tbaa-vptr.cpp b/clang/test/CIR/Incubator/CodeGen/tbaa-vptr.cpp
new file mode 100644
index 0000000000000..01602bc4d8d80
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/tbaa-vptr.cpp
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -O1
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O1 -relaxed-aliasing
+// RUN: FileCheck --check-prefix=NO-TBAA --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll -O0
+// RUN: FileCheck --check-prefix=NO-TBAA --input-file=%t.ll %s
+
+// NO-TBAA-NOT: !tbaa
+
+// CIR: #tbaa[[VPTR:.*]] = #cir.tbaa_vptr<type = !cir.vptr>
+
+struct Member {
+  ~Member();
+};
+
+struct A {
+  virtual ~A();
+};
+
+struct B : A {
+  Member m;
+  virtual ~B();
+};
+B::~B() { }
+
+// CIR-LABEL: _ZN1BD2Ev
+// CIR: cir.store{{.*}} %{{.*}}, %{{.*}} : !cir.vptr, !cir.ptr<!cir.vptr> tbaa(#tbaa[[VPTR]])
+
+// LLVM-LABEL: _ZN1BD2Ev
+// LLVM: store ptr getelementptr inbounds nuw (i8, ptr @_ZTV1B, i64 16), ptr %{{.*}}, align 8, !tbaa ![[TBAA_VPTR:.*]]
+// LLVM: ![[TBAA_VPTR]] = !{![[TBAA_VPTR_PARENT:.*]], ![[TBAA_VPTR_PARENT]], i64 0}
+// LLVM: ![[TBAA_VPTR_PARENT]] = !{!"vtable pointer", !
diff --git a/clang/test/CIR/Incubator/CodeGen/temporaries.cpp b/clang/test/CIR/Incubator/CodeGen/temporaries.cpp
new file mode 100644
index 0000000000000..1d147f085fda6
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/temporaries.cpp
@@ -0,0 +1,65 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -fcxx-exceptions -fexceptions -emit-cir %s -o %t.eh.cir
+// RUN: FileCheck --input-file=%t.eh.cir %s -check-prefix=CIR_EH
+// RUN: cir-translate %t.cir -cir-to-llvmir --disable-cc-lowering -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM_CIR
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t-og.ll
+// RUN: FileCheck --input-file=%t-og.ll %s -check-prefix=OGCG
+
+struct E {
+  ~E();
+  E operator!();
+};
+
+void f() {
+  !E();
+}
+
+//      CIR: cir.func private @_ZN1EC1Ev(!cir.ptr<!rec_E>) special_member<#cir.cxx_ctor<!rec_E, default>> extra(#fn_attr)
+// CIR-NEXT: cir.func private @_ZN1EntEv(!cir.ptr<!rec_E>) -> !rec_E
+// CIR-NEXT: cir.func private @_ZN1ED1Ev(!cir.ptr<!rec_E>) special_member<#cir.cxx_dtor<!rec_E>> extra(#fn_attr)
+// Trivial default constructor call is lowered away.
+// CIR-NEXT: cir.func {{.*}} @_Z1fv() {{.*}} {
+// CIR-NEXT:   cir.scope {
+// CIR-NEXT:     %[[ONE:[0-9]+]] = cir.alloca !rec_E, !cir.ptr<!rec_E>, ["agg.tmp.ensured"] {alignment = 1 : i64}
+// CIR-NEXT:     %[[TWO:[0-9]+]] = cir.alloca !rec_E, !cir.ptr<!rec_E>, ["ref.tmp0"] {alignment = 1 : i64}
+// CIR-NEXT:     %[[THREE:[0-9]+]] = cir.call @_ZN1EntEv(%[[TWO]]) : (!cir.ptr<!rec_E>) -> !rec_E
+// CIR-NEXT:     cir.store{{.*}} %[[THREE]], %[[ONE]] : !rec_E, !cir.ptr<!rec_E>
+// CIR-NEXT:     cir.call @_ZN1ED1Ev(%[[ONE]]) : (!cir.ptr<!rec_E>) -> () extra(#fn_attr)
+// CIR-NEXT:     cir.call @_ZN1ED1Ev(%[[TWO]]) : (!cir.ptr<!rec_E>) -> () extra(#fn_attr)
+// CIR-NEXT:   }
+// CIR-NEXT:   cir.return
+// CIR-NEXT: }
+
+// CIR_EH-LABEL: @_Z1fv
+// CIR_EH: %[[AGG_TMP:.*]] = cir.alloca {{.*}} ["agg.tmp.ensured"]
+// CIR_EH: cir.try synthetic cleanup {
+// CIR_EH:   %[[RVAL:.*]] = cir.call exception {{.*}} cleanup {
+// CIR_EH:     cir.call @_ZN1ED1Ev
+// CIR_EH:     cir.yield
+// CIR_EH:   }
+// CIR_EH:   cir.store{{.*}} %[[RVAL]], %[[AGG_TMP]]
+// CIR_EH:   cir.yield
+// CIR_EH: } catch [#cir.unwind {
+
+const unsigned int n = 1234;
+const int &r = (const int&)n;
+
+//      CIR: cir.global "private" constant internal @_ZGR1r_ = #cir.int<1234> : !s32i
+// CIR-NEXT: cir.global constant external @r = #cir.global_view<@_ZGR1r_> : !cir.ptr<!s32i> {alignment = 8 : i64}
+
+//      LLVM: @_ZGR1r_ = internal constant i32 1234, align 4
+// LLVM-NEXT: @r = constant ptr @_ZGR1r_, align 8
+
+// LLVM_CIR-LABEL: define {{.*}} @_Z1fv
+// LLVM_CIR-NOT:     call {{.*}} @_ZN1EC1Ev
+// LLVM_CIR:         call {{.*}} @_ZN1EntEv
+// LLVM_CIR:         call {{.*}} @_ZN1ED1Ev
+
+// OGCG-LABEL: define {{.*}} @_Z1fv
+// OGCG-NOT:     call {{.*}} @_ZN1EC1Ev
+// OGCG:         call {{.*}} @_ZN1EntEv
+// OGCG:         call {{.*}} @_ZN1ED1Ev
diff --git a/clang/test/CIR/Incubator/CodeGen/temporary-materialization.cpp b/clang/test/CIR/Incubator/CodeGen/temporary-materialization.cpp
new file mode 100644
index 0000000000000..4a8f3208ade4a
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/temporary-materialization.cpp
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+int make_int();
+
+int test() {
+  const int &x = make_int();
+  return x;
+}
+
+//      CHECK: cir.func {{.*}} @_Z4testv()
+// CHECK-NEXT:   %{{.+}} = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CHECK-NEXT:   %[[#TEMP_SLOT:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["ref.tmp0", init] {alignment = 4 : i64}
+// CHECK-NEXT:   %[[#x:]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["x", init, const] {alignment = 8 : i64}
+// CHECK-NEXT:   cir.scope {
+// CHECK-NEXT:     %[[#TEMP_VALUE:]] = cir.call @_Z8make_intv() : () -> !s32i
+// CHECK-NEXT:     cir.store{{.*}} %[[#TEMP_VALUE]], %[[#TEMP_SLOT]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:   }
+// CHECK-NEXT:   cir.store{{.*}} %[[#TEMP_SLOT]], %[[#x]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+//      CHECK: }
+
+int test_scoped() {
+  int x = make_int();
+  {
+    const int &y = make_int();
+    x = y;
+  }
+  return x;
+}
+
+//      CHECK: cir.func {{.*}} @_Z11test_scopedv()
+// CHECK-NEXT:   %{{.+}} = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CHECK-NEXT:   %{{.+}} = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+//      CHECK:   cir.scope {
+// CHECK-NEXT:     %[[#TEMP_SLOT:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["ref.tmp0", init] {alignment = 4 : i64}
+// CHECK-NEXT:     %[[#y:]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["y", init, const] {alignment = 8 : i64}
+// CHECK-NEXT:     cir.scope {
+// CHECK-NEXT:       %[[#TEMP_VALUE:]] = cir.call @_Z8make_intv() : () -> !s32i
+// CHECK-NEXT:       cir.store{{.*}} %[[#TEMP_VALUE]], %[[#TEMP_SLOT]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:     }
+// CHECK-NEXT:     cir.store{{.*}} %[[#TEMP_SLOT]], %[[#y]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+//      CHECK:   }
+//      CHECK: }
diff --git a/clang/test/CIR/Incubator/CodeGen/tempref.cpp b/clang/test/CIR/Incubator/CodeGen/tempref.cpp
new file mode 100644
index 0000000000000..992063b7bad8d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/tempref.cpp
@@ -0,0 +1,42 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: cir-translate %t.cir -cir-to-llvmir -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+struct A { ~A(); };
+A &&a = dynamic_cast<A&&>(A{});
+
+//      CHECK: cir.func {{.*}} @_ZN1AD1Ev(!cir.ptr<!rec_A>) special_member<#cir.cxx_dtor<!rec_A>>
+// CHECK-NEXT: cir.global external @a = #cir.ptr<null> : !cir.ptr<!rec_A> {alignment = 8 : i64, ast = #cir.var.decl.ast}
+// CHECK-NEXT: cir.func internal private @__cxx_global_var_init() {
+// CHECK-NEXT:   cir.scope {
+// CHECK-NEXT:     %[[SEVEN:[0-9]+]] = cir.get_global @a : !cir.ptr<!cir.ptr<!rec_A>>
+// CHECK-NEXT:     %[[EIGHT:[0-9]+]] = cir.get_global @_ZGR1a_ : !cir.ptr<!rec_A>
+// CHECK-NEXT:     cir.store{{.*}} %[[EIGHT]], %[[SEVEN]] : !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>
+// CHECK-NEXT:   }
+// CHECK-NEXT:   cir.return
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.func {{.*}} @_GLOBAL__sub_I_tempref.cpp() {
+// CHECK-NEXT:   cir.call @__cxx_global_var_init() : () -> ()
+// CHECK-NEXT:   cir.return
+// CHECK-NEXT: }
+
+//      LLVM: @_ZGR1a_ = internal global %struct.A undef
+// LLVM-DAG: @a = global ptr null, align 8
+// LLVM-DAG: @llvm.global_ctors = appending constant [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__cxx_global_var_init, ptr null }]
+
+// LLVM-DAG: declare {{.*}} void @_ZN1AD1Ev(ptr)
+
+// LLVM-DAG: define internal void @__cxx_global_var_init()
+// LLVM-DAG:   br label %[[L1:[0-9]+]]
+// LLVM-DAG: [[L1]]:
+// LLVM-DAG:   store ptr @_ZGR1a_, ptr @a, align 8
+// LLVM-DAG:   br label %[[L2:[0-9]+]]
+// LLVM-DAG: [[L2]]:
+// LLVM-DAG:   ret void
+// LLVM-DAG: }
+
+// LLVM-DAG: define void @_GLOBAL__sub_I_tempref.cpp()
+// LLVM-DAG:   call void @__cxx_global_var_init()
+// LLVM-DAG:   ret void
+// LLVM-DAG: }
diff --git a/clang/test/CIR/Incubator/CodeGen/ternary.c b/clang/test/CIR/Incubator/CodeGen/ternary.c
new file mode 100644
index 0000000000000..ee6c715495768
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/ternary.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -fno-clangir-call-conv-lowering %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+
+#include <stdarg.h>
+
+double f1(int cond, int n, ...) {
+  va_list valist;
+  va_start(valist, n);
+  double res = cond ? va_arg(valist, double) : 0;
+  va_end(valist);
+  return res;
+}
+
+// Fine enough to check it passes the verifying.
+// CIR: cir.ternary
+
+int unconditional_evaluation(_Bool cond) {
+  return cond ? 123 : 456;
+  // CIR: %[[TRUE_CONST:.+]] = cir.const #cir.int<123>
+  // CIR: %[[FALSE_CONST:.+]] = cir.const #cir.int<456>
+  // CIR: cir.select if {{.+}} then %[[TRUE_CONST]] else %[[FALSE_CONST]] : (!cir.bool, !s32i, !s32i) -> !s32i
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/ternary.cpp b/clang/test/CIR/Incubator/CodeGen/ternary.cpp
new file mode 100644
index 0000000000000..92bac9463a7f5
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/ternary.cpp
@@ -0,0 +1,97 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -O1 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+int x(int y) {
+  return y > 0 ? 3 : 5;
+}
+
+// CHECK: cir.func {{.*}} @_Z1xi
+// CHECK:     %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {alignment = 4 : i64}
+// CHECK:     %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CHECK:     cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK:     %2 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// CHECK:     %3 = cir.const #cir.int<0> : !s32i
+// CHECK:     %4 = cir.cmp(gt, %2, %3) : !s32i, !cir.bool
+// CHECK:     %5 = cir.const #cir.int<3> : !s32i
+// CHECK:     %6 = cir.const #cir.int<5> : !s32i
+// CHECK:     %7 = cir.select if %4 then %5 else %6 : (!cir.bool, !s32i, !s32i) -> !s32i
+// CHECK:     cir.store{{.*}} %7, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK:     %8 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// CHECK:     cir.return %8 : !s32i
+// CHECK:   }
+
+typedef enum {
+  API_A,
+  API_EnumSize = 0x7fffffff
+} APIType;
+
+void oba(const char *);
+
+void m(APIType api) {
+  ((api == API_A) ? (static_cast<void>(0)) : oba("yo.cpp"));
+}
+
+// CHECK:  cir.func {{.*}} @_Z1m7APIType
+// CHECK:    %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["api", init] {alignment = 4 : i64}
+// CHECK:    cir.store %arg0, %0 : !u32i, !cir.ptr<!u32i>
+// CHECK:    %1 = cir.load{{.*}} %0 : !cir.ptr<!u32i>, !u32i
+// CHECK:    %2 = cir.cast integral %1 : !u32i -> !s32i
+// CHECK:    %3 = cir.const #cir.int<0> : !u32i
+// CHECK:    %4 = cir.cast integral %3 : !u32i -> !s32i
+// CHECK:    %5 = cir.cmp(eq, %2, %4) : !s32i, !cir.bool
+// CHECK:    cir.ternary(%5, true {
+// CHECK:      %6 = cir.const #cir.int<0> : !s32i
+// CHECK:      cir.yield
+// CHECK:    }, false {
+// CHECK:      %6 = cir.get_global @".str" : !cir.ptr<!cir.array<!s8i x 7>>
+// CHECK:      %7 = cir.cast array_to_ptrdecay %6 : !cir.ptr<!cir.array<!s8i x 7>> -> !cir.ptr<!s8i>
+// CHECK:      cir.call @_Z3obaPKc(%7) : (!cir.ptr<!s8i>) -> ()
+// CHECK:      cir.yield
+// CHECK:    }) : (!cir.bool) -> ()
+// CHECK:    cir.return
+// CHECK:  }
+
+int foo(int a, int b) {
+  if (a < b ? 0 : a)
+    return -1;
+  return 0;
+}
+
+// CHECK:  cir.func {{.*}} @_Z3fooii
+// CHECK:   [[A0:%.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CHECK:   [[B0:%.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CHECK:   [[CMP:%.*]] = cir.cmp(lt, [[A0]], [[B0]]) : !s32i, !cir.bool
+// CHECK:   [[RES:%.*]] = cir.ternary([[CMP]], true {
+// CHECK:     [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+// CHECK:     cir.yield [[ZERO]] : !s32i
+// CHECK:   }, false {
+// CHECK:     [[A1:%.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CHECK:     cir.yield [[A1]] : !s32i
+// CHECK:   }) : (!cir.bool) -> !s32i
+// CHECK:   [[RES_CAST:%.*]] = cir.cast int_to_bool [[RES]] : !s32i -> !cir.bool
+// CHECK:   cir.if [[RES_CAST]]
+
+void maybe_has_side_effects();
+
+bool func(bool a, bool b) {
+    return (maybe_has_side_effects(), a) ?: b;
+}
+
+// CHECK:  cir.func {{.*}} @_Z4funcbb([[ARG_A:%.*]]: !cir.bool {{.*}}, [[ARG_B:%.*]]: !cir.bool {{.*}}
+// CHECK:    [[ALLOC_A:%.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["a", init]
+// CHECK:    [[ALLOC_B:%.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["b", init]
+// CHECK:    [[ALLOC_RET:%.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["__retval"]
+// CHECK:    cir.store [[ARG_A]], [[ALLOC_A]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:    cir.store [[ARG_B]], [[ALLOC_B]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:    cir.call @_Z22maybe_has_side_effectsv() : () -> ()
+// CHECK:    [[A0:%.*]] = cir.load{{.*}} [[ALLOC_A]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK:    [[RES:%.*]] = cir.ternary([[A0]], true {
+// CHECK:      [[A1:%.*]] = cir.load{{.*}} [[ALLOC_A]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK:      cir.yield [[A1]] : !cir.bool
+// CHECK:    }, false {
+// CHECK:      [[B0:%.*]] = cir.load{{.*}} [[ALLOC_B]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK:      cir.yield [[B0]] : !cir.bool
+// CHECK:    }) : (!cir.bool) -> !cir.bool
+// CHECK:    cir.store{{.*}} [[RES]], [[ALLOC_RET]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK:    [[R:%.*]] = cir.load{{.*}} [[ALLOC_RET]] : !cir.ptr<!cir.bool>, !cir.bool
+// CHECK:    cir.return [[R]] : !cir.bool
diff --git a/clang/test/CIR/Incubator/CodeGen/thread-local.cpp b/clang/test/CIR/Incubator/CodeGen/thread-local.cpp
new file mode 100644
index 0000000000000..6c1e1832c491e
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/thread-local.cpp
@@ -0,0 +1,65 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.og.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.og.ll %s
+
+// Test basic thread_local variable with constant initialization
+thread_local int tls_const = 42;
+// CIR: cir.global{{.*}}tls_dyn{{.*}}@tls_const = #cir.int<42> : !s32i
+// LLVM: @tls_const = thread_local global i32 42
+// OGCG: @tls_const = thread_local global i32 42
+
+// Test __thread (GNU-style) thread_local
+__thread int tls_gnu_style = 10;
+// CIR: cir.global{{.*}}tls_dyn{{.*}}@tls_gnu_style = #cir.int<10> : !s32i
+// LLVM: @tls_gnu_style = thread_local global i32 10
+// OGCG: @tls_gnu_style = thread_local global i32 10
+
+// Test thread_local function-local static (constant init)
+int get_tls_static() {
+  thread_local int tls_func_static = 100;
+  return ++tls_func_static;
+}
+// CIR-LABEL: cir.func{{.*}}@_Z14get_tls_staticv
+// CIR: cir.get_global{{.*}}@_ZZ14get_tls_staticvE15tls_func_static
+// LLVM-LABEL: @_Z14get_tls_staticv
+// LLVM: load{{.*}}@_ZZ14get_tls_staticvE15tls_func_static
+// OGCG-LABEL: @_Z14get_tls_staticv
+// OGCG: @llvm.threadlocal.address.p0(ptr{{.*}}@_ZZ14get_tls_staticvE15tls_func_static)
+
+// Test reading from thread_local variable
+int read_tls() {
+  return tls_const;
+}
+// CIR-LABEL: cir.func{{.*}}@_Z8read_tlsv
+// CIR: cir.get_global thread_local @tls_const
+// LLVM-LABEL: @_Z8read_tlsv
+// LLVM: @llvm.threadlocal.address.p0(ptr @tls_const)
+// OGCG-LABEL: @_Z8read_tlsv
+// OGCG: @llvm.threadlocal.address.p0(ptr{{.*}}@tls_const)
+
+// Test writing to thread_local variable
+void write_tls(int val) {
+  tls_const = val;
+}
+// CIR-LABEL: cir.func{{.*}}@_Z9write_tlsi
+// CIR: cir.get_global thread_local @tls_const
+// CIR: cir.store
+// LLVM-LABEL: @_Z9write_tlsi
+// LLVM: @llvm.threadlocal.address.p0(ptr @tls_const)
+// OGCG-LABEL: @_Z9write_tlsi
+// OGCG: @llvm.threadlocal.address.p0(ptr{{.*}}@tls_const)
+
+// Test extern thread_local
+extern thread_local int tls_extern;
+int use_extern_tls() {
+  return tls_extern;
+}
+// CIR-LABEL: cir.func{{.*}}@_Z14use_extern_tlsv
+// CIR: cir.get_global thread_local @tls_extern
+// LLVM-LABEL: @_Z14use_extern_tlsv
+// LLVM: @llvm.threadlocal.address.p0(ptr @tls_extern)
+// OGCG-LABEL: @_Z14use_extern_tlsv
+// OGCG: call ptr @_ZTW10tls_extern()
diff --git a/clang/test/CIR/Incubator/CodeGen/three-way-comparison.cpp b/clang/test/CIR/Incubator/CodeGen/three-way-comparison.cpp
new file mode 100644
index 0000000000000..1c9fabc250e3d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/three-way-comparison.cpp
@@ -0,0 +1,68 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=BEFORE
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=AFTER
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -DNON_CANONICAL_CMP_RESULTS -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=NONCANONICAL-BEFORE
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -DNON_CANONICAL_CMP_RESULTS -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=NONCANONICAL-AFTER
+
+#include "Inputs/std-compare.h"
+
+// BEFORE: #cmp3way_info_partial_ltn1eq0gt1unn127 = #cir.cmp3way_info<partial, lt = -1, eq = 0, gt = 1, unordered = -127>
+// BEFORE: #cmp3way_info_strong_ltn1eq0gt1 = #cir.cmp3way_info<strong, lt = -1, eq = 0, gt = 1>
+// BEFORE: !rec_std3A3A__13A3Apartial_ordering = !cir.record<class "std::__1::partial_ordering" {!s8i}
+// BEFORE: !rec_std3A3A__13A3Astrong_ordering = !cir.record<class "std::__1::strong_ordering" {!s8i}
+
+auto three_way_strong(int x, int y) {
+  return x <=> y;
+}
+
+// BEFORE: cir.func {{.*}} @_Z16three_way_strongii
+// BEFORE:   %{{.+}} = cir.cmp3way(%{{.+}} : !s32i, %{{.+}}, #cmp3way_info_strong_ltn1eq0gt1) : !s8i
+// BEFORE: }
+
+// AFTER: cir.func {{.*}} @_Z16three_way_strongii
+// AFTER:   %{{.+}} = cir.cmp3way(%{{.+}} : !s32i, %{{.+}}, #cmp3way_info_strong_ltn1eq0gt1) : !s8i
+// AFTER: }
+
+// NONCANONICAL-BEFORE: #cmp3way_info_strong_lt1eq2gt3 = #cir.cmp3way_info<strong, lt = 1, eq = 2, gt = 3>
+// NONCANONICAL-BEFORE: cir.func {{.*}} @_Z16three_way_strongii
+// NONCANONICAL-BEFORE:   %{{.+}} = cir.cmp3way(%{{.+}} : !s32i, %{{.+}}, #cmp3way_info_strong_lt1eq2gt3) : !s8i
+// NONCANONICAL-BEFORE: }
+
+//      NONCANONICAL-AFTER: #cmp3way_info_strong_ltn1eq0gt1 = #cir.cmp3way_info<strong, lt = -1, eq = 0, gt = 1>
+//      NONCANONICAL-AFTER: cir.func {{.*}} @_Z16three_way_strongii
+//      NONCANONICAL-AFTER:   %[[#CMP3WAY_RESULT:]] = cir.cmp3way(%{{.+}} : !s32i, %{{.+}}, #cmp3way_info_strong_ltn1eq0gt1) : !s8i
+// NONCANONICAL-AFTER-NEXT:   %[[#NEGONE:]] = cir.const #cir.int<-1> : !s8i
+// NONCANONICAL-AFTER-NEXT:   %[[#ONE:]] = cir.const #cir.int<1> : !s8i
+// NONCANONICAL-AFTER-NEXT:   %[[#CMP_TO_NEGONE:]] = cir.cmp(eq, %[[#CMP3WAY_RESULT]], %[[#NEGONE]]) : !s8i, !cir.bool
+// NONCANONICAL-AFTER-NEXT:   %[[#A:]] = cir.select if %[[#CMP_TO_NEGONE]] then %[[#ONE]] else %[[#CMP3WAY_RESULT]] : (!cir.bool, !s8i, !s8i) -> !s8i
+// NONCANONICAL-AFTER-NEXT:   %[[#ZERO:]] = cir.const #cir.int<0> : !s8i
+// NONCANONICAL-AFTER-NEXT:   %[[#TWO:]] = cir.const #cir.int<2> : !s8i
+// NONCANONICAL-AFTER-NEXT:   %[[#CMP_TO_ZERO:]] = cir.cmp(eq, %[[#A]], %[[#ZERO]]) : !s8i, !cir.bool
+// NONCANONICAL-AFTER-NEXT:   %[[#B:]] = cir.select if %[[#CMP_TO_ZERO]] then %[[#TWO]] else %[[#A]] : (!cir.bool, !s8i, !s8i) -> !s8i
+// NONCANONICAL-AFTER-NEXT:   %[[#ONE2:]] = cir.const #cir.int<1> : !s8i
+// NONCANONICAL-AFTER-NEXT:   %[[#THREE:]] = cir.const #cir.int<3> : !s8i
+// NONCANONICAL-AFTER-NEXT:   %[[#CMP_TO_ONE:]] = cir.cmp(eq, %[[#B]], %[[#ONE2]]) : !s8i, !cir.bool
+// NONCANONICAL-AFTER-NEXT:   %{{.+}} = cir.select if %[[#CMP_TO_ONE]] then %[[#THREE]] else %[[#B]] : (!cir.bool, !s8i, !s8i) -> !s8i
+//      NONCANONICAL-AFTER: }
+
+auto three_way_weak(float x, float y) {
+  return x <=> y;
+}
+
+// BEFORE: cir.func {{.*}} @_Z14three_way_weakff
+// BEFORE:   %{{.+}} = cir.cmp3way(%{{.+}} : !cir.float, %{{.+}}, #cmp3way_info_partial_ltn1eq0gt1unn127) : !s8i
+// BEFORE: }
+
+//      AFTER: cir.func {{.*}} @_Z14three_way_weakff
+//      AFTER:   %[[#LHS:]] = cir.load{{.*}} %0 : !cir.ptr<!cir.float>, !cir.float
+// AFTER-NEXT:   %[[#RHS:]] = cir.load{{.*}} %1 : !cir.ptr<!cir.float>, !cir.float
+// AFTER-NEXT:   %[[#LT:]] = cir.const #cir.int<-1> : !s8i
+// AFTER-NEXT:   %[[#EQ:]] = cir.const #cir.int<0> : !s8i
+// AFTER-NEXT:   %[[#GT:]] = cir.const #cir.int<1> : !s8i
+// AFTER-NEXT:   %[[#UNORDERED:]] = cir.const #cir.int<-127> : !s8i
+// AFTER-NEXT:   %[[#CMP_LT:]] = cir.cmp(lt, %[[#LHS]], %[[#RHS]]) : !cir.float, !cir.bool
+// AFTER-NEXT:   %[[#CMP_EQ:]] = cir.cmp(eq, %[[#LHS]], %[[#RHS]]) : !cir.float, !cir.bool
+// AFTER-NEXT:   %[[#CMP_GT:]] = cir.cmp(gt, %[[#LHS]], %[[#RHS]]) : !cir.float, !cir.bool
+// AFTER-NEXT:   %[[#CMP_EQ_RES:]] = cir.select if %[[#CMP_EQ]] then %[[#EQ]] else %[[#UNORDERED]] : (!cir.bool, !s8i, !s8i) -> !s8i
+// AFTER-NEXT:   %[[#CMP_GT_RES:]] = cir.select if %[[#CMP_GT]] then %[[#GT]] else %[[#CMP_EQ_RES]] : (!cir.bool, !s8i, !s8i) -> !s8i
+// AFTER-NEXT:   %{{.+}} = cir.select if %[[#CMP_LT]] then %[[#LT]] else %[[#CMP_GT_RES]] : (!cir.bool, !s8i, !s8i) -> !s8i
+//      AFTER: }
diff --git a/clang/test/CIR/Incubator/CodeGen/throw.cpp b/clang/test/CIR/Incubator/CodeGen/throw.cpp
new file mode 100644
index 0000000000000..0e616e50c3089
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/throw.cpp
@@ -0,0 +1,434 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir --check-prefix=CIR %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll --check-prefix=LLVM %s
+
+double d(int a, int b) {
+   if (b == 0)
+      throw "Division by zero condition!";
+   return (a/b);
+}
+
+//      CIR: cir.if
+// CIR-NEXT:   %[[ADDR:.*]] = cir.alloc.exception 8
+// CIR-NEXT:   %[[STR:.*]] = cir.get_global @".str" : !cir.ptr<!cir.array<!s8i x 28>>
+// CIR-NEXT:   %[[STR_ADD:.*]] = cir.cast array_to_ptrdecay %[[STR]] : !cir.ptr<!cir.array<!s8i x 28>> -> !cir.ptr<!s8i>
+// CIR-NEXT:   cir.store{{.*}} %[[STR_ADD]], %[[ADDR]] : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+// CIR-NEXT:   cir.throw %[[ADDR]] : !cir.ptr<!cir.ptr<!s8i>>, @_ZTIPKc
+// CIR-NEXT:   cir.unreachable
+// CIR-NEXT: ^bb1:  // no predecessors
+// CIR-NEXT:   cir.yield
+// CIR-NEXT: }
+
+// LLVM: %[[ADDR:.*]] = call ptr @__cxa_allocate_exception(i64 8)
+// LLVM: store ptr @.str, ptr %[[ADDR]], align 16
+// LLVM: call void @__cxa_throw(ptr %[[ADDR]], ptr @_ZTIPKc, ptr null)
+// LLVM: unreachable
+
+struct S {
+  S() {}
+};
+
+void refoo1() {
+  int r = 1;
+  try {
+    S s;
+    throw;
+  } catch (...) {
+    ++r;
+  }
+}
+
+// CIR-LABEL: @_Z6refoo1v()
+// CIR:   %[[V0:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["r", init] {alignment = 4 : i64}
+// CIR:   %[[V1:.*]] = cir.const #cir.int<1> : !s32i
+// CIR:   cir.store{{.*}} %[[V1]], %[[V0]] : !s32i, !cir.ptr<!s32i>
+// CIR:   cir.scope {
+// CIR:     %[[V2:.*]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["s", init] {alignment = 1 : i64}
+// CIR:     cir.try {
+// CIR:       cir.call exception @_ZN1SC2Ev(%[[V2]]) : (!cir.ptr<!rec_S>) -> ()
+// CIR:       cir.call exception @__cxa_rethrow() : () -> ()
+// CIR:       cir.unreachable
+// CIR:     ^bb1:  // no predecessors
+// CIR:       cir.yield
+// CIR:     } catch [type #cir.all {
+// CIR:       %[[V3:.*]] = cir.catch_param -> !cir.ptr<!void>
+// CIR:       %[[V4:.*]] = cir.load{{.*}} %[[V0]] : !cir.ptr<!s32i>, !s32i
+// CIR:       %[[V5:.*]] = cir.unary(inc, %[[V4]]) nsw : !s32i, !s32i
+// CIR:       cir.store{{.*}} %[[V5]], %[[V0]] : !s32i, !cir.ptr<!s32i>
+// CIR:       cir.yield
+// CIR:     }]
+// CIR:   }
+// CIR:   cir.return
+// CIR: }
+
+// LLVM: define dso_local void @_Z6refoo1v()
+// LLVM:   %[[V1:.*]] = alloca %struct.S, i64 1, align 1
+// LLVM:   %[[V2:.*]] = alloca i32, i64 1, align 4
+// LLVM:   store i32 1, ptr %[[V2]], align 4
+// LLVM:   br label %[[B3:.*]]
+// LLVM: [[B3]]:
+// LLVM:   br label %[[B4:.*]]
+// LLVM: [[B4]]:
+// LLVM:   invoke void @_ZN1SC2Ev(ptr %[[V1]])
+// LLVM:           to label %[[B5:.*]] unwind label %[[B7:.*]]
+// LLVM: [[B5]]:
+// LLVM:   invoke void @__cxa_rethrow()
+// LLVM:           to label %[[B6:.*]] unwind label %[[B11:.*]]
+// LLVM: [[B6]]:
+// LLVM:   unreachable
+// LLVM: [[B7]]:
+// LLVM:   %[[V8:.*]] = landingpad { ptr, i32 }
+// LLVM:           catch ptr null
+// LLVM:   %[[V9:.*]] = extractvalue { ptr, i32 } %[[V8]], 0
+// LLVM:   %[[V10:.*]] = extractvalue { ptr, i32 } %[[V8]], 1
+// LLVM:   br label %[[B15:.*]]
+// LLVM: [[B11]]:
+// LLVM:   %[[V12:.*]] = landingpad { ptr, i32 }
+// LLVM:           catch ptr null
+// LLVM:   %[[V13:.*]] = extractvalue { ptr, i32 } %[[V12]], 0
+// LLVM:   %[[V14:.*]] = extractvalue { ptr, i32 } %[[V12]], 1
+// LLVM:   br label %[[B15:.*]]
+// LLVM: [[B15]]:
+// LLVM:   %[[V16:.*]] = phi ptr [ %[[V9]], %[[B7]] ], [ %[[V13]], %[[B11]] ]
+// LLVM:   %[[V17:.*]] = call ptr @__cxa_begin_catch(ptr %[[V16]])
+// LLVM:   %[[V18:.*]] = load i32, ptr %[[V2]], align 4
+// LLVM:   %[[V19:.*]] = add nsw i32 %[[V18]], 1
+// LLVM:   store i32 %[[V19]], ptr %[[V2]], align 4
+// LLVM:   call void @__cxa_end_catch()
+
+void refoo2() {
+  int r = 1;
+  try {
+    for (int i = 0; i < 5; i++) {
+      S s;
+      throw;
+    }
+    S s;
+  } catch (...) {
+    ++r;
+  }
+}
+
+// CIR-LABEL: @_Z6refoo2v()
+// CIR:   %[[V0:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["r", init] {alignment = 4 : i64}
+// CIR:   %[[V1:.*]] = cir.const #cir.int<1> : !s32i
+// CIR:   cir.store{{.*}} %[[V1]], %[[V0]] : !s32i, !cir.ptr<!s32i>
+// CIR:   cir.scope {
+// CIR:     %[[V2:.*]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["s", init] {alignment = 1 : i64}
+// CIR:     cir.try {
+// CIR:       cir.scope {
+// CIR:         %[[V3:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+// CIR:         %[[V4:.*]] = cir.const #cir.int<0> : !s32i
+// CIR:         cir.store{{.*}} %[[V4]], %[[V3]] : !s32i, !cir.ptr<!s32i>
+// CIR:         cir.for : cond {
+// CIR:           %[[V5:.*]] = cir.load{{.*}} %[[V3]] : !cir.ptr<!s32i>, !s32i
+// CIR:           %[[V6:.*]] = cir.const #cir.int<5> : !s32i
+// CIR:           %[[V7:.*]] = cir.cmp(lt, %[[V5]], %[[V6]]) : !s32i, !cir.bool
+// CIR:           cir.condition(%[[V7]])
+// CIR:         } body {
+// CIR:           cir.scope {
+// CIR:             %[[V5:.*]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["s", init] {alignment = 1 : i64}
+// CIR:             cir.call exception @_ZN1SC2Ev(%[[V5]]) : (!cir.ptr<!rec_S>) -> ()
+// CIR:             cir.call exception @__cxa_rethrow() : () -> ()
+// CIR:             cir.unreachable
+// CIR:           ^bb1:  // no predecessors
+// CIR:             cir.yield
+// CIR:           }
+// CIR:           cir.yield
+// CIR:         } step {
+// CIR:           %[[V5:.*]] = cir.load{{.*}} %[[V3]] : !cir.ptr<!s32i>, !s32i
+// CIR:           %[[V6:.*]] = cir.unary(inc, %[[V5]]) nsw : !s32i, !s32i
+// CIR:           cir.store{{.*}} %[[V6]], %[[V3]] : !s32i, !cir.ptr<!s32i>
+// CIR:           cir.yield
+// CIR:         }
+// CIR:       }
+// CIR:       cir.call exception @_ZN1SC2Ev(%[[V2]]) : (!cir.ptr<!rec_S>) -> ()
+// CIR:       cir.yield
+// CIR:     } catch [type #cir.all {
+// CIR:       %[[V3:.*]] = cir.catch_param -> !cir.ptr<!void>
+// CIR:       %[[V4:.*]] = cir.load{{.*}} %[[V0]] : !cir.ptr<!s32i>, !s32i
+// CIR:       %[[V5:.*]] = cir.unary(inc, %[[V4]]) nsw : !s32i, !s32i
+// CIR:       cir.store{{.*}} %[[V5]], %[[V0]] : !s32i, !cir.ptr<!s32i>
+// CIR:       cir.yield
+// CIR:     }]
+// CIR:   }
+// CIR:   cir.return
+// CIR: }
+
+// LLVM: {{.*}}:
+// LLVM:   invoke void @_ZN1SC2Ev(ptr %[[V3:.*]])
+// LLVM:           to label %[[B13:.*]] unwind label %[[B22:.*]]
+// LLVM: [[B13]]:
+// LLVM:   invoke void @__cxa_rethrow()
+// LLVM:           to label %[[B14:.*]] unwind label %[[B26:.*]]
+// LLVM: [[B14]]:
+// LLVM:   unreachable
+// LLVM: [[B15:.*]]:
+// LLVM:   br label %[[B16:.*]]
+// LLVM: [[B16]]:
+// LLVM:   %[[V17:.*]] = load i32, ptr {{.*}}, align 4
+// LLVM:   %[[V18:.*]] = add nsw i32 %[[V17]], 1
+// LLVM:   store i32 %[[V18]], ptr {{.*}}, align 4
+// LLVM:   br label {{.*}}
+// LLVM: [[B19:.*]]:
+// LLVM:   br label %[[B20:.*]]
+// LLVM: [[B20]]:
+// LLVM:   invoke void @_ZN1SC2Ev(ptr {{.*}})
+// LLVM:           to label %[[B21:.*]] unwind label %[[B30:.*]]
+// LLVM: [[B21]]:
+// LLVM:   br label {{.*}}
+// LLVM: [[B22]]:
+// LLVM:   %[[V23:.*]] = landingpad { ptr, i32 }
+// LLVM:           catch ptr null
+// LLVM:   %[[V24:.*]] = extractvalue { ptr, i32 } %[[V23]], 0
+// LLVM:   %[[V25:.*]] = extractvalue { ptr, i32 } %[[V23]], 1
+// LLVM:   br label %[[B34:.*]]
+// LLVM: [[B26]]:
+// LLVM:   %[[V27:.*]] = landingpad { ptr, i32 }
+// LLVM:           catch ptr null
+// LLVM:   %[[V28:.*]] = extractvalue { ptr, i32 } %[[V27]], 0
+// LLVM:   %[[V29:.*]] = extractvalue { ptr, i32 } %[[V27]], 1
+// LLVM:   br label %[[B34:.*]]
+// LLVM: [[B30]]:
+// LLVM:   %[[V31:.*]] = landingpad { ptr, i32 }
+// LLVM:           catch ptr null
+// LLVM:   %[[V32:.*]] = extractvalue { ptr, i32 } %[[V31]], 0
+// LLVM:   %[[V33:.*]] = extractvalue { ptr, i32 } %[[V31]], 1
+// LLVM:   br label %[[B34:.*]]
+// LLVM: [[B34]]:
+// LLVM:   %[[V35:.*]] = phi ptr [ %[[V32]], %[[B30]] ], [ %[[V24]], %[[B22]] ], [ %[[V28]], %[[B26]] ]
+// LLVM:   %[[V36:.*]] = call ptr @__cxa_begin_catch(ptr %[[V35]])
+// LLVM:   %[[V37:.*]] = load i32, ptr {{.*}}, align 4
+// LLVM:   %[[V38:.*]] = add nsw i32 %[[V37]], 1
+// LLVM:   store i32 %[[V38]], ptr {{.*}}, align 4
+// LLVM:   call void @__cxa_end_catch()
+// LLVM:   br label {{.*}}
+
+void refoo3() {
+  int r = 1;
+  try {
+    throw;
+    S s;
+  } catch (...) {
+    ++r;
+  }
+}
+
+// CIR-LABEL: @_Z6refoo3v()
+// CIR:   %[[V0:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["r", init] {alignment = 4 : i64}
+// CIR:   %[[V1:.*]] = cir.const #cir.int<1> : !s32i
+// CIR:   cir.store{{.*}} %[[V1]], %[[V0]] : !s32i, !cir.ptr<!s32i>
+// CIR:   cir.scope {
+// CIR:     %[[V2:.*]] = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["s", init] {alignment = 1 : i64}
+// CIR:     cir.try {
+// CIR:       cir.call exception @__cxa_rethrow() : () -> ()
+// CIR:       cir.unreachable
+// CIR:     ^bb1:  // no predecessors
+// CIR:       cir.call exception @_ZN1SC2Ev(%[[V2]]) : (!cir.ptr<!rec_S>) -> ()
+// CIR:       cir.yield
+// CIR:     } catch [type #cir.all {
+// CIR:       %[[V3:.*]] = cir.catch_param -> !cir.ptr<!void>
+// CIR:       %[[V4:.*]] = cir.load{{.*}} %[[V0]] : !cir.ptr<!s32i>, !s32i
+// CIR:       %[[V5:.*]] = cir.unary(inc, %[[V4]]) nsw : !s32i, !s32i
+// CIR:       cir.store{{.*}} %[[V5]], %[[V0]] : !s32i, !cir.ptr<!s32i>
+// CIR:       cir.yield
+// CIR:     }]
+// CIR:   }
+// CIR:   cir.return
+// CIR: }
+
+// LLVM:  invoke void @__cxa_rethrow()
+// LLVM:          to label %[[B5:.*]] unwind label %[[B8:.*]]
+// LLVM: [[B5]]:
+// LLVM:  unreachable
+// LLVM: [[B6:.*]]:
+// LLVM:  invoke void @_ZN1SC2Ev(ptr {{.*}})
+// LLVM:          to label %[[B7:.*]] unwind label %[[B12:.*]]
+// LLVM: [[B7]]:
+// LLVM:  br label %[[B21:.*]]
+// LLVM: [[B8]]:
+// LLVM:  %[[V9:.*]] = landingpad { ptr, i32 }
+// LLVM:          catch ptr null
+// LLVM:  %[[V10:.*]] = extractvalue { ptr, i32 } %[[V9]], 0
+// LLVM:  %[[V11:.*]] = extractvalue { ptr, i32 } %[[V9]], 1
+// LLVM:  br label %[[B16:.*]]
+// LLVM: [[B12]]:
+// LLVM:  %[[V13:.*]] = landingpad { ptr, i32 }
+// LLVM:          catch ptr null
+// LLVM:  %[[V14:.*]] = extractvalue { ptr, i32 } %[[V13]], 0
+// LLVM:  %[[V15:.*]] = extractvalue { ptr, i32 } %[[V13]], 1
+// LLVM:  br label %[[B16]]
+// LLVM: [[B16]]:
+// LLVM:  %[[V17:.*]] = phi ptr [ %[[V14]], %[[B12]] ], [ %[[V10]], %[[B8]] ]
+// LLVM:  %[[V18:.*]] = call ptr @__cxa_begin_catch(ptr %[[V17]])
+// LLVM:  %[[V19:.*]] = load i32, ptr {{.*}}, align 4
+// LLVM:  %[[V20:.*]] = add nsw i32 %[[V19]], 1
+// LLVM:  store i32 %[[V20]], ptr {{.*}}, align 4
+// LLVM:  call void @__cxa_end_catch()
+// LLVM:  br label %[[B21]]
+// LLVM: [[B21]]:
+// LLVM:  br label {{.*}}
+
+void refoo4() {
+  try {
+    for (int i = 0; i < 5; i++) {
+      throw;
+      throw;
+      S s;
+      i++;
+    }
+  } catch (...) {
+    int r = 1;
+  }
+}
+
+// CIR-LABEL: @_Z6refoo4v
+// CIR: cir.call exception @__cxa_rethrow() : () -> ()
+// CIR-NEXT: unreachable
+// CIR: cir.call exception @__cxa_rethrow() : () -> ()
+// CIR-NEXT: unreachable
+// CIR: cir.call exception @_ZN1SC2Ev
+
+// LLVM: invoke void @__cxa_rethrow
+// LLVM: unreachable
+// LLVM: invoke void @__cxa_rethrow
+// LLVM: unreachable
+// LLVM: invoke void @_ZN1SC2Ev
+
+void statements() {
+  throw 0;
+  123 + 456;
+}
+
+// CIR:      cir.func {{.*}} @_Z10statementsv()
+// CIR-NEXT:   %[[V0:.*]] = cir.alloc.exception 4 -> !cir.ptr<!s32i>
+// CIR-NEXT:   %[[V1:.*]] = cir.const #cir.int<0> : !s32i
+// CIR-NEXT:   cir.store align(16) %[[V1]], %[[V0]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT:   cir.throw %[[V0]] : !cir.ptr<!s32i>, @_ZTIi
+// CIR-NEXT:   cir.unreachable
+// CIR-NEXT: ^bb1:
+// CIR-NEXT:   %[[V2:.*]] = cir.const #cir.int<123> : !s32i
+// CIR-NEXT:   %[[V3:.*]] = cir.const #cir.int<456> : !s32i
+// CIR-NEXT:   %[[V4:.*]] = cir.binop(add, %[[V2]], %[[V3]]) nsw : !s32i
+// CIR-NEXT:   cir.return
+// CIR-NEXT: }
+
+// LLVM: call void @__cxa_throw
+// LLVM: unreachable
+
+void paren_expr() { (throw 0, 123 + 456); }
+
+// CIR:       cir.func {{.*}} @_Z10paren_exprv()
+// CIR-NEXT:   %[[V0:.*]] = cir.alloc.exception 4 -> !cir.ptr<!s32i>
+// CIR-NEXT:   %[[V1:.*]] = cir.const #cir.int<0> : !s32i
+// CIR-NEXT:   cir.store align(16) %[[V1]], %[[V0]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT:   cir.throw %[[V0]] : !cir.ptr<!s32i>, @_ZTIi
+// CIR-NEXT:   cir.unreachable
+// CIR-NEXT: ^bb1:
+// CIR-NEXT:   %[[V2:.*]] = cir.const #cir.int<123> : !s32i
+// CIR-NEXT:   %[[V3:.*]] = cir.const #cir.int<456> : !s32i
+// CIR-NEXT:   %[[V4:.*]] = cir.binop(add, %[[V2]], %[[V3]]) nsw : !s32i
+// CIR-NEXT:   cir.return
+// CIR-NEXT: }
+
+// LLVM: call void @__cxa_throw
+// LLVM: unreachable
+
+int ternary_throw1(bool condition, int x) {
+  return condition ? throw x : x;
+}
+
+// CIR:     cir.func {{.*}} @_Z14ternary_throw1bi(%arg0: !cir.bool
+// CIR-NEXT:   %[[V0:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["condition", init] {alignment = 1 : i64}
+// CIR-NEXT:   %[[V1:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+// CIR-NEXT:   %[[V2:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CIR-NEXT:   %[[V3:.*]] = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["cleanup.cond"] {alignment = 1 : i64}
+// CIR-NEXT:   %[[V4:.*]] = cir.const #false
+// CIR-NEXT:   %[[V5:.*]] = cir.const #true
+// CIR-NEXT:   cir.store %arg0, %[[V0]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR-NEXT:   cir.store %arg1, %[[V1]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT:   %[[V6:.*]] = cir.load align(1) %[[V0]] : !cir.ptr<!cir.bool>, !cir.bool
+// CIR-NEXT:   cir.store align(1) %[[V4]], %[[V3]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR-NEXT:   %[[V7:.*]] = cir.ternary(%[[V6]], true {
+// CIR-NEXT:     %[[V9:.*]] = cir.alloc.exception 4 -> !cir.ptr<!s32i>
+// CIR-NEXT:     cir.store align(1) %[[V5]], %[[V3]] : !cir.bool, !cir.ptr<!cir.bool>
+// CIR-NEXT:     %[[V10:.*]] = cir.load align(4) %[[V1]] : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:     cir.store align(16) %[[V10]], %[[V9]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT:     cir.throw %[[V9]] : !cir.ptr<!s32i>, @_ZTIi
+// CIR-NEXT:     cir.unreachable
+// CIR-NEXT:   ^bb1:  // no predecessors
+// CIR-NEXT:     %[[V11:.*]] = cir.const #cir.int<0> : !s32i
+// CIR-NEXT:     cir.yield %[[V11]] : !s32i
+// CIR-NEXT:   }, false {
+// CIR-NEXT:     %[[V9:.*]] = cir.load align(4) %[[V1]] : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:     cir.yield %[[V9]] : !s32i
+// CIR-NEXT:   }) : (!cir.bool) -> !s32i
+// CIR-NEXT:   cir.store{{.*}} %[[V7]], %[[V2]] : !s32i, !cir.ptr<!s32i>
+// CIR-NEXT:   %[[V8:.*]] = cir.load{{.*}} %[[V2]] : !cir.ptr<!s32i>, !s32i
+// CIR-NEXT:   cir.return %[[V8]] : !s32i
+// CIR-NEXT: }
+
+// LLVM: @_Z14ternary_throw1bi
+// LLVM:   %[[V3:.*]] = alloca i8, i64 1, align 1
+// LLVM:   %[[V4:.*]] = alloca i32, i64 1, align 4
+// LLVM:   %[[V5:.*]] = alloca i32, i64 1, align 4
+// LLVM:   %[[V6:.*]] = alloca i8, i64 1, align 1
+// LLVM:   %[[V7:.*]] = zext i1 %[[V0:.*]] to i8
+// LLVM:   store i8 %[[V7]], ptr %[[V3]], align 1
+// LLVM:   store i32 %[[V1:.*]], ptr %[[V4]], align 4
+// LLVM:   %[[V8:.*]] = load i8, ptr %[[V3]], align 1
+// LLVM:   %[[V9:.*]] = trunc i8 %[[V8]] to i1
+// LLVM:   store i8 0, ptr %[[V6]], align 1
+// LLVM:   br i1 %[[V9]], label %[[B10:.*]], label %[[B14:.*]]
+// LLVM: [[B10]]:
+// LLVM:   %[[V11:.*]] = call ptr @__cxa_allocate_exception(i64 4)
+// LLVM:   store i8 1, ptr %[[V6]], align 1
+// LLVM:   %[[V12:.*]] = load i32, ptr %[[V4]], align 4
+// LLVM:   store i32 %[[V12]], ptr %[[V11]], align 16
+// LLVM:   call void @__cxa_throw(ptr %[[V11]], ptr @_ZTIi, ptr null)
+// LLVM:   unreachable
+// LLVM: [[B13:.*]]:
+// LLVM:   br label %[[B16:.*]]
+// LLVM: [[B14]]:
+// LLVM:   %[[V15:.*]] = load i32, ptr %[[V4]], align 4
+// LLVM:   br label %[[B16]]
+// LLVM: [[B16]]:
+// LLVM:   %[[V17:.*]] = phi i32 [ 0, %[[B13]] ], [ %[[V15]], %[[B14]] ]
+// LLVM:   store i32 %[[V17]], ptr %[[V5]], align 4
+// LLVM:   %[[V18:.*]] = load i32, ptr %[[V5]], align 4
+// LLVM:   ret i32 %[[V18]]
+
+int ternary_throw2(bool condition, int x) {
+  return condition ? x : throw x;
+}
+
+// LLVM: @_Z14ternary_throw2bi
+// LLVM:   %[[V3:.*]] = alloca i8, i64 1, align 1
+// LLVM:   %[[V4:.*]] = alloca i32, i64 1, align 4
+// LLVM:   %[[V5:.*]] = alloca i32, i64 1, align 4
+// LLVM:   %[[V6:.*]] = alloca i8, i64 1, align 1
+// LLVM:   %[[V7:.*]] = zext i1 %[[V0:.*]] to i8
+// LLVM:   store i8 %[[V7]], ptr %[[V3]], align 1
+// LLVM:   store i32 %[[V1]], ptr %[[V4]], align 4
+// LLVM:   %[[V8:.*]] = load i8, ptr %[[V3]], align 1
+// LLVM:   %[[V9:.*]] = trunc i8 %[[V8]] to i1
+// LLVM:   store i8 0, ptr %[[V6]], align 1
+// LLVM:   br i1 %[[V9]], label %[[B10:.*]], label %[[B12:.*]]
+// LLVM: [[B10]]:
+// LLVM:   %[[V11:.*]] = load i32, ptr %[[V4]], align 4
+// LLVM:   br label %[[B16:.*]]
+// LLVM: [[B12]]:
+// LLVM:   %[[V13:.*]] = call ptr @__cxa_allocate_exception(i64 4)
+// LLVM:   store i8 1, ptr %[[V6]], align 1
+// LLVM:   %[[V14:.*]] = load i32, ptr %[[V4]], align 4
+// LLVM:   store i32 %[[V14]], ptr %[[V13]], align 16
+// LLVM:   call void @__cxa_throw(ptr %[[V13]], ptr @_ZTIi, ptr null)
+// LLVM:   unreachable
+// LLVM: [[B15:.*]]:
+// LLVM:   br label %[[B16:.*]]
+// LLVM: [[B16]]:
+// LLVM:   %[[V17:.*]] = phi i32 [ 0, %[[B15]] ], [ %[[V11]], %[[B10]] ]
+// LLVM:   store i32 %[[V17]], ptr %[[V5]], align 4
+// LLVM:   %[[V18:.*]] = load i32, ptr %[[V5]], align 4
+// LLVM:   ret i32 %[[V18]]
diff --git a/clang/test/CIR/Incubator/CodeGen/tls.c b/clang/test/CIR/Incubator/CodeGen/tls.c
new file mode 100644
index 0000000000000..c33e7ad5f28fe
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/tls.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+extern __thread int b;
+int c(void) { return *&b; }
+// CIR: cir.global "private" external tls_dyn @b : !s32i
+// CIR: cir.func {{.*}} @c() -> !s32i
+// CIR:   %[[TLS_ADDR:.*]] = cir.get_global thread_local @b : !cir.ptr<!s32i>
+
+__thread int a;
+// CIR: cir.global external tls_dyn @a = #cir.int<0> : !s32i
+
+// LLVM: @b = external thread_local global i32
+// LLVM: @a = thread_local global i32 0
+
+// LLVM-LABEL: @c
+// LLVM: = call ptr @llvm.threadlocal.address.p0(ptr @b)
diff --git a/clang/test/CIR/Incubator/CodeGen/trap.cpp b/clang/test/CIR/Incubator/CodeGen/trap.cpp
new file mode 100644
index 0000000000000..e1a66df2dc65f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/trap.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+void foo();
+
+void basic() {
+  foo();
+  __builtin_trap();
+}
+
+//      CHECK: cir.func {{.*}} @_Z5basicv()
+// CHECK-NEXT:   cir.call @_Z3foov() : () -> ()
+// CHECK-NEXT:   cir.trap
+// CHECK-NEXT: }
+
+void code_after_unreachable() {
+  foo();
+  __builtin_trap();
+  foo();
+}
+
+//      CHECK: cir.func {{.*}} @_Z22code_after_unreachablev()
+// CHECK-NEXT:   cir.call @_Z3foov() : () -> ()
+// CHECK-NEXT:   cir.trap
+// CHECK-NEXT: ^bb1:
+// CHECK-NEXT:   cir.call @_Z3foov() : () -> ()
+// CHECK-NEXT:   cir.return
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/CodeGen/trivial-copy.cpp b/clang/test/CIR/Incubator/CodeGen/trivial-copy.cpp
new file mode 100644
index 0000000000000..c6b947951b8e1
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/trivial-copy.cpp
@@ -0,0 +1,21 @@
+// RUN:   %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - \
+// RUN:   | FileCheck %s
+
+struct Trivial {
+  int i;
+};
+
+void CopyCTor(Trivial &a) {
+  Trivial b(a);
+  
+// CHECK:         cir.copy
+// CHECK-NOT:     cir.call {{.*}}_ZN7TrivialC2ERKS_
+// CHECK-NOT:     cir.func {{.*}}_ZN7TrivialC2ERKS_
+}
+
+void CopyAssign(Trivial &a) {
+  Trivial b = a;
+// CHECK:         cir.copy
+// CHECK-NOT:     cir.call {{.*}}_ZN7TrivialaSERKS_
+// CHECK-NOT:     cir.func {{.*}}_ZN7TrivialaSERKS_
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/try-catch-dtors.cpp b/clang/test/CIR/Incubator/CodeGen/try-catch-dtors.cpp
new file mode 100644
index 0000000000000..44dc524d91a0f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/try-catch-dtors.cpp
@@ -0,0 +1,454 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -emit-cir-flat -fno-clangir-call-conv-lowering %s -o %t.flat.cir
+// RUN: FileCheck --input-file=%t.flat.cir --check-prefix=CIR_FLAT %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+struct Vec {
+  Vec();
+  Vec(Vec&&);
+  ~Vec();
+};
+
+void yo() {
+  int r = 1;
+  try {
+    Vec v;
+  } catch (...) {
+    r++;
+  }
+}
+
+// CIR-DAG: ![[VecTy:.*]] = !cir.record<struct "Vec" padded {!u8i}>
+// CIR-DAG: ![[S1:.*]] = !cir.record<struct "S1" {![[VecTy]]}>
+
+// CIR_FLAT-DAG: ![[VecTy:.*]] = !cir.record<struct "Vec" padded {!u8i}>
+// CIR_FLAT-DAG: ![[S1:.*]] = !cir.record<struct "S1" {![[VecTy]]}>
+
+// CIR: cir.scope {
+// CIR:   %[[VADDR:.*]] = cir.alloca ![[VecTy]], !cir.ptr<![[VecTy]]>, ["v", init]
+// CIR:   cir.try {
+// CIR:     cir.call exception @_ZN3VecC1Ev(%[[VADDR]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:     cir.call @_ZN3VecD1Ev(%[[VADDR]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:     cir.yield
+// CIR:   } catch [type #cir.all {
+// CIR:     cir.catch_param -> !cir.ptr<!void>
+// CIR:   }]
+// CIR: }
+// CIR: cir.return
+
+// LLVM-LABEL: @_Z2yov()
+
+// LLVM:   %[[Vec:.*]] = alloca %struct.Vec
+// LLVM:   br label %[[INVOKE_BB:.*]]
+
+// LLVM: [[INVOKE_BB]]:
+// LLVM:   invoke void @_ZN3VecC1Ev(ptr %[[Vec]])
+// LLVM:           to label %[[DTOR_BB:.*]] unwind label %[[LPAD_BB:.*]]
+
+// LLVM: [[DTOR_BB]]:
+// LLVM:   call void @_ZN3VecD1Ev(ptr %[[Vec]])
+// LLVM:   br label %15
+
+// LLVM: [[LPAD_BB]]:
+// LLVM:   landingpad { ptr, i32 }
+// LLVM:           catch ptr null
+// LLVM:   br label %[[CATCH_BB:.*]]
+
+// LLVM: [[CATCH_BB]]:
+// LLVM:   call ptr @__cxa_begin_catch
+// LLVM:   call void @__cxa_end_catch()
+// LLVM:   br label %[[RET_BB:.*]]
+
+// LLVM: [[RET_BB]]:
+// LLVM:   ret void
+
+struct S1 {
+  Vec v;
+};
+
+void yo2() {
+  int r = 1;
+  try {
+    Vec v;
+    S1((Vec&&) v);
+  } catch (...) {
+    r++;
+  }
+}
+// CIR-LABEL: @_Z3yo2v
+// CIR:   cir.scope {
+// CIR:     cir.alloca ![[VecTy]]
+// CIR:     cir.try {
+// CIR:       cir.call exception @_ZN3VecC1Ev
+// CIR:       cir.scope {
+// CIR:         cir.alloca ![[S1]], !cir.ptr<![[S1]]>, ["agg.tmp.ensured"]
+// CIR:         cir.call exception @_ZN3VecC1EOS_{{.*}} cleanup {
+// CIR:           cir.call @_ZN3VecD1Ev
+// CIR:           cir.yield
+// CIR:         cir.call @_ZN2S1D2Ev
+// CIR:       }
+// CIR:       cir.call @_ZN3VecD1Ev
+// CIR:       cir.yield
+// CIR:     } catch [type #cir.all {
+// CIR:       cir.catch_param -> !cir.ptr<!void>
+// CIR:       cir.yield
+// CIR:     }]
+// CIR:   }
+// CIR:   cir.return
+// CIR: }
+
+// CIR_FLAT-LABEL: @_Z3yo2v
+// CIR_FLAT:    cir.try_call @_ZN3VecC1Ev(%[[vec:.+]]) ^[[NEXT_CALL_PREP:.*]], ^[[PAD_NODTOR:.*]] : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR_FLAT:  ^[[NEXT_CALL_PREP]]:
+// CIR_FLAT:    cir.br ^[[NEXT_CALL:.*]] loc
+// CIR_FLAT:  ^[[NEXT_CALL]]:
+// CIR_FLAT:    cir.try_call @_ZN3VecC1EOS_({{.*}}) ^[[CONT0:.*]], ^[[PAD_DTOR:.*]] :
+// CIR_FLAT:  ^[[CONT0]]:
+// CIR_FLAT:    cir.call @_ZN2S1D2Ev
+// CIR_FLAT:    cir.br ^[[CONT1:.*]] loc
+// CIR_FLAT:  ^[[CONT1]]:
+// CIR_FLAT:    cir.call @_ZN3VecD1Ev
+// CIR_FLAT:    cir.br ^[[AFTER_TRY:.*]] loc
+// CIR_FLAT:  ^[[PAD_NODTOR]]:
+// CIR_FLAT:    %exception_ptr, %type_id = cir.eh.inflight_exception
+// CIR_FLAT:    cir.br ^[[CATCH_BEGIN:.*]](%exception_ptr : !cir.ptr<!void>)
+// CIR_FLAT:  ^[[PAD_DTOR]]:
+// CIR_FLAT:    %exception_ptr_0, %type_id_1 = cir.eh.inflight_exception
+// CIR_FLAT:    cir.call @_ZN3VecD1Ev(%[[vec]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR_FLAT:    cir.br ^[[CATCH_BEGIN]](%exception_ptr_0 : !cir.ptr<!void>)
+// CIR_FLAT:  ^[[CATCH_BEGIN]](
+// CIR_FLAT:    cir.catch_param begin
+// CIR_FLAT:    cir.br ^[[AFTER_TRY]]
+// CIR_FLAT:  ^[[AFTER_TRY]]:
+// CIR_FLAT:    cir.return
+// CIR_FLAT:  }
+
+void yo3(bool x) {
+  int r = 1;
+  try {
+    Vec v1, v2, v3, v4;
+  } catch (...) {
+    r++;
+  }
+}
+
+// CIR-LABEL: @_Z3yo3b
+// CIR: cir.scope {
+// CIR:   %[[V1:.*]] = cir.alloca ![[VecTy]], !cir.ptr<![[VecTy]]>, ["v1"
+// CIR:   %[[V2:.*]] = cir.alloca ![[VecTy]], !cir.ptr<![[VecTy]]>, ["v2"
+// CIR:   %[[V3:.*]] = cir.alloca ![[VecTy]], !cir.ptr<![[VecTy]]>, ["v3"
+// CIR:   %[[V4:.*]] = cir.alloca ![[VecTy]], !cir.ptr<![[VecTy]]>, ["v4"
+// CIR:   cir.try {
+// CIR:     cir.call exception @_ZN3VecC1Ev(%[[V1]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:     cir.call exception @_ZN3VecC1Ev(%[[V2]]) : (!cir.ptr<![[VecTy]]>) -> () cleanup {
+// CIR:       cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:       cir.yield
+// CIR:     }
+// CIR:     cir.call exception @_ZN3VecC1Ev(%[[V3]]) : (!cir.ptr<![[VecTy]]>) -> () cleanup {
+// CIR:       cir.call @_ZN3VecD1Ev(%[[V2]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:       cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:       cir.yield
+// CIR:     }
+// CIR:     cir.call exception @_ZN3VecC1Ev(%[[V4]]) : (!cir.ptr<![[VecTy]]>) -> () cleanup {
+// CIR:       cir.call @_ZN3VecD1Ev(%[[V3]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:       cir.call @_ZN3VecD1Ev(%[[V2]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:       cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:       cir.yield
+// CIR:     }
+// CIR:     cir.call @_ZN3VecD1Ev(%[[V4]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:     cir.call @_ZN3VecD1Ev(%[[V3]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:     cir.call @_ZN3VecD1Ev(%[[V2]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:     cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:     cir.yield
+// CIR:   } catch [type #cir.all {
+// CIR:   }]
+// CIR: }
+// CIR: cir.return
+
+// CIR_FLAT-LABEL: @_Z3yo3b
+// CIR_FLAT:   %[[V1:.*]] = cir.alloca ![[VecTy]], !cir.ptr<![[VecTy]]>, ["v1"
+// CIR_FLAT:   %[[V2:.*]] = cir.alloca ![[VecTy]], !cir.ptr<![[VecTy]]>, ["v2"
+// CIR_FLAT:   %[[V3:.*]] = cir.alloca ![[VecTy]], !cir.ptr<![[VecTy]]>, ["v3"
+// CIR_FLAT:   %[[V4:.*]] = cir.alloca ![[VecTy]], !cir.ptr<![[VecTy]]>, ["v4"
+// CIR_FLAT:    cir.br ^[[CALL0:.*]] loc
+// CIR_FLAT:  ^[[CALL0]]:
+// CIR_FLAT:    cir.try_call @_ZN3VecC1Ev(%[[V1]]) ^[[CALL1:.*]], ^[[CLEANUP_V1:.*]] : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR_FLAT:  ^[[CALL1]]:
+// CIR_FLAT:    cir.try_call @_ZN3VecC1Ev(%[[V2]]) ^[[CALL2:.*]], ^[[CLEANUP_V2:.*]] : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR_FLAT:  ^[[CALL2]]:
+// CIR_FLAT:    cir.try_call @_ZN3VecC1Ev(%[[V3]]) ^[[CALL3:.*]], ^[[CLEANUP_V3:.*]] : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR_FLAT:  ^[[CALL3]]:
+// CIR_FLAT:    cir.try_call @_ZN3VecC1Ev(%[[V4]]) ^[[NOTHROW_CLEANUP:.*]], ^[[CLEANUP_V4:.*]] : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR_FLAT:  ^[[NOTHROW_CLEANUP]]:
+// CIR_FLAT:    cir.call @_ZN3VecD1Ev(%[[V4]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR_FLAT:    cir.call @_ZN3VecD1Ev(%[[V3]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR_FLAT:    cir.call @_ZN3VecD1Ev(%[[V2]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR_FLAT:    cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR_FLAT:    cir.br ^[[AFTER_TRY:.*]] loc
+// CIR_FLAT:  ^[[CLEANUP_V1]]:
+// CIR_FLAT:    %exception_ptr, %type_id = cir.eh.inflight_exception
+// CIR_FLAT:    cir.br ^[[CATCH_BEGIN:.*]](%exception_ptr : !cir.ptr<!void>)
+// CIR_FLAT:  ^[[CLEANUP_V2]]:
+// CIR_FLAT:    %exception_ptr_0, %type_id_1 = cir.eh.inflight_exception
+// CIR_FLAT:    cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR_FLAT:    cir.br ^[[CATCH_BEGIN]](%exception_ptr_0 : !cir.ptr<!void>)
+// CIR_FLAT:  ^[[CLEANUP_V3]]:
+// CIR_FLAT:    %exception_ptr_2, %type_id_3 = cir.eh.inflight_exception
+// CIR_FLAT:    cir.call @_ZN3VecD1Ev(%[[V2]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR_FLAT:    cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR_FLAT:    cir.br ^[[CATCH_BEGIN]](%exception_ptr_2 : !cir.ptr<!void>)
+// CIR_FLAT:  ^[[CLEANUP_V4]]:
+// CIR_FLAT:    %exception_ptr_4, %type_id_5 = cir.eh.inflight_exception
+// CIR_FLAT:    cir.call @_ZN3VecD1Ev(%[[V3]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR_FLAT:    cir.call @_ZN3VecD1Ev(%[[V2]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR_FLAT:    cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR_FLAT:    cir.br ^[[CATCH_BEGIN]](%exception_ptr_4 : !cir.ptr<!void>)
+// CIR_FLAT:  ^[[CATCH_BEGIN]]({{.*}}
+// CIR_FLAT:    cir.catch_param begin
+// CIR_FLAT:    cir.br ^[[AFTER_TRY]]
+// CIR_FLAT:  ^[[AFTER_TRY]]:
+// CIR_FLAT:    cir.return
+
+// LLVM-LABEL: @_Z3yo3b
+// LLVM:   %[[V1:.*]] = alloca %struct.Vec
+// LLVM:   %[[V2:.*]] = alloca %struct.Vec
+// LLVM:   %[[V3:.*]] = alloca %struct.Vec
+// LLVM:   %[[V4:.*]] = alloca %struct.Vec
+// LLVM:   br label %[[CALL0:.*]]
+// LLVM: [[CALL0]]:
+// LLVM:   invoke void @_ZN3VecC1Ev(ptr %[[V1]])
+// LLVM:           to label %[[CALL1:.*]] unwind label %[[LPAD0:.*]]
+// LLVM: [[CALL1]]:
+// LLVM:   invoke void @_ZN3VecC1Ev(ptr %[[V2]])
+// LLVM:           to label %[[CALL2:.*]] unwind label %[[LPAD1:.*]]
+// LLVM: [[CALL2]]:
+// LLVM:   invoke void @_ZN3VecC1Ev(ptr %[[V3]])
+// LLVM:           to label %[[CALL3:.*]] unwind label %[[LPAD2:.*]]
+// LLVM: [[CALL3]]:
+// LLVM:   invoke void @_ZN3VecC1Ev(ptr %[[V4]])
+// LLVM:           to label %[[REGULAR_CLEANUP:.*]] unwind label %[[LPAD3:.*]]
+// LLVM: [[REGULAR_CLEANUP]]:
+// LLVM:   call void @_ZN3VecD1Ev(ptr %[[V4]])
+// LLVM:   call void @_ZN3VecD1Ev(ptr %[[V3]])
+// LLVM:   call void @_ZN3VecD1Ev(ptr %[[V2]])
+// LLVM:   call void @_ZN3VecD1Ev(ptr %[[V1]])
+// LLVM:   br label %[[RET:.*]]
+// LLVM: [[LPAD0]]:
+// LLVM:   landingpad { ptr, i32 }
+// LLVM:           catch ptr null
+// LLVM:   br label %[[CATCH:.*]]
+// LLVM: [[LPAD1]]:
+// LLVM:   landingpad { ptr, i32 }
+// LLVM:           catch ptr null
+// LLVM:   call void @_ZN3VecD1Ev(ptr %[[V1]])
+// LLVM:   br label %[[CATCH]]
+// LLVM: [[LPAD2]]:
+// LLVM:   landingpad { ptr, i32 }
+// LLVM:           catch ptr null
+// LLVM:   call void @_ZN3VecD1Ev(ptr %[[V2]])
+// LLVM:   call void @_ZN3VecD1Ev(ptr %[[V1]])
+// LLVM:   br label %[[CATCH]]
+// LLVM: [[LPAD3]]:
+// LLVM:   landingpad { ptr, i32 }
+// LLVM:           catch ptr null
+// LLVM:   call void @_ZN3VecD1Ev(ptr %[[V3]])
+// LLVM:   call void @_ZN3VecD1Ev(ptr %[[V2]])
+// LLVM:   call void @_ZN3VecD1Ev(ptr %[[V1]])
+// LLVM:   br label %[[CATCH]]
+// LLVM: [[CATCH]]:
+// LLVM:   call ptr @__cxa_begin_catch
+// LLVM:   br label %[[RET]]
+// LLVM: [[RET]]:
+// LLVM:   ret void
+
+void yo2(bool x) {
+  int r = 1;
+  try {
+    Vec v1, v2;
+    try {
+        Vec v3, v4;
+    } catch (...) {
+    r++;
+    }
+  } catch (...) {
+    r++;
+  }
+}
+
+// CIR: cir.scope {
+// CIR:   %[[V1:.*]] = cir.alloca ![[VecTy]], !cir.ptr<![[VecTy]]>, ["v1"
+// CIR:   %[[V2:.*]] = cir.alloca ![[VecTy]], !cir.ptr<![[VecTy]]>, ["v2"
+// CIR:   cir.try {
+// CIR:     cir.call exception @_ZN3VecC1Ev(%[[V1]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:     cir.call exception @_ZN3VecC1Ev(%[[V2]]) : (!cir.ptr<![[VecTy]]>) -> () cleanup {
+// CIR:       cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:       cir.yield
+// CIR:     }
+// CIR:     cir.scope {
+// CIR:       %[[V3:.*]] = cir.alloca ![[VecTy]], !cir.ptr<![[VecTy]]>, ["v3"
+// CIR:       %[[V4:.*]] = cir.alloca ![[VecTy]], !cir.ptr<![[VecTy]]>, ["v4"
+// CIR:       cir.try {
+// CIR:         cir.call exception @_ZN3VecC1Ev(%[[V3]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:         cir.call exception @_ZN3VecC1Ev(%[[V4]]) : (!cir.ptr<![[VecTy]]>) -> () cleanup {
+// CIR:           cir.call @_ZN3VecD1Ev(%[[V3]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:           cir.yield
+// CIR:         }
+// CIR:         cir.call @_ZN3VecD1Ev(%[[V4]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:         cir.call @_ZN3VecD1Ev(%[[V3]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:         cir.yield
+// CIR:       } catch [type #cir.all {
+// CIR:         cir.catch_param -> !cir.ptr<!void>
+// CIR:       }]
+// CIR:     }
+// CIR:     cir.call @_ZN3VecD1Ev(%[[V2]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:     cir.call @_ZN3VecD1Ev(%[[V1]]) : (!cir.ptr<![[VecTy]]>) -> ()
+// CIR:     cir.yield
+// CIR:   } catch [type #cir.all {
+// CIR:     cir.catch_param -> !cir.ptr<!void>
+// CIR:   }]
+
+
+int foo() { return 42; }
+
+struct A {
+  ~A() {}
+};
+
+void bar() {
+  A a;
+  int b = foo();
+}
+
+// CIR-LABEL: @_Z3barv
+// CIR:  %[[V0:.*]] = cir.alloca !rec_A, !cir.ptr<!rec_A>, ["a"] {alignment = 1 : i64}
+// CIR:  %[[V1:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] {alignment = 4 : i64}
+// CIR:  %[[V2:.*]] = cir.call @_Z3foov() : () -> !s32i
+// CIR:  cir.store align(4) %[[V2]], %[[V1]] : !s32i, !cir.ptr<!s32i>
+// CIR:  cir.call @_ZN1AD2Ev(%[[V0]]) : (!cir.ptr<!rec_A>) -> ()
+// CIR:  cir.return
+
+// LLVM: ; Function Attrs: noinline nounwind optnone
+// LLVM-NEXT: _Z3foo
+// LLVM: @_Z3barv()
+// LLVM:   %[[V1:.*]] = alloca %struct.A, i64 1, align 1
+// LLVM:   %[[V2:.*]] = alloca i32, i64 1, align 4
+// LLVM:   %[[V3:.*]] = call i32 @_Z3foov()
+// LLVM:   store i32 %[[V3]], ptr %[[V2]], align 4
+// LLVM:   call void @_ZN1AD2Ev(ptr %[[V1]])
+// LLVM:   ret void
+
+class C {
+public:
+  ~C();
+  void operator=(C);
+};
+
+void d() {
+  C a, b;
+  a = b;
+}
+
+// CIR: %[[V0:.*]] = cir.alloca !rec_C, !cir.ptr<!rec_C>, ["a"] {alignment = 1 : i64}
+// CIR-NEXT: %[[V1:.*]] = cir.alloca !rec_C, !cir.ptr<!rec_C>, ["b"] {alignment = 1 : i64}
+// CIR-NEXT: cir.scope {
+// CIR-NEXT:   %[[V2:.*]] = cir.alloca !rec_C, !cir.ptr<!rec_C>, ["agg.tmp0"] {alignment = 1 : i64}
+// CIR-NEXT:   cir.copy %[[V1]] to %[[V2]] : !cir.ptr<!rec_C>
+// CIR-NEXT:   %[[V3:.*]] = cir.load{{.*}} %[[V2]] : !cir.ptr<!rec_C>, !rec_C
+// CIR-NEXT:   cir.try synthetic cleanup {
+// CIR-NEXT:     cir.call exception @_ZN1CaSES_(%[[V0]], %[[V3]]) : (!cir.ptr<!rec_C>, !rec_C) -> () cleanup {
+// CIR-NEXT:       cir.call @_ZN1CD1Ev(%[[V2]]) : (!cir.ptr<!rec_C>) -> ()
+// CIR-NEXT:       cir.call @_ZN1CD1Ev(%[[V1]]) : (!cir.ptr<!rec_C>) -> ()
+// CIR-NEXT:       cir.yield
+// CIR-NEXT:     }
+// CIR-NEXT:     cir.yield
+// CIR-NEXT:   } catch [#cir.unwind {
+// CIR-NEXT:     cir.resume
+// CIR-NEXT:   }]
+// CIR-NEXT:   cir.call @_ZN1CD1Ev(%[[V2]]) : (!cir.ptr<!rec_C>) -> ()
+// CIR-NEXT: }
+// CIR-NEXT: cir.call @_ZN1CD1Ev(%[[V1]]) : (!cir.ptr<!rec_C>) -> ()
+// CIR-NEXT: cir.call @_ZN1CD1Ev(%[[V0]]) : (!cir.ptr<!rec_C>) -> ()
+// CIR-NEXT: cir.return
+
+template <typename> class a;
+
+template <> class a<void> {
+public:
+  struct b {
+    typedef a<int> c;
+  };
+};
+
+template <typename> class a {
+public:
+  template <typename d> a(d) noexcept;
+  ~a();
+};
+
+struct e {
+  using f = a<void>::b::c;
+};
+
+template <typename, typename> using g = e::f;
+
+template <typename h> void i(h);
+
+class j {
+
+public:
+  using k = g<int, j>;
+};
+
+class l {
+public:
+  template <typename m, typename n> l(m p1, n) : l(p1, 0, a<void>()) {}
+  template <typename m, typename n, typename h> l(m, n, h o) {
+    try {
+      j::k p(o);
+      i(p);
+    } catch (...) {
+    }
+  }
+};
+
+class G {
+public:
+  template <typename q, typename n> G(q p1, n) : r(p1, 0) {}
+  l r;
+};
+
+class s : G {
+public:
+  int t;
+  s() : G(t, 0) {}
+};
+
+void fn3() { s(); }
+
+// CIR: cir.func {{.*}} @_ZN1lC2Iii1aIvEEET_T0_T1_
+// CIR:   cir.scope
+// CIR:     %[[V5:.*]] = cir.alloca !rec_a3Cint3E, !cir.ptr<!rec_a3Cint3E>
+// CIR:     %[[V6:.*]] = cir.alloca !rec_a3Cvoid3E, !cir.ptr<!rec_a3Cvoid3E>
+// CIR:     cir.try {
+// CIR:       cir.copy {{.*}} to %[[V6]] : !cir.ptr<!rec_a3Cvoid3E>
+// CIR:       %[[V7:.*]] = cir.load align(1) %[[V6]] : !cir.ptr<!rec_a3Cvoid3E>, !rec_a3Cvoid3E
+// CIR:       cir.call @_ZN1aIiEC1IS_IvEEET_(%[[V5]], %[[V7]]) : (!cir.ptr<!rec_a3Cint3E>, !rec_a3Cvoid3E) -> ()
+// CIR:       cir.scope {
+// CIR:         %[[V8:.*]] = cir.alloca !rec_a3Cint3E, !cir.ptr<!rec_a3Cint3E>
+// CIR:         cir.copy %[[V5]] to %[[V8]] : !cir.ptr<!rec_a3Cint3E>
+// CIR:         %[[V9:.*]] = cir.load align(1) %[[V8]] : !cir.ptr<!rec_a3Cint3E>, !rec_a3Cint3E
+// CIR-NEXT:         cir.call exception @_Z1iI1aIiEEvT_(%[[V9]]) : (!rec_a3Cint3E) -> () cleanup {
+// CIR-NEXT:           cir.call @_ZN1aIiED1Ev(%[[V8]]) : (!cir.ptr<!rec_a3Cint3E>) -> ()
+// CIR-NEXT:           cir.call @_ZN1aIiED1Ev(%[[V5]]) : (!cir.ptr<!rec_a3Cint3E>) -> ()
+// CIR-NEXT:           cir.yield
+// CIR-NEXT:         }
+// CIR-NEXT:         cir.call @_ZN1aIiED1Ev(%[[V8]]) : (!cir.ptr<!rec_a3Cint3E>) -> ()
+// CIR-NEXT:       }
+// CIR-NEXT:       cir.call @_ZN1aIiED1Ev(%[[V5]]) : (!cir.ptr<!rec_a3Cint3E>) -> ()
+// CIR-NEXT:       cir.yield
+// CIR:     } catch [type #cir.all {
+// CIR:       %[[V7:.*]] = cir.catch_param -> !cir.ptr<!void>
+// CIR:       cir.yield
+// CIR:     }]
diff --git a/clang/test/CIR/Incubator/CodeGen/try-catch.cpp b/clang/test/CIR/Incubator/CodeGen/try-catch.cpp
new file mode 100644
index 0000000000000..8b1cdc66cab28
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/try-catch.cpp
@@ -0,0 +1,207 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -emit-cir-flat %s -o %t.cir.flat
+// RUN: FileCheck --check-prefix=FLAT --input-file=%t.cir.flat %s
+
+double division(int a, int b);
+
+// CHECK: cir.func {{.*}} @_Z2tcv()
+unsigned long long tc() {
+  int x = 50, y = 3;
+  unsigned long long z;
+
+  try {
+    // CHECK: cir.scope {
+    // CHECK: %[[local_a:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init]
+    // CHECK: %[[msg:.*]] = cir.alloca !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>, ["msg"]
+    // CHECK: %[[idx:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["idx"]
+
+    // CHECK: cir.try {
+    int a = 4;
+    z = division(x, y);
+    // CHECK: %[[div_res:.*]] = cir.call exception @_Z8divisionii({{.*}}) : (!s32i, !s32i) -> !cir.double
+    a++;
+
+  } catch (int idx) {
+    // CHECK: } catch [type #cir.global_view<@_ZTIi> : !cir.ptr<!u8i> {
+    // CHECK:   %[[catch_idx_addr:.*]] = cir.catch_param -> !cir.ptr<!s32i>
+    // CHECK:   %[[idx_load:.*]] = cir.load{{.*}} %[[catch_idx_addr]] : !cir.ptr<!s32i>, !s32i
+    // CHECK:   cir.store{{.*}} %[[idx_load]], %[[idx]] : !s32i, !cir.ptr<!s32i>
+    z = 98;
+    idx++;
+  } catch (const char* msg) {
+    // CHECK: }, type #cir.global_view<@_ZTIPKc> : !cir.ptr<!u8i> {
+    // CHECK:   %[[msg_addr:.*]] = cir.catch_param -> !cir.ptr<!s8i>
+    // CHECK:   cir.store{{.*}} %[[msg_addr]], %[[msg]] : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+    z = 99;
+    (void)msg[0];
+  } // CHECK: }, #cir.unwind {
+    // CHECK: cir.resume
+    // CHECK-NEXT: }
+
+  return z;
+}
+
+// CHECK: cir.func {{.*}} @_Z3tc2v
+unsigned long long tc2() {
+  int x = 50, y = 3;
+  unsigned long long z;
+
+  try {
+    int a = 4;
+    z = division(x, y);
+    a++;
+  } catch (int idx) {
+    z = 98;
+    idx++;
+  } catch (const char* msg) {
+    z = 99;
+    (void)msg[0];
+  } catch (...) {
+    // CHECK: }, type #cir.all {
+    // CHECK:   cir.catch_param
+    // CHECK:   cir.const #cir.int<100> : !s32i
+    z = 100;
+  }
+
+  return z;
+}
+
+// CHECK: cir.func {{.*}} @_Z3tc3v
+unsigned long long tc3() {
+  int x = 50, y = 3;
+  unsigned long long z;
+
+  try {
+    z = division(x, y);
+  } catch (...) {
+    // CHECK: } catch [type #cir.all {
+    // CHECK:   cir.catch_param
+    // CHECK:   cir.const #cir.int<100> : !s32i
+    z = 100;
+  }
+
+  return z;
+}
+
+// CHECK: cir.func {{.*}} @_Z3tc4v()
+unsigned long long tc4() {
+  int x = 50, y = 3;
+  unsigned long long z;
+
+  // CHECK-NOT: cir.try
+  try {
+    int a = 4;
+    a++;
+
+    // CHECK: cir.scope {
+    // CHECK: cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init]
+    // CHECK-NOT: cir.alloca !cir.ptr<!cir.eh.info>
+    // CHECK: cir.const #cir.int<4> : !s32i
+    // CHECK: cir.unary(inc,
+    // CHECK: cir.store{{.*}} %11, %8 : !s32i, !cir.ptr<!s32i>
+  } catch (int idx) {
+    z = 98;
+    idx++;
+  }
+
+  return z;
+}
+
+struct S {
+  S() {};
+  int a;
+};
+
+// CHECK: cir.func {{.*}} @_Z3tc5v()
+void tc5() {
+  try {
+    S s;
+  } catch (...) {
+    tc5();
+  }
+}
+
+// CHECK: cir.try {
+// CHECK: cir.call exception @_ZN1SC2Ev({{.*}}) : (!cir.ptr<!rec_S>) -> ()
+// CHECK: cir.yield
+// CHECK: } catch [type #cir.all {
+// CHECK:  {{.*}} = cir.catch_param -> !cir.ptr<!void>
+// CHECK:  cir.call exception @_Z3tc5v() : () -> ()
+// CHECK:  cir.yield
+// CHECK: }]
+
+// CHECK: cir.func {{.*}} @_Z3tc6v()
+void tc6() {
+  int r = 1;
+  try {
+    return;
+    ++r;
+  } catch (...) {
+  }
+}
+
+// CHECK: cir.scope {
+// CHECK:   cir.try {
+// CHECK:     cir.return
+// CHECK:   ^bb1:  // no predecessors
+// CHECK:     %[[V2:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CHECK:     %[[V3:.*]] = cir.unary(inc, %[[V2]]) nsw : !s32i, !s32i
+// CHECK:     cir.store{{.*}} %[[V3]], {{.*}} : !s32i, !cir.ptr<!s32i>
+// CHECK:     cir.yield
+// CHECK:   }
+// CHECK: }
+
+// CHECK: cir.func {{.*}} @_Z3tc7v()
+void tc7() {
+  int r = 1;
+  try {
+    ++r;
+    return;
+  } catch (...) {
+  }
+}
+
+// CHECK: cir.scope {
+// CHECK:   cir.try {
+// CHECK:     %[[V2:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s32i>, !s32i
+// CHECK:     %[[V3:.*]] = cir.unary(inc, %[[V2]]) nsw : !s32i, !s32i
+// CHECK:     cir.store{{.*}} %[[V3]], {{.*}} : !s32i, !cir.ptr<!s32i>
+// CHECK:     cir.return
+// CHECK:   }
+// CHECK: }
+
+struct S2 {
+  int a, b;
+};
+
+void tc8() {
+  try {
+    S2 s{1, 2};
+  } catch (...) {
+  }
+}
+
+// CHECK: cir.scope {
+// CHECK:   %[[V0:.*]] = cir.alloca !rec_S2, !cir.ptr<!rec_S2>, ["s", init] {alignment = 4 : i64}
+// CHECK:   cir.try {
+// CHECK:     %[[V1:.*]] = cir.const #cir.const_record<{#cir.int<1> : !s32i, #cir.int<2> : !s32i}> : !rec_S2
+// CHECK:     cir.store align(4) %[[V1]], %[[V0]] : !rec_S2, !cir.ptr<!rec_S2>
+// CHECK:     cir.yield
+// CHECK:   }
+// CHECK: }
+
+// FLAT: cir.func {{.*}} @_Z3tc8v()
+// FLAT:   %[[V0:.*]] = cir.alloca !rec_S2, !cir.ptr<!rec_S2>, ["s", init] {alignment = 4 : i64}
+// FLAT:   cir.br ^bb[[#B1:]]
+// FLAT: ^bb[[#B1]]:
+// FLAT:   cir.br ^bb[[#B2:]]
+// FLAT: ^bb[[#B2]]:
+// FLAT:   %[[V1:.*]] = cir.const #cir.const_record<{#cir.int<1> : !s32i, #cir.int<2> : !s32i}> : !rec_S2
+// FLAT:   cir.store align(4) %[[V1]], %[[V0]] : !rec_S2, !cir.ptr<!rec_S2>
+// FLAT:   cir.br ^bb[[#B3:]]
+// FLAT: ^bb[[#B3]]:
+// FLAT:   cir.br ^bb[[#B4:]]
+// FLAT: ^bb[[#B4]]:
+// FLAT:   cir.return
+// FLAT: }
diff --git a/clang/test/CIR/Incubator/CodeGen/type-trait.cpp b/clang/test/CIR/Incubator/CodeGen/type-trait.cpp
new file mode 100644
index 0000000000000..a117c63db0a59
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/type-trait.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O2 -fclangir -emit-cir %s -o - | FileCheck %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O2 -fclangir -emit-llvm %s -o - | FileCheck %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O2 -emit-llvm %s -o - | FileCheck %s --check-prefix=LLVM
+
+// CIR-LABEL: cir.func {{.*}} @_Z4testv() -> !cir.bool
+// CIR:       %[[RETVAL:.+]] = cir.alloca !cir.bool
+// CIR:       %[[CONST_TRUE:.+]] = cir.const #true
+// CIR:       cir.store{{.*}} %[[CONST_TRUE]], %[[RETVAL]]
+// CIR:       %[[LOADED_VAL:.+]] = cir.load{{.*}} %[[RETVAL]]
+// CIR:       cir.return %[[LOADED_VAL]]
+
+// LLVM-LABEL: define dso_local {{.*}}i1 @_Z4testv()
+// LLVM:         ret i1 true
+
+namespace B {
+template <class _0p> class B {
+public:
+  typedef _0p A;
+  B() { __has_trivial_destructor(A); }
+};
+template <class _0p, class _0e0uence = B<_0p>> class A { _0e0uence A; };
+} // namespace B
+
+class A { public: B::A<A> A; };
+
+bool test() {
+  return __has_trivial_destructor(A);
+}
+
diff --git a/clang/test/CIR/Incubator/CodeGen/typedef.c b/clang/test/CIR/Incubator/CodeGen/typedef.c
new file mode 100644
index 0000000000000..7ca2210b30a0a
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/typedef.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s
+
+void local_typedef() {
+  typedef struct {int a;} Struct;
+  Struct s;
+}
+
+//CHECK:  cir.func {{.*}} @local_typedef()
+//CHECK:    {{.*}} = cir.alloca !rec_Struct, !cir.ptr<!rec_Struct>, ["s"] {alignment = 4 : i64}
+//CHECK:    cir.return
diff --git a/clang/test/CIR/Incubator/CodeGen/typeinfo b/clang/test/CIR/Incubator/CodeGen/typeinfo
new file mode 100644
index 0000000000000..a68b10302c6fe
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/typeinfo
@@ -0,0 +1,24 @@
+namespace std {
+  class type_info {
+  public:
+    virtual ~type_info();
+    const char* name() const { return __name; }
+    bool operator==(const type_info& __arg) const {
+     return __name == __arg.__name;
+    }
+
+    bool operator!=(const type_info& __arg) const {
+      return !operator==(__arg);
+    }
+
+    bool before(const type_info& __arg) const {
+      return __name < __arg.__name;
+    }
+
+    unsigned long hash_code() const {
+      return reinterpret_cast<unsigned long long>(__name);
+    }
+  protected:
+    const char *__name;
+  };
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/types-IEEE-quad.c b/clang/test/CIR/Incubator/CodeGen/types-IEEE-quad.c
new file mode 100644
index 0000000000000..4a6738cbeaa73
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/types-IEEE-quad.c
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir --check-prefix=CIR %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+long double i = 0;
+long double t2(long double i2) {
+    return i2 + i ;
+}
+
+// CIR: cir.global external @i = #cir.fp<0.000000e+00> : !cir.long_double<!cir.f128> {alignment = 16 : i64} loc({{.*}})
+// CIR-LABEL:   cir.func {{.*}} @t2(%arg0: !cir.long_double<!cir.f128> loc({{.*}})) -> !cir.long_double<!cir.f128>
+// CIR-NEXT:    %[[#I2:]] = cir.alloca !cir.long_double<!cir.f128>, !cir.ptr<!cir.long_double<!cir.f128>>, ["i2", init] {alignment = 16 : i64}
+// CIR-NEXT:    %[[#RETVAL:]] = cir.alloca !cir.long_double<!cir.f128>, !cir.ptr<!cir.long_double<!cir.f128>>, ["__retval"] {alignment = 16 : i64}
+// CIR-NEXT:    cir.store %arg0, %[[#I2]] : !cir.long_double<!cir.f128>, !cir.ptr<!cir.long_double<!cir.f128>>
+// CIR-NEXT:    %[[#I2_LOAD:]] = cir.load{{.*}} %[[#I2]] : !cir.ptr<!cir.long_double<!cir.f128>>, !cir.long_double<!cir.f128>
+// CIR-NEXT:    %[[#I:]] = cir.get_global @i : !cir.ptr<!cir.long_double<!cir.f128>>
+// CIR-NEXT:    %[[#I_LOAD:]] = cir.load{{.*}} %[[#I]] : !cir.ptr<!cir.long_double<!cir.f128>>, !cir.long_double<!cir.f128>
+// CIR-NEXT:    %[[#ADD:]] = cir.binop(add, %[[#I2_LOAD]], %[[#I_LOAD]]) : !cir.long_double<!cir.f128>
+// CIR-NEXT:    cir.store{{.*}} %[[#ADD]], %[[#RETVAL]] : !cir.long_double<!cir.f128>, !cir.ptr<!cir.long_double<!cir.f128>>
+// CIR-NEXT:    %[[#RETVAL_LOAD:]] = cir.load{{.*}} %[[#RETVAL]] : !cir.ptr<!cir.long_double<!cir.f128>>, !cir.long_double<!cir.f128>
+// CIR-NEXT:    cir.return %[[#RETVAL_LOAD]] : !cir.long_double<!cir.f128>
+
+//LLVM:         @i = global fp128 0xL00000000000000000000000000000000, align 16
+//LLVM-LABEL:   define dso_local fp128 @t2(fp128 noundef %i2)
+//LLVM-NEXT:  entry:
+//LLVM-NEXT:    %[[I2_ADDR:.*]] = alloca fp128, align 16
+//LLVM-NEXT:    store fp128 %i2, ptr %[[I2_ADDR]], align 16
+//LLVM-NEXT:    %[[I2_LOAD:.*]] = load fp128, ptr %[[I2_ADDR]], align 16
+//LLVM-NEXT:    %[[I_LOAD:.*]] = load fp128, ptr @i, align 16
+//LLVM-NEXT:    %[[RETVAL:.*]] = fadd fp128 %[[I2_LOAD]], %[[I_LOAD]]
+//LLVM-NEXT:    ret fp128 %[[RETVAL]]
diff --git a/clang/test/CIR/Incubator/CodeGen/types-nullptr.cpp b/clang/test/CIR/Incubator/CodeGen/types-nullptr.cpp
new file mode 100644
index 0000000000000..a598a1a724f6e
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/types-nullptr.cpp
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+typedef decltype(nullptr) nullptr_t;
+void f() { nullptr_t t = nullptr; }
+
+// CHECK: %0 = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// CHECK: %1 = cir.const #cir.ptr<null> : !cir.ptr<!void>
+// CHECK: cir.store{{.*}} %1, %0 : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
diff --git a/clang/test/CIR/Incubator/CodeGen/types.c b/clang/test/CIR/Incubator/CodeGen/types.c
new file mode 100644
index 0000000000000..adeba2d6e84ae
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/types.c
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cpp.cir
+// RUN: FileCheck --input-file=%t.cpp.cir --check-prefix=CHECK-CPP %s
+
+int t0(int i) { return i; }
+unsigned int t1(unsigned int i) { return i; }
+
+char t2(char i) { return i; }
+unsigned char t3(unsigned char i) { return i; }
+
+short t4(short i) { return i; }
+unsigned short t5(unsigned short i) { return i; }
+
+float t6(float i) { return i; }
+double t7(double i) { return i; }
+long double t10(long double i) { return i; }
+
+void t8(void) {}
+
+#ifdef __cplusplus
+bool t9(bool b) { return b; }
+#endif
+
+// CHECK: cir.func {{.*}} @t0(%arg0: !s32i loc({{.*}})) -> !s32i
+// CHECK: cir.func {{.*}} @t1(%arg0: !u32i loc({{.*}})) -> !u32i
+// CHECK: cir.func {{.*}} @t2(%arg0: !s8i loc({{.*}})) -> !s8i
+// CHECK: cir.func {{.*}} @t3(%arg0: !u8i loc({{.*}})) -> !u8i
+// CHECK: cir.func {{.*}} @t4(%arg0: !s16i loc({{.*}})) -> !s16i
+// CHECK: cir.func {{.*}} @t5(%arg0: !u16i loc({{.*}})) -> !u16i
+// CHECK: cir.func {{.*}} @t6(%arg0: !cir.float loc({{.*}})) -> !cir.float
+// CHECK: cir.func {{.*}} @t7(%arg0: !cir.double loc({{.*}})) -> !cir.double
+// CHECK: cir.func {{.*}} @t10(%arg0: !cir.long_double<!cir.f80> loc({{.*}})) -> !cir.long_double<!cir.f80>
+// CHECK: cir.func {{.*}} @t8()
+
+// CHECK-CPP: cir.func {{.*}} @_Z2t0i(%arg0: !s32i loc({{.*}})) -> !s32i
+// CHECK-CPP: cir.func {{.*}} @_Z2t1j(%arg0: !u32i loc({{.*}})) -> !u32i
+// CHECK-CPP: cir.func {{.*}} @_Z2t2c(%arg0: !s8i loc({{.*}})) -> !s8i
+// CHECK-CPP: cir.func {{.*}} @_Z2t3h(%arg0: !u8i loc({{.*}})) -> !u8i
+// CHECK-CPP: cir.func {{.*}} @_Z2t4s(%arg0: !s16i loc({{.*}})) -> !s16i
+// CHECK-CPP: cir.func {{.*}} @_Z2t5t(%arg0: !u16i loc({{.*}})) -> !u16i
+// CHECK-CPP: cir.func {{.*}} @_Z2t6f(%arg0: !cir.float loc({{.*}})) -> !cir.float
+// CHECK-CPP: cir.func {{.*}} @_Z2t7d(%arg0: !cir.double loc({{.*}})) -> !cir.double
+// CHECK-CPP: cir.func {{.*}} @{{.+}}t10{{.+}}(%arg0: !cir.long_double<!cir.f80> loc({{.*}})) -> !cir.long_double<!cir.f80>
+// CHECK-CPP: cir.func {{.*}} @_Z2t8v()
+// CHECK-CPP: cir.func {{.*}} @_Z2t9b(%arg0: !cir.bool loc({{.*}})) -> !cir.bool
diff --git a/clang/test/CIR/Incubator/CodeGen/unary-deref.cpp b/clang/test/CIR/Incubator/CodeGen/unary-deref.cpp
new file mode 100644
index 0000000000000..39599c86b2e99
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/unary-deref.cpp
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s
+
+struct MyIntPointer {
+  int *ptr = nullptr;
+  int read() const { return *ptr; }
+};
+
+void foo() {
+  MyIntPointer p;
+  (void)p.read();
+}
+
+// CHECK:  cir.func {{.*}} @_ZNK12MyIntPointer4readEv
+// CHECK:  %2 = cir.load{{.*}} %0
+// CHECK:  %3 = cir.get_member %2[0] {name = "ptr"}
+// CHECK:  %4 = cir.load deref{{.*}} %3 : !cir.ptr<!cir.ptr<!s32i>>
+// CHECK:  %5 = cir.load{{.*}} %4
diff --git a/clang/test/CIR/Incubator/CodeGen/unary.c b/clang/test/CIR/Incubator/CodeGen/unary.c
new file mode 100644
index 0000000000000..9e3293c9e64ff
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/unary.c
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -Wno-unused-value -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+int valueNegationInt(int i) {
+// CHECK: cir.func {{.*}} @valueNegationInt(
+  return !i;
+  // CHECK: %[[#INT:]] = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!s32i>, !s32i
+  // CHECK: %[[#INT_TO_BOOL:]] = cir.cast int_to_bool %[[#INT]] : !s32i -> !cir.bool
+  // CHECK: = cir.unary(not, %[[#INT_TO_BOOL]]) : !cir.bool, !cir.bool
+}
+
+short valueNegationShort(short s) {
+// CHECK: cir.func {{.*}} @valueNegationShort(
+  return !s;
+  // CHECK: %[[#SHORT:]] = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!s16i>, !s16i
+  // CHECK: %[[#SHORT_TO_BOOL:]] = cir.cast int_to_bool %[[#SHORT]] : !s16i -> !cir.bool
+  // CHECK: = cir.unary(not, %[[#SHORT_TO_BOOL]]) : !cir.bool, !cir.bool
+}
+
+long valueNegationLong(long l) {
+// CHECK: cir.func {{.*}} @valueNegationLong(
+  return !l;
+  // CHECK: %[[#LONG:]] = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!s64i>, !s64i
+  // CHECK: %[[#LONG_TO_BOOL:]] = cir.cast int_to_bool %[[#LONG]] : !s64i -> !cir.bool
+  // CHECK: = cir.unary(not, %[[#LONG_TO_BOOL]]) : !cir.bool, !cir.bool
+}
+
+float valueNegationFloat(float f) {
+// CHECK: cir.func {{.*}} @valueNegationFloat(
+  return !f;
+  // CHECK: %[[#FLOAT:]] = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!cir.float>, !cir.float
+  // CHECK: %[[#FLOAT_TO_BOOL:]] = cir.cast float_to_bool %[[#FLOAT]] : !cir.float -> !cir.bool
+  // CHECK: %[[#FLOAT_NOT:]] = cir.unary(not, %[[#FLOAT_TO_BOOL]]) : !cir.bool, !cir.bool
+  // CHECK: = cir.cast bool_to_int %[[#FLOAT_NOT]] : !cir.bool -> !s32i
+}
+
+double valueNegationDouble(double d) {
+// CHECK: cir.func {{.*}} @valueNegationDouble(
+  return !d;
+  // CHECK: %[[#DOUBLE:]] = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!cir.double>, !cir.double
+  // CHECK: %[[#DOUBLE_TO_BOOL:]] = cir.cast float_to_bool %[[#DOUBLE]] : !cir.double -> !cir.bool
+  // CHECK: %[[#DOUBLE_NOT:]] = cir.unary(not, %[[#DOUBLE_TO_BOOL]]) : !cir.bool, !cir.bool
+  // CHECK: = cir.cast bool_to_int %[[#DOUBLE_NOT]] : !cir.bool -> !s32i
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/unary.cpp b/clang/test/CIR/Incubator/CodeGen/unary.cpp
new file mode 100644
index 0000000000000..033a5d39c4206
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/unary.cpp
@@ -0,0 +1,232 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -Wno-unused-value -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+unsigned up0() {
+  unsigned a = 1;
+  return +a;
+}
+
+// CHECK: cir.func {{.*}} @_Z3up0v() -> !u32i
+// CHECK: %[[#RET:]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["__retval"]
+// CHECK: %[[#A:]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["a", init]
+// CHECK: %[[#INPUT:]] = cir.load{{.*}} %[[#A]]
+// CHECK: %[[#OUTPUT:]] = cir.unary(plus, %[[#INPUT]])
+// CHECK: cir.store{{.*}} %[[#OUTPUT]], %[[#RET]]
+
+unsigned um0() {
+  unsigned a = 1;
+  return -a;
+}
+
+// CHECK: cir.func {{.*}} @_Z3um0v() -> !u32i
+// CHECK: %[[#RET:]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["__retval"]
+// CHECK: %[[#A:]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["a", init]
+// CHECK: %[[#INPUT:]] = cir.load{{.*}} %[[#A]]
+// CHECK: %[[#OUTPUT:]] = cir.unary(minus, %[[#INPUT]])
+// CHECK: cir.store{{.*}} %[[#OUTPUT]], %[[#RET]]
+
+unsigned un0() {
+  unsigned a = 1;
+  return ~a; // a ^ -1 , not
+}
+
+// CHECK: cir.func {{.*}} @_Z3un0v() -> !u32i
+// CHECK: %[[#RET:]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["__retval"]
+// CHECK: %[[#A:]] = cir.alloca !u32i, !cir.ptr<!u32i>, ["a", init]
+// CHECK: %[[#INPUT:]] = cir.load{{.*}} %[[#A]]
+// CHECK: %[[#OUTPUT:]] = cir.unary(not, %[[#INPUT]])
+// CHECK: cir.store{{.*}} %[[#OUTPUT]], %[[#RET]]
+
+int inc0() {
+  int a = 1;
+  ++a;
+  return a;
+}
+
+// CHECK: cir.func {{.*}} @_Z4inc0v() -> !s32i
+// CHECK: %[[#RET:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+// CHECK: %[[#A:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init]
+// CHECK: %[[#ATMP:]] = cir.const #cir.int<1> : !s32i
+// CHECK: cir.store{{.*}} %[[#ATMP]], %[[#A]] : !s32i
+// CHECK: %[[#INPUT:]] = cir.load{{.*}} %[[#A]]
+// CHECK: %[[#INCREMENTED:]] = cir.unary(inc, %[[#INPUT]]) nsw
+// CHECK: cir.store{{.*}} %[[#INCREMENTED]], %[[#A]]
+// CHECK: %[[#A_TO_OUTPUT:]] = cir.load{{.*}} %[[#A]]
+// CHECK: cir.store{{.*}} %[[#A_TO_OUTPUT]], %[[#RET]]
+// CHECK: %[[#OUTPUT:]] = cir.load{{.*}} %[[#RET]]
+// CHECK: cir.return %[[#OUTPUT]] : !s32i
+
+int dec0() {
+  int a = 1;
+  --a;
+  return a;
+}
+
+// CHECK: cir.func {{.*}} @_Z4dec0v() -> !s32i
+// CHECK: %[[#RET:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+// CHECK: %[[#A:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init]
+// CHECK: %[[#ATMP:]] = cir.const #cir.int<1> : !s32i
+// CHECK: cir.store{{.*}} %[[#ATMP]], %[[#A]] : !s32i
+// CHECK: %[[#INPUT:]] = cir.load{{.*}} %[[#A]]
+// CHECK: %[[#INCREMENTED:]] = cir.unary(dec, %[[#INPUT]]) nsw
+// CHECK: cir.store{{.*}} %[[#INCREMENTED]], %[[#A]]
+// CHECK: %[[#A_TO_OUTPUT:]] = cir.load{{.*}} %[[#A]]
+// CHECK: cir.store{{.*}} %[[#A_TO_OUTPUT]], %[[#RET]]
+// CHECK: %[[#OUTPUT:]] = cir.load{{.*}} %[[#RET]]
+// CHECK: cir.return %[[#OUTPUT]] : !s32i
+
+
+int inc1() {
+  int a = 1;
+  a++;
+  return a;
+}
+
+// CHECK: cir.func {{.*}} @_Z4inc1v() -> !s32i
+// CHECK: %[[#RET:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+// CHECK: %[[#A:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init]
+// CHECK: %[[#ATMP:]] = cir.const #cir.int<1> : !s32i
+// CHECK: cir.store{{.*}} %[[#ATMP]], %[[#A]] : !s32i
+// CHECK: %[[#INPUT:]] = cir.load{{.*}} %[[#A]]
+// CHECK: %[[#INCREMENTED:]] = cir.unary(inc, %[[#INPUT]]) nsw
+// CHECK: cir.store{{.*}} %[[#INCREMENTED]], %[[#A]]
+// CHECK: %[[#A_TO_OUTPUT:]] = cir.load{{.*}} %[[#A]]
+// CHECK: cir.store{{.*}} %[[#A_TO_OUTPUT]], %[[#RET]]
+// CHECK: %[[#OUTPUT:]] = cir.load{{.*}} %[[#RET]]
+// CHECK: cir.return %[[#OUTPUT]] : !s32i
+
+int dec1() {
+  int a = 1;
+  a--;
+  return a;
+}
+
+// CHECK: cir.func {{.*}} @_Z4dec1v() -> !s32i
+// CHECK: %[[#RET:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+// CHECK: %[[#A:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init]
+// CHECK: %[[#ATMP:]] = cir.const #cir.int<1> : !s32i
+// CHECK: cir.store{{.*}} %[[#ATMP]], %[[#A]] : !s32i
+// CHECK: %[[#INPUT:]] = cir.load{{.*}} %[[#A]]
+// CHECK: %[[#INCREMENTED:]] = cir.unary(dec, %[[#INPUT]]) nsw
+// CHECK: cir.store{{.*}} %[[#INCREMENTED]], %[[#A]]
+// CHECK: %[[#A_TO_OUTPUT:]] = cir.load{{.*}} %[[#A]]
+// CHECK: cir.store{{.*}} %[[#A_TO_OUTPUT]], %[[#RET]]
+// CHECK: %[[#OUTPUT:]] = cir.load{{.*}} %[[#RET]]
+// CHECK: cir.return %[[#OUTPUT]] : !s32i
+
+// Ensure the increment is performed after the assignment to b.
+int inc2() {
+  int a = 1;
+  int b = a++;
+  return b;
+}
+
+// CHECK: cir.func {{.*}} @_Z4inc2v() -> !s32i
+// CHECK: %[[#RET:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+// CHECK: %[[#A:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init]
+// CHECK: %[[#B:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init]
+// CHECK: %[[#ATMP:]] = cir.const #cir.int<1> : !s32i
+// CHECK: cir.store{{.*}} %[[#ATMP]], %[[#A]] : !s32i
+// CHECK: %[[#ATOB:]] = cir.load{{.*}} %[[#A]]
+// CHECK: %[[#INCREMENTED:]] = cir.unary(inc, %[[#ATOB]]) nsw
+// CHECK: cir.store{{.*}} %[[#INCREMENTED]], %[[#A]]
+// CHECK: cir.store{{.*}} %[[#ATOB]], %[[#B]]
+// CHECK: %[[#B_TO_OUTPUT:]] = cir.load{{.*}} %[[#B]]
+// CHECK: cir.store{{.*}} %[[#B_TO_OUTPUT]], %[[#RET]]
+// CHECK: %[[#OUTPUT:]] = cir.load{{.*}} %[[#RET]]
+// CHECK: cir.return %[[#OUTPUT]] : !s32i
+
+int *inc_p(int *i) {
+  --i;
+  ++i;
+  return i;
+}
+
+// CHECK: cir.func {{.*}} @_Z5inc_pPi(%arg0: !cir.ptr<!s32i>
+
+// CHECK:   %[[#i_addr:]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["i", init] {alignment = 8 : i64}
+// CHECK:   %[[#i_dec:]] = cir.load{{.*}} %[[#i_addr]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK:   %[[#dec_const:]] = cir.const #cir.int<-1> : !s32i
+// CHECK:   = cir.ptr_stride %[[#i_dec]], %[[#dec_const]] : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+
+// CHECK:   %[[#i_inc:]] = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK:   %[[#inc_const:]] = cir.const #cir.int<1> : !s32i
+// CHECK:   = cir.ptr_stride %[[#i_inc]], %[[#inc_const]] : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+
+void floats(float f) {
+// CHECK: cir.func {{.*}} @{{.+}}floats{{.+}}
+  f = +f; // CHECK: %{{[0-9]+}} = cir.unary(plus, %{{[0-9]+}}) : !cir.float, !cir.float
+  f = -f; // CHECK: %{{[0-9]+}} = cir.unary(minus, %{{[0-9]+}}) : !cir.float, !cir.float
+  ++f; // CHECK: = cir.unary(inc, %{{[0-9]+}}) : !cir.float, !cir.float
+  --f; // CHECK: = cir.unary(dec, %{{[0-9]+}}) : !cir.float, !cir.float
+  f++; // CHECK: = cir.unary(inc, %{{[0-9]+}}) : !cir.float, !cir.float
+  f--; // CHECK: = cir.unary(dec, %{{[0-9]+}}) : !cir.float, !cir.float
+
+  f = !f;
+  // CHECK: %[[#F_BOOL:]] = cir.cast float_to_bool %{{[0-9]+}} : !cir.float -> !cir.bool
+  // CHECK: = cir.unary(not, %[[#F_BOOL]]) : !cir.bool, !cir.bool
+}
+
+void doubles(double d) {
+// CHECK: cir.func {{.*}} @{{.+}}doubles{{.+}}
+  d = +d; // CHECK: %{{[0-9]+}} = cir.unary(plus, %{{[0-9]+}}) : !cir.double, !cir.double
+  d = -d; // CHECK: %{{[0-9]+}} = cir.unary(minus, %{{[0-9]+}}) : !cir.double, !cir.double
+  ++d; // CHECK: = cir.unary(inc, %{{[0-9]+}}) : !cir.double, !cir.double
+  --d; // CHECK: = cir.unary(dec, %{{[0-9]+}}) : !cir.double, !cir.double
+  d++; // CHECK: = cir.unary(inc, %{{[0-9]+}}) : !cir.double, !cir.double
+  d--; // CHECK: = cir.unary(dec, %{{[0-9]+}}) : !cir.double, !cir.double
+
+  d = !d;
+  // CHECK: %[[#D_BOOL:]] = cir.cast float_to_bool %{{[0-9]+}} : !cir.double -> !cir.bool
+  // CHECK: = cir.unary(not, %[[#D_BOOL]]) : !cir.bool, !cir.bool
+}
+
+void pointers(int *p) {
+// CHECK: cir.func {{.*}} @{{[^ ]+}}pointers
+  // CHECK: %[[#P:]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+
+  p = +p;
+  // CHECK: cir.unary(plus, %{{.+}}) : !cir.ptr<!s32i>, !cir.ptr<!s32i>
+
+  ++p;
+  // CHECK:  %[[#INC:]] = cir.const #cir.int<1> : !s32i
+  // CHECK:  %[[#RES:]] = cir.ptr_stride %{{.+}}, %[[#INC]] : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+  // CHECK:  cir.store{{.*}} %[[#RES]], %[[#P]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+  --p;
+  // CHECK:  %[[#DEC:]] = cir.const #cir.int<-1> : !s32i
+  // CHECK:  %[[#RES:]] = cir.ptr_stride %{{.+}}, %[[#DEC]] : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+  // CHECK:  cir.store{{.*}} %[[#RES]], %[[#P]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+  p++;
+  // CHECK:  %[[#INC:]] = cir.const #cir.int<1> : !s32i
+  // CHECK:  %[[#RES:]] = cir.ptr_stride %{{.+}}, %[[#INC]] : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+  // CHECK:  cir.store{{.*}} %[[#RES]], %[[#P]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+  p--;
+  // CHECK:  %[[#DEC:]] = cir.const #cir.int<-1> : !s32i
+  // CHECK:  %[[#RES:]] = cir.ptr_stride %{{.+}}, %[[#DEC]] : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+  // CHECK:  cir.store{{.*}} %[[#RES]], %[[#P]] : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+
+  bool p1 = !p;
+  // CHECK: %[[#BOOLPTR:]] = cir.cast ptr_to_bool %{{[0-9]+}} : !cir.ptr<!s32i> -> !cir.bool
+  // CHECK: cir.unary(not, %[[#BOOLPTR]]) : !cir.bool, !cir.bool
+}
+
+void chars(char c) {
+// CHECK: cir.func {{.*}} @{{.+}}chars{{.+}}
+
+  int c1 = +c;
+  // CHECK: %[[#PROMO:]] = cir.cast integral %{{.+}} : !s8i -> !s32i
+  // CHECK: cir.unary(plus, %[[#PROMO]]) : !s32i, !s32i
+  int c2 = -c;
+  // CHECK: %[[#PROMO:]] = cir.cast integral %{{.+}} : !s8i -> !s32i
+  // CHECK: cir.unary(minus, %[[#PROMO]]) nsw : !s32i, !s32i
+
+  // Chars can go through some integer promotion codegen paths even when not promoted.
+  ++c; // CHECK: cir.unary(inc, %{{[0-9]+}}) : !s8i, !s8i
+  --c; // CHECK: cir.unary(dec, %{{[0-9]+}}) : !s8i, !s8i
+  c++; // CHECK: cir.unary(inc, %{{[0-9]+}}) : !s8i, !s8i
+  c--; // CHECK: cir.unary(dec, %{{[0-9]+}}) : !s8i, !s8i
+
+  bool c3 = !c;
+  // CHECK: %[[#C_BOOL:]] = cir.cast int_to_bool %{{[0-9]+}} : !s8i -> !cir.bool
+  // CHECK: cir.unary(not, %[[#C_BOOL]]) : !cir.bool, !cir.bool
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/union-array.c b/clang/test/CIR/Incubator/CodeGen/union-array.c
new file mode 100644
index 0000000000000..0976809e96afa
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/union-array.c
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -fno-clangir-call-conv-lowering %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+
+typedef struct {
+  char a;
+} S_1;
+
+typedef struct {
+  long a, b;
+} S_2;
+
+typedef union {
+  S_1 a;
+  S_2 b;
+} U;
+
+typedef union {
+   int f0;
+   int f1;
+} U1;
+
+static U1 g = {5};
+// LLVM: @__const.bar.x = private constant [2 x ptr] [ptr @g, ptr @g]
+// LLVM: @g = internal global { i32 } { i32 5 }
+// FIXME: LLVM output should be: @g = internal global %union.U { i32 5 }
+
+// LLVM: @g2 = global ptr getelementptr inbounds nuw (i8, ptr @g1, i64 24)
+
+void foo() { U arr[2] = {{.b = {1, 2}}, {.a = {1}}}; }
+
+// CIR: cir.const #cir.const_record<{#cir.const_record<{#cir.const_record<{#cir.int<1> : !s64i, #cir.int<2> : !s64i}> : {{.*}}}> : {{.*}}, #cir.const_record<{#cir.const_record<{#cir.int<1> : !s8i}> : {{.*}}, #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 15>}>
+// LLVM: store { { %struct.S_2 }, { %struct.S_1, [15 x i8] } } { { %struct.S_2 } { %struct.S_2 { i64 1, i64 2 } }, { %struct.S_1, [15 x i8] } { %struct.S_1 { i8 1 }, [15 x i8] zeroinitializer } }
+
+void bar(void) {
+  int *x[2] = { &g.f0, &g.f0 };
+}
+// CIR: cir.global "private" internal dso_local @g = #cir.const_record<{#cir.int<5> : !s32i}> : !rec_anon_struct
+// CIR: cir.const #cir.const_array<[#cir.global_view<@g> : !cir.ptr<!s32i>, #cir.global_view<@g> : !cir.ptr<!s32i>]> : !cir.array<!cir.ptr<!s32i> x 2>
+
+typedef struct {
+    long s0;
+    int  s1;
+} S_3;
+
+typedef union {
+   int  f0;
+   S_3 f1;
+} U2;
+
+
+static U2 g1[3] = {{0x42},{0x42},{0x42}};
+int* g2 = &g1[1].f1.s1;
+// CIR: cir.global external @g2 = #cir.global_view<@g1, [1, 1, 4]> : !cir.ptr<!s32i>
+
+void baz(void) {
+  (*g2) = 4;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/union-empty.cpp b/clang/test/CIR/Incubator/CodeGen/union-empty.cpp
new file mode 100644
index 0000000000000..43ffb319ada4b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/union-empty.cpp
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+
+union EmptyUnion {
+  EmptyUnion() = default;
+};
+
+void f0() {
+  EmptyUnion e;
+};
+
+// CIR: !rec_EmptyUnion = !cir.record<union "EmptyUnion" padded {!u8i}>
+// CIR: cir.func {{.*}} @_Z2f0v()
+// CIR:   %0 = cir.alloca !rec_EmptyUnion, !cir.ptr<!rec_EmptyUnion>, ["e"] {alignment = 1 : i64}
+// CIR:   cir.return
+
+// LLVM: %union.EmptyUnion = type { i8 }
+// LLVM: define dso_local void @_Z2f0v()
+// LLVM:   %1 = alloca %union.EmptyUnion, i64 1, align 1
+// LLVM:   ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/union-init.c b/clang/test/CIR/Incubator/CodeGen/union-init.c
new file mode 100644
index 0000000000000..18d95b8514d74
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/union-init.c
@@ -0,0 +1,63 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s
+
+typedef union {
+  int value;
+  struct {
+    int x : 16;
+    int y : 16;
+  };
+} A;
+
+void foo(int x) {
+  A a = {.x = x};
+}
+
+// CHECK-DAG: ![[anon0:.*]] = !cir.record<struct  {!u32i}>
+// CHECK-DAG: ![[anon:.*]] = !cir.record<struct  {!s32i}>
+// CHECK-DAG: #[[bfi_x:.*]] = #cir.bitfield_info<name = "x", storage_type = !u32i, size = 16, offset = 0, is_signed = true>
+// CHECK-DAG: #[[bfi_y:.*]] = #cir.bitfield_info<name = "y", storage_type = !u32i, size = 16, offset = 16, is_signed = true>
+// CHECK-DAG: ![[anon1:.*]] = !cir.record<union "{{.*}}" {!u32i, !cir.array<!u8i x 4>}
+
+// CHECK-LABEL:   cir.func {{.*}} @foo(
+// CHECK:  %[[VAL_1:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+// CHECK:  %[[VAL_2:.*]] = cir.alloca !rec_A, !cir.ptr<!rec_A>, ["a", init] {alignment = 4 : i64}
+// CHECK:  cir.store{{.*}} {{.*}}, %[[VAL_1]] : !s32i, !cir.ptr<!s32i>
+// CHECK:  %[[VAL_3:.*]] = cir.get_member %[[VAL_2]][1] {name = ""} : !cir.ptr<!rec_A> -> !cir.ptr<!rec_anon2E0>
+// CHECK:  %[[VAL_4:.*]] = cir.get_member %[[VAL_3]][0] {name = "x"} : !cir.ptr<!rec_anon2E0> -> !cir.ptr<!u32i>
+// CHECK:  %[[VAL_5:.*]] = cir.load{{.*}} %[[VAL_1]] : !cir.ptr<!s32i>, !s32i
+// CHECK:  %[[VAL_6:.*]] = cir.set_bitfield align(4) (#[[bfi_x]], %[[VAL_4]] : !cir.ptr<!u32i>, %[[VAL_5]] : !s32i) -> !s32i
+// CHECK:  %[[VAL_7:.*]] = cir.get_member %[[VAL_3]][0] {name = "y"} : !cir.ptr<!rec_anon2E0> -> !cir.ptr<!u32i>
+// CHECK:  %[[VAL_8:.*]] = cir.const #cir.int<0> : !s32i
+// CHECK:  %[[VAL_9:.*]] = cir.set_bitfield align(4) (#[[bfi_y]], %[[VAL_7]] : !cir.ptr<!u32i>, %[[VAL_8]] : !s32i) -> !s32i
+// CHECK:  cir.return
+
+union { int i; float f; } u = { };
+// CHECK: cir.global external @u = #cir.zero : ![[anon]]
+
+unsigned is_little(void) {
+  const union {
+    unsigned int u;
+    unsigned char c[4];
+  } one = {1};
+  return one.c[0];
+}
+
+// CHECK: cir.func {{.*}} @is_little
+// CHECK: %[[VAL_1:.*]] = cir.get_global @is_little.one : !cir.ptr<![[anon0]]>
+// CHECK: %[[VAL_2:.*]] = cir.cast bitcast %[[VAL_1]] : !cir.ptr<![[anon0]]> -> !cir.ptr<![[anon1]]>
+// CHECK: %[[VAL_3:.*]] = cir.get_member %[[VAL_2]][1] {name = "c"} : !cir.ptr<![[anon1]]> -> !cir.ptr<!cir.array<!u8i x 4>>
+
+typedef union {
+  int x;
+} U;
+
+// CHECK: %[[VAL_0:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+// CHECK: %[[VAL_1:.*]] = cir.alloca !rec_U, !cir.ptr<!rec_U>, ["u", init] {alignment = 4 : i64}
+// CHECK: cir.store{{.*}} %arg0, %[[VAL_0]] : !s32i, !cir.ptr<!s32i>
+// CHECK: %[[VAL_2:.*]] = cir.cast bitcast %[[VAL_1]] : !cir.ptr<!rec_U> -> !cir.ptr<!s32i>
+// CHECK: %[[VAL_3:.*]] = cir.load{{.*}} %[[VAL_0]] : !cir.ptr<!s32i>, !s32i
+// CHECK: cir.store{{.*}} %[[VAL_3]], %[[VAL_2]] : !s32i, !cir.ptr<!s32i>
+
+void union_cast(int x) {
+  U u = (U) x;
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/union-padding.c b/clang/test/CIR/Incubator/CodeGen/union-padding.c
new file mode 100644
index 0000000000000..ba8c56c6c7be4
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/union-padding.c
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s -o - | FileCheck %s
+
+typedef union {    
+   short f0;
+   signed   f1 : 11;
+   unsigned f2 : 2;
+   signed   f3 : 5;
+} U;
+
+static U g1[2] = {{65534UL}, {65534UL}};
+static short *g2[1] = {&g1[1].f0};
+static short **g3 = &g2[0];
+
+short use() {
+  U u;
+  return **g3;
+}
+// CHECK:       !rec_U = !cir.record<union "U" padded {!s16i, !u16i, !u8i, !u8i, !cir.array<!u8i x 2>}>
+// CHECK:       !rec_anon_struct = !cir.record<struct  {!s16i, !cir.array<!u8i x 2>}>
+
+// CHECK:       @g3 = #cir.global_view<@g2> : !cir.ptr<!cir.ptr<!s16i>>
+// CHECK:       @g2 = #cir.const_array<[#cir.global_view<@g1, [1]> : !cir.ptr<!s16i>]> : !cir.array<!cir.ptr<!s16i> x 1>
+
+// CHECK:       @g1 = 
+// CHECK-SAME:    #cir.const_array<[
+// CHECK-SAME:      #cir.const_record<{#cir.int<-2> : !s16i, 
+// CHECK-SAME:      #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 2>}> : !rec_anon_struct, 
+// CHECK-SAME:      #cir.const_record<{#cir.int<-2> : !s16i,
+// CHECK-SAME:      #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array<!u8i x 2>}> : !rec_anon_struct
+// CHECK-SAME:    ]> : !cir.array<!rec_anon_struct x 2>
+
+
diff --git a/clang/test/CIR/Incubator/CodeGen/union.cpp b/clang/test/CIR/Incubator/CodeGen/union.cpp
new file mode 100644
index 0000000000000..0c0166165af14
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/union.cpp
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+typedef struct { int x; } yolo;
+typedef union { yolo y; struct { int lifecnt; }; } yolm;
+typedef union { yolo y; struct { int *lifecnt; int genpad; }; } yolm2;
+typedef union { yolo y; struct { bool life; int genpad; }; } yolm3;
+
+// CHECK-DAG: !rec_U23A3ADummy = !cir.record<struct "U2::Dummy" {!s16i, !cir.float} #cir.record.decl.ast>
+// CHECK-DAG: !rec_anon2E0 = !cir.record<struct "anon.0" {!s32i} #cir.record.decl.ast>
+// CHECK-DAG: !rec_anon2E2 = !cir.record<struct "anon.2" {!cir.bool, !s32i} #cir.record.decl.ast>
+// CHECK-DAG: !rec_yolo = !cir.record<struct "yolo" {!s32i} #cir.record.decl.ast>
+// CHECK-DAG: !rec_anon2E1 = !cir.record<struct "anon.1" {!cir.ptr<!s32i>, !s32i} #cir.record.decl.ast>
+
+// CHECK-DAG: !rec_yolm = !cir.record<union "yolm" {!rec_yolo, !rec_anon2E0}>
+// CHECK-DAG: !rec_yolm3 = !cir.record<union "yolm3" {!rec_yolo, !rec_anon2E2}>
+// CHECK-DAG: !rec_yolm2 = !cir.record<union "yolm2" {!rec_yolo, !rec_anon2E1}>
+
+// Should generate a union type with all members preserved.
+union U {
+  bool b;
+  short s;
+  int i;
+  float f;
+  double d;
+};
+// CHECK-DAG: !rec_U = !cir.record<union "U" {!cir.bool, !s16i, !s32i, !cir.float, !cir.double}>
+
+// Should generate unions with complex members.
+union U2 {
+  bool b;
+  struct Dummy {
+    short s;
+    float f;
+  } s;
+} u2;
+// CHECK-DAG: !cir.record<union "U2" {!cir.bool, !rec_U23A3ADummy} #cir.record.decl.ast>
+
+// Should generate unions without padding.
+union U3 {
+  short b;
+  U u;
+} u3;
+// CHECK-DAG: !rec_U3 = !cir.record<union "U3" {!s16i, !rec_U} #cir.record.decl.ast>
+
+void m() {
+  yolm q;
+  yolm2 q2;
+  yolm3 q3;
+}
+
+// CHECK:   cir.func {{.*}} @_Z1mv()
+// CHECK:   cir.alloca !rec_yolm, !cir.ptr<!rec_yolm>, ["q"] {alignment = 4 : i64}
+// CHECK:   cir.alloca !rec_yolm2, !cir.ptr<!rec_yolm2>, ["q2"] {alignment = 8 : i64}
+// CHECK:   cir.alloca !rec_yolm3, !cir.ptr<!rec_yolm3>, ["q3"] {alignment = 4 : i64}
+
+void shouldGenerateUnionAccess(union U u) {
+  u.b = true;
+  // CHECK: %[[#BASE:]] = cir.get_member %0[0] {name = "b"} : !cir.ptr<!rec_U> -> !cir.ptr<!cir.bool>
+  // CHECK: cir.store{{.*}} %{{.+}}, %[[#BASE]] : !cir.bool, !cir.ptr<!cir.bool>
+  u.b;
+  // CHECK: cir.get_member %0[0] {name = "b"} : !cir.ptr<!rec_U> -> !cir.ptr<!cir.bool>
+  u.i = 1;
+  // CHECK: %[[#BASE:]] = cir.get_member %0[2] {name = "i"} : !cir.ptr<!rec_U> -> !cir.ptr<!s32i>
+  // CHECK: cir.store{{.*}} %{{.+}}, %[[#BASE]] : !s32i, !cir.ptr<!s32i>
+  u.i;
+  // CHECK: %[[#BASE:]] = cir.get_member %0[2] {name = "i"} : !cir.ptr<!rec_U> -> !cir.ptr<!s32i>
+  u.f = 0.1F;
+  // CHECK: %[[#BASE:]] = cir.get_member %0[3] {name = "f"} : !cir.ptr<!rec_U> -> !cir.ptr<!cir.float>
+  // CHECK: cir.store{{.*}} %{{.+}}, %[[#BASE]] : !cir.float, !cir.ptr<!cir.float>
+  u.f;
+  // CHECK: %[[#BASE:]] = cir.get_member %0[3] {name = "f"} : !cir.ptr<!rec_U> -> !cir.ptr<!cir.float>
+  u.d = 0.1;
+  // CHECK: %[[#BASE:]] = cir.get_member %0[4] {name = "d"} : !cir.ptr<!rec_U> -> !cir.ptr<!cir.double>
+  // CHECK: cir.store{{.*}} %{{.+}}, %[[#BASE]] : !cir.double, !cir.ptr<!cir.double>
+  u.d;
+  // CHECK: %[[#BASE:]] = cir.get_member %0[4] {name = "d"} : !cir.ptr<!rec_U> -> !cir.ptr<!cir.double>
+}
+
+typedef union {
+  short a;
+  int b;
+} A;
+
+void noCrushOnDifferentSizes() {
+  A a = {0};
+  // CHECK:  %[[#TMP0:]] = cir.alloca !rec_A, !cir.ptr<!rec_A>, ["a"] {alignment = 4 : i64}
+  // CHECK:  %[[#TMP1:]] = cir.cast bitcast %[[#TMP0]] : !cir.ptr<!rec_A> -> !cir.ptr<!rec_anon_struct>
+  // CHECK:  %[[#TMP2:]] = cir.const #cir.zero : !rec_anon_struct
+  // CHECK:  cir.store{{.*}} %[[#TMP2]], %[[#TMP1]] : !rec_anon_struct, !cir.ptr<!rec_anon_struct>
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/unreachable.cpp b/clang/test/CIR/Incubator/CodeGen/unreachable.cpp
new file mode 100644
index 0000000000000..8a99a83a39749
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/unreachable.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+void foo();
+
+void basic() {
+  foo();
+  __builtin_unreachable();
+}
+
+//      CHECK: cir.func {{.*}} @_Z5basicv()
+// CHECK-NEXT:   cir.call @_Z3foov() : () -> ()
+// CHECK-NEXT:   cir.unreachable
+// CHECK-NEXT: }
+
+void code_after_unreachable() {
+  foo();
+  __builtin_unreachable();
+  foo();
+}
+
+// CHECK: cir.func {{.*}} @_Z22code_after_unreachablev()
+// CHECK:   cir.call @_Z3foov() : () -> ()
+// CHECK:   cir.unreachable
+// CHECK: ^{{.+}}:
+// CHECK:   cir.call @_Z3foov() : () -> ()
+// CHECK:   cir.return
+// CHECK: }
diff --git a/clang/test/CIR/Incubator/CodeGen/uwtable.cpp b/clang/test/CIR/Incubator/CodeGen/uwtable.cpp
new file mode 100644
index 0000000000000..174b2f31a7834
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/uwtable.cpp
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t-none.cir
+// RUN: FileCheck %s --input-file=%t-none.cir --check-prefix=CIR-NONE
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -funwind-tables=0 %s -o %t-none-explicit.cir
+// RUN: FileCheck %s --input-file=%t-none-explicit.cir --check-prefix=CIR-NONE
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -funwind-tables=1 %s -o %t-sync.cir
+// RUN: FileCheck %s --input-file=%t-sync.cir --check-prefix=CIR-SYNC
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -funwind-tables=2 %s -o %t-async.cir
+// RUN: FileCheck %s --input-file=%t-async.cir --check-prefix=CIR-ASYNC
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-none.ll
+// RUN: FileCheck %s --input-file=%t-none.ll --check-prefix=LLVM-NONE
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -funwind-tables=0 %s -o %t-none-explicit.ll
+// RUN: FileCheck %s --input-file=%t-none-explicit.ll --check-prefix=LLVM-NONE
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -funwind-tables=1 %s -o %t-sync.ll
+// RUN: FileCheck %s --input-file=%t-sync.ll --check-prefix=LLVM-SYNC
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -funwind-tables=2 %s -o %t-async.ll
+// RUN: FileCheck %s --input-file=%t-async.ll --check-prefix=LLVM-ASYNC
+
+// CIR-NONE-NOT: #cir.uwtable
+
+// CIR-SYNC-DAG: module {{.*}} attributes {{{.*}}cir.uwtable = #cir.uwtable<sync>
+// CIR-SYNC-DAG:   cir.func {{.*}} @_Z1fv() extra(#[[f_attr:.*]])
+// CIR-SYNC-DAG:   cir.func {{.*}} @_Z1gv() extra(#[[g_attr:.*]])
+// CIR-SYNC-DAG: #[[f_attr]] = #cir<extra({{{.*}}uwtable = #cir.uwtable<sync>
+// CIR-SYNC-DAG: #[[g_attr]] =
+// CIR-SYNC-NOT:   #cir.uwtable
+
+// CIR-ASYNC-DAG: module {{.*}} attributes {{{.*}}cir.uwtable = #cir.uwtable<async>
+// CIR-ASYNC-DAG:   cir.func {{.*}} @_Z1fv() extra(#[[f_attr:.*]])
+// CIR-ASYNC-DAG:   cir.func {{.*}} @_Z1gv() extra(#[[g_attr:.*]])
+// CIR-ASYNC-DAG: #[[f_attr]] = #cir<extra({{{.*}}uwtable = #cir.uwtable<async>
+// CIR-ASYNC-DAG: #[[g_attr]] =
+// CIR-ASYNC-NOT:   #cir.uwtable
+
+// Avoid matching "uwtable" in the ModuleID and source_filename comments.
+// LLVM-NONE:     define {{.*}} @_Z1fv()
+// LLVM-NONE-NOT: uwtable
+
+// LLVM-SYNC:     define {{.*}} @_Z1fv() #[[#F_ATTRS:]]
+// LLVM-SYNC:     define {{.*}} @_Z1gv() #[[#G_ATTRS:]]
+// LLVM-SYNC:     attributes #[[#F_ATTRS]] = {{{.*}}uwtable(sync)
+// LLVM-SYNC:     attributes #[[#G_ATTRS]] =
+// LLVM-SYNC-NOT:   uwtable
+// LLVM-SYNC-DAG: ![[#METADATA:]] = !{i32 7, !"uwtable", i32 1}
+// LLVM-SYNC-DAG: !llvm.module.flags = !{{{.*}}[[#METADATA]]
+
+// LLVM-ASYNC:     define {{.*}} @_Z1fv() #[[#ATTRS:]]
+// LLVM-ASYNC:     define {{.*}} @_Z1gv() #[[#G_ATTRS:]]
+// LLVM-ASYNC:     attributes #[[#ATTRS]] = {{{.*}}uwtable{{ }}
+// LLVM-ASYNC:     attributes #[[#G_ATTRS]] =
+// LLVM-ASYNC-NOT:   uwtable
+// LLVM-ASYNC-DAG: ![[#METADATA:]] = !{i32 7, !"uwtable", i32 2}
+// LLVM-ASYNC-DAG: !llvm.module.flags = !{{{.*}}[[#METADATA]]
+void f() {}
+
+[[clang::nouwtable]] void g() {}
diff --git a/clang/test/CIR/Incubator/CodeGen/var-arg-float.c b/clang/test/CIR/Incubator/CodeGen/var-arg-float.c
new file mode 100644
index 0000000000000..27cf9d1c69446
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/var-arg-float.c
@@ -0,0 +1,118 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=BEFORE
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=AFTER
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+#include <stdarg.h>
+
+double f1(int n, ...) {
+  va_list valist;
+  va_start(valist, n);
+  double res = va_arg(valist, double);
+  va_end(valist);
+  return res;
+}
+
+// BEFORE: !rec___va_list = !cir.record<struct "__va_list" {!cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>, !s32i, !s32i}
+// BEFORE:  cir.func {{.*}} @f1(%arg0: !s32i, ...) -> !cir.double
+// BEFORE:  [[RETP:%.*]] = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["__retval"]
+// BEFORE:  [[RESP:%.*]] = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["res", init]
+// BEFORE:  cir.va.start [[VARLIST:%.*]] : !cir.ptr<!rec___va_list>
+// BEFORE:  [[TMP0:%.*]] = cir.va.arg [[VARLIST]] : (!cir.ptr<!rec___va_list>) -> !cir.double
+// BEFORE:  cir.store{{.*}} [[TMP0]], [[RESP]] : !cir.double, !cir.ptr<!cir.double>
+// BEFORE:  cir.va.end [[VARLIST]] : !cir.ptr<!rec___va_list>
+// BEFORE:  [[RES:%.*]] = cir.load{{.*}} [[RESP]] : !cir.ptr<!cir.double>, !cir.double
+// BEFORE:   cir.store{{.*}} [[RES]], [[RETP]] : !cir.double, !cir.ptr<!cir.double>
+// BEFORE:  [[RETV:%.*]] = cir.load{{.*}} [[RETP]] : !cir.ptr<!cir.double>, !cir.double
+// BEFORE:   cir.return [[RETV]] : !cir.double
+
+// CIR after cir-lowering-prepare: va_arg expanded into explicit basic blocks.
+// AFTER: !rec___va_list = !cir.record<struct "__va_list" {!cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>, !s32i, !s32i}
+// AFTER:  cir.func {{.*}} @f1(%arg0: !s32i, ...) -> !cir.double
+// AFTER:  [[RETP:%.*]] = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["__retval"]
+// AFTER:  [[RESP:%.*]] = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["res", init]
+// AFTER:  cir.va.start [[VARLIST:%.*]] : !cir.ptr<!rec___va_list>
+// AFTER:  [[VR_OFFS_P:%.*]] = cir.get_member [[VARLIST]][4] {name = "vr_offs"} : !cir.ptr<!rec___va_list> -> !cir.ptr<!s32i>
+// AFTER:  [[VR_OFFS:%.*]] = cir.load{{.*}} [[VR_OFFS_P]] : !cir.ptr<!s32i>, !s32i
+// AFTER:  [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+// AFTER:  [[CMP0:%.*]] = cir.cmp(ge, [[VR_OFFS]], [[ZERO]]) : !s32i, !cir.bool
+// AFTER-NEXT:  cir.brcond [[CMP0]] [[BB_ON_STACK:\^bb.*]], [[BB_MAY_REG:\^bb.*]]
+
+// AFTER: [[BB_MAY_REG]]:
+// AFTER-NEXT: [[SIXTEEN:%.*]] = cir.const #cir.int<16> : !s32i
+// AFTER-NEXT: [[NEW_REG_OFFS:%.*]] = cir.binop(add, [[VR_OFFS]], [[SIXTEEN]]) : !s32i
+// AFTER-NEXT: cir.store{{.*}} [[NEW_REG_OFFS]], [[VR_OFFS_P]] : !s32i, !cir.ptr<!s32i>
+// AFTER-NEXT: [[CMP1:%.*]] = cir.cmp(le, [[NEW_REG_OFFS]], [[ZERO]]) : !s32i, !cir.bool
+// AFTER-NEXT: cir.brcond [[CMP1]] [[BB_IN_REG:\^bb.*]], [[BB_ON_STACK]]
+
+
+// AFTER: [[BB_IN_REG]]:
+// AFTER-NEXT: [[VR_TOP_P:%.*]] = cir.get_member [[VARLIST]][2] {name = "vr_top"} : !cir.ptr<!rec___va_list> -> !cir.ptr<!cir.ptr<!void>>
+// AFTER-NEXT: [[VR_TOP:%.*]] = cir.load{{.*}} [[VR_TOP_P]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// AFTER-NEXT: [[TMP2:%.*]] = cir.cast bitcast [[VR_TOP]] : !cir.ptr<!void> -> !cir.ptr<i8>
+// AFTER-NEXT: [[TMP3:%.*]] = cir.ptr_stride [[TMP2]], [[VR_OFFS]] : (!cir.ptr<i8>, !s32i) -> !cir.ptr<i8>
+// AFTER-NEXT: [[IN_REG_OUTPUT:%.*]] = cir.cast bitcast [[TMP3]] : !cir.ptr<i8> -> !cir.ptr<!void>
+// AFTER-NEXT: cir.br [[BB_END:\^bb.*]]([[IN_REG_OUTPUT]] : !cir.ptr<!void>)
+
+
+// AFTER: [[BB_ON_STACK]]:
+// AFTER-NEXT: [[STACK_P:%.*]] = cir.get_member [[VARLIST]][0] {name = "stack"} : !cir.ptr<!rec___va_list> -> !cir.ptr<!cir.ptr<!void>>
+// AFTER-NEXT: [[STACK_V:%.*]] = cir.load{{.*}} [[STACK_P]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// AFTER-NEXT: [[EIGHT_IN_PTR_ARITH:%.*]]  = cir.const #cir.int<8> : !u64i
+// AFTER-NEXT: [[TMP4:%.*]] = cir.cast bitcast [[STACK_V]] : !cir.ptr<!void> -> !cir.ptr<i8>
+// AFTER-NEXT: [[TMP5:%.*]] = cir.ptr_stride [[TMP4]], [[EIGHT_IN_PTR_ARITH]] : (!cir.ptr<i8>, !u64i) -> !cir.ptr<i8>
+// AFTER-NEXT: [[NEW_STACK_V:%.*]] = cir.cast bitcast [[TMP5]] : !cir.ptr<i8> -> !cir.ptr<!void>
+// AFTER-NEXT: cir.store{{.*}} [[NEW_STACK_V]], [[STACK_P]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// AFTER-NEXT: cir.br [[BB_END]]([[STACK_V]] : !cir.ptr<!void>)
+
+// AFTER-NEXT: [[BB_END]]([[BLK_ARG:%.*]]: !cir.ptr<!void>):  // 2 preds: [[BB_IN_REG]], [[BB_ON_STACK]]
+// AFTER-NEXT:  [[TMP0:%.*]] = cir.cast bitcast [[BLK_ARG]] : !cir.ptr<!void> -> !cir.ptr<!cir.double>
+// AFTER-NEXT:  [[TMP1:%.*]] = cir.load{{.*}} [[TMP0]] : !cir.ptr<!cir.double>, !cir.double
+// AFTER:   cir.store{{.*}} [[TMP1]], [[RESP]] : !cir.double, !cir.ptr<!cir.double>
+// AFTER:   cir.va.end [[VARLIST]] : !cir.ptr<!rec___va_list>
+// AFTER:   [[RES:%.*]] = cir.load{{.*}} [[RESP]] : !cir.ptr<!cir.double>, !cir.double
+// AFTER:   cir.store{{.*}} [[RES]], [[RETP]] : !cir.double, !cir.ptr<!cir.double>
+// AFTER:  [[RETV:%.*]] = cir.load{{.*}} [[RETP]] : !cir.ptr<!cir.double>, !cir.double
+// AFTER:   cir.return [[RETV]] : !cir.double
+
+// LLVM IR: the lowered basic-block structure corresponding to the CIR above.
+// LLVM: %struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+// LLVM: define dso_local double @f1(i32 %0, ...)
+// LLVM: [[ARGN:%.*]] = alloca i32, i64 1, align 4
+// LLVM: [[RETP:%.*]] = alloca double, i64 1, align 8
+// LLVM: [[RESP:%.*]] = alloca double, i64 1, align 8
+// LLVM: call void @llvm.va_start.p0(ptr [[VARLIST:%.*]])
+// LLVM: [[VR_OFFS_P:%.*]] = getelementptr %struct.__va_list, ptr [[VARLIST]], i32 0, i32 4
+// LLVM: [[VR_OFFS:%.*]] = load i32, ptr [[VR_OFFS_P]], align 4
+// LLVM-NEXT: [[CMP0:%.*]] = icmp sge i32 [[VR_OFFS]], 0
+// LLVM-NEXT: br i1 [[CMP0]], label %[[BB_ON_STACK:.*]], label %[[BB_MAY_REG:.*]]
+
+// LLVM:  [[BB_MAY_REG]]: ;
+// LLVM-NEXT: [[NEW_REG_OFFS:%.*]] = add i32 [[VR_OFFS]], 16
+// LLVM-NEXT: store i32 [[NEW_REG_OFFS]], ptr [[VR_OFFS_P]], align 4
+// LLVM-NEXT: [[CMP1:%.*]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// LLVM-NEXT: br i1 [[CMP1]], label %[[BB_IN_REG:.*]], label %[[BB_ON_STACK]]
+
+// LLVM:  [[BB_IN_REG]]: ;
+// LLVM-NEXT: [[VR_TOP_P:%.*]] = getelementptr %struct.__va_list, ptr [[VARLIST]], i32 0, i32 2
+// LLVM-NEXT: [[VR_TOP:%.*]] = load ptr, ptr [[VR_TOP_P]], align 8
+// LLVM-NEXT: [[EXT64_VR_OFFS:%.*]] = sext i32 [[VR_OFFS]] to i64
+// LLVM-NEXT: [[IN_REG_OUTPUT:%.*]] = getelementptr i8, ptr [[VR_TOP]], i64 [[EXT64_VR_OFFS]]
+// LLVM-NEXT: br label %[[BB_END:.*]]
+
+// LLVM:  [[BB_ON_STACK]]: ;
+// LLVM-NEXT: [[STACK_P:%.*]] = getelementptr %struct.__va_list, ptr [[VARLIST]], i32 0, i32 0
+// LLVM-NEXT: [[STACK_V:%.*]] = load ptr, ptr [[STACK_P]], align 8
+// LLVM-NEXT: [[NEW_STACK_V:%.*]] = getelementptr i8, ptr [[STACK_V]], i64 8
+// LLVM-NEXT: store ptr [[NEW_STACK_V]], ptr [[STACK_P]], align 8
+// LLVM-NEXT: br label %[[BB_END]]
+
+// LLVM: [[BB_END]]: ; preds = %[[BB_ON_STACK]], %[[BB_IN_REG]]
+// LLVM-NEXT: [[PHIP:%.*]] = phi ptr [ [[IN_REG_OUTPUT]], %[[BB_IN_REG]] ], [ [[STACK_V]], %[[BB_ON_STACK]] ]
+// LLVM-NEXT: [[PHIV:%.*]] = load double, ptr [[PHIP]], align 8
+// LLVM-NEXT: store double [[PHIV]], ptr [[RESP]], align 8
+// LLVM: call void @llvm.va_end.p0(ptr [[VARLIST]])
+// LLVM: [[RES:%.*]] = load double, ptr [[RESP]], align 8
+// LLVM: store double [[RES]], ptr [[RETP]], align 8
+// LLVM: [[RETV:%.*]] = load double, ptr [[RETP]], align 8
+// LLVM-NEXT: ret double [[RETV]]
diff --git a/clang/test/CIR/Incubator/CodeGen/var-arg-scope.c b/clang/test/CIR/Incubator/CodeGen/var-arg-scope.c
new file mode 100644
index 0000000000000..8dee702223dfb
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/var-arg-scope.c
@@ -0,0 +1,105 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=BEFORE
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=AFTER
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+void f1(__builtin_va_list c) {
+  { __builtin_va_arg(c, void *); }
+}
+
+// BEFORE: cir.func {{.*}} @f1(%arg0: !rec___va_list) attributes
+// BEFORE: [[VAR_LIST:%.*]] = cir.alloca !rec___va_list, !cir.ptr<!rec___va_list>, ["c", init] {alignment = 8 : i64}
+// BEFORE: cir.store %arg0, [[VAR_LIST]] : !rec___va_list, !cir.ptr<!rec___va_list>
+// BEFORE: cir.scope {
+// BEFORE-NEXT: [[TMP:%.*]] = cir.va.arg [[VAR_LIST]] : (!cir.ptr<!rec___va_list>) -> !cir.ptr<!void>
+// BEFORE-NEXT: }
+// BEFORE-NEXT: cir.return
+
+// AFTER: cir.func {{.*}} @f1(%arg0: !rec___va_list) attributes
+// AFTER: [[VARLIST:%.*]] = cir.alloca !rec___va_list, !cir.ptr<!rec___va_list>, ["c", init] {alignment = 8 : i64}
+// AFTER: cir.store %arg0, [[VARLIST]] : !rec___va_list, !cir.ptr<!rec___va_list>
+// AFTER: cir.scope {
+//
+// AFTER-NEXT: [[GR_OFFS_P:%.*]] = cir.get_member [[VARLIST]][3] {name = "gr_offs"} : !cir.ptr<!rec___va_list> -> !cir.ptr<!s32i>
+// AFTER-NEXT: [[GR_OFFS:%.*]] = cir.load [[GR_OFFS_P]] : !cir.ptr<!s32i>
+// AFTER:  [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+// AFTER:  [[CMP0:%.*]] = cir.cmp(ge, [[GR_OFFS]], [[ZERO]]) : !s32i, !cir.bool
+// AFTER-NEXT:  cir.brcond [[CMP0]] [[BB_ON_STACK:\^bb.*]], [[BB_MAY_REG:\^bb.*]]
+
+// This BB checks whether the argument can be passed in a register.
+// AFTER: [[BB_MAY_REG]]:
+// AFTER-NEXT: [[EIGHT:%.*]] = cir.const #cir.int<8> : !s32i
+// AFTER-NEXT: [[NEW_REG_OFFS:%.*]] = cir.binop(add, [[GR_OFFS]], [[EIGHT]]) : !s32i
+// AFTER-NEXT: cir.store [[NEW_REG_OFFS]], [[GR_OFFS_P]] : !s32i, !cir.ptr<!s32i>
+// AFTER-NEXT: [[CMP1:%.*]] = cir.cmp(le, [[NEW_REG_OFFS]], [[ZERO]]) : !s32i, !cir.bool
+// AFTER-NEXT: cir.brcond [[CMP1]] [[BB_IN_REG:\^bb.*]], [[BB_ON_STACK]]
+
+// arg is passed in register.
+// AFTER: [[BB_IN_REG]]:
+// AFTER-NEXT: [[GR_TOP_P:%.*]] = cir.get_member [[VARLIST]][1] {name = "gr_top"} : !cir.ptr<!rec___va_list> -> !cir.ptr<!cir.ptr<!void>>
+// AFTER-NEXT: [[GR_TOP:%.*]] = cir.load [[GR_TOP_P]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// AFTER-NEXT: [[TMP2:%.*]] = cir.cast bitcast [[GR_TOP]] : !cir.ptr<!void> -> !cir.ptr<i8>
+// AFTER-NEXT: [[TMP3:%.*]] = cir.ptr_stride [[TMP2]], [[GR_OFFS]] : (!cir.ptr<i8>, !s32i) -> !cir.ptr<i8>
+// AFTER-NEXT: [[IN_REG_OUTPUT:%.*]] = cir.cast bitcast [[TMP3]] : !cir.ptr<i8> -> !cir.ptr<!void>
+// AFTER-NEXT: cir.br [[BB_END:\^bb.*]]([[IN_REG_OUTPUT]] : !cir.ptr<!void>)
+
+// arg is passed in stack.
+// AFTER: [[BB_ON_STACK]]:
+// AFTER-NEXT: [[STACK_P:%.*]] = cir.get_member [[VARLIST]][0] {name = "stack"} : !cir.ptr<!rec___va_list> -> !cir.ptr<!cir.ptr<!void>>
+// AFTER-NEXT: [[STACK_V:%.*]] = cir.load [[STACK_P]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// AFTER-NEXT: [[EIGHT_IN_PTR_ARITH:%.*]]  = cir.const #cir.int<8> : !u64i
+// AFTER-NEXT: [[TMP4:%.*]] = cir.cast bitcast [[STACK_V]] : !cir.ptr<!void> -> !cir.ptr<i8>
+// AFTER-NEXT: [[TMP5:%.*]] = cir.ptr_stride [[TMP4]], [[EIGHT_IN_PTR_ARITH]] : (!cir.ptr<i8>, !u64i) -> !cir.ptr<i8>
+// AFTER-NEXT: [[NEW_STACK_V:%.*]] = cir.cast bitcast [[TMP5]] : !cir.ptr<i8> -> !cir.ptr<!void>
+// AFTER-NEXT: cir.store [[NEW_STACK_V]], [[STACK_P]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// AFTER-NEXT: cir.br [[BB_END]]([[STACK_V]] : !cir.ptr<!void>)
+
+// This BB is where the different paths converge. BLK_ARG is the argument address,
+// which may come from the IN_REG block (the argument was passed in a register and
+// saved in the callee stack's argument-save area),
+// or from the ON_STACK block (the argument was passed on the caller's stack).
+// AFTER-NEXT: [[BB_END]]([[BLK_ARG:%.*]]: !cir.ptr<!void>):  // 2 preds: [[BB_IN_REG]], [[BB_ON_STACK]]
+// AFTER-NEXT:  [[TMP0:%.*]] = cir.cast bitcast [[BLK_ARG]] : !cir.ptr<!void> -> !cir.ptr<!cir.ptr<!void>>
+// AFTER-NEXT:  [[TMP1:%.*]] = cir.load [[TMP0]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// AFTER-NEXT: cir.yield
+// AFTER-NEXT: }
+// AFTER-NEXT: cir.return
+
+// LLVM: %struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+// LLVM: define dso_local void @f1(%struct.__va_list %0)
+// LLVM: [[VARLIST:%.*]] = alloca %struct.__va_list, i64 1, align 8
+// LLVM: br label %[[SCOPE_FRONT:.*]]
+
+// LLVM: [[SCOPE_FRONT]]: ; preds = %1
+// LLVM: [[GR_OFFS_P:%.*]] = getelementptr %struct.__va_list, ptr [[VARLIST]], i32 0, i32 3
+// LLVM: [[GR_OFFS:%.*]] = load i32, ptr [[GR_OFFS_P]], align 4
+// LLVM-NEXT: [[CMP0:%.*]] = icmp sge i32 [[GR_OFFS]], 0
+// LLVM-NEXT: br i1 [[CMP0]], label %[[BB_ON_STACK:.*]], label %[[BB_MAY_REG:.*]]
+
+// LLVM:  [[BB_MAY_REG]]: ;
+// LLVM: [[NEW_REG_OFFS:%.*]] = add i32 [[GR_OFFS]], 8
+// LLVM: store i32 [[NEW_REG_OFFS]], ptr [[GR_OFFS_P]], align 4
+// LLVM-NEXT: [[CMP1:%.*]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// LLVM-NEXT: br i1 [[CMP1]], label %[[BB_IN_REG:.*]], label %[[BB_ON_STACK]]
+
+// LLVM:  [[BB_IN_REG]]: ;
+// LLVM-NEXT: [[GR_TOP_P:%.*]] = getelementptr %struct.__va_list, ptr [[VARLIST]], i32 0, i32 1
+// LLVM-NEXT: [[GR_TOP:%.*]] = load ptr, ptr [[GR_TOP_P]], align 8
+// LLVM-NEXT: [[EXT64_GR_OFFS:%.*]] = sext i32 [[GR_OFFS]] to i64
+// LLVM-NEXT: [[IN_REG_OUTPUT:%.*]] = getelementptr i8, ptr [[GR_TOP]], i64 [[EXT64_GR_OFFS]]
+// LLVM-NEXT: br label %[[BB_END:.*]]
+
+// LLVM:  [[BB_ON_STACK]]: ;
+// LLVM-NEXT: [[STACK_P:%.*]] = getelementptr %struct.__va_list, ptr [[VARLIST]], i32 0, i32 0
+// LLVM-NEXT: [[STACK_V:%.*]] = load ptr, ptr [[STACK_P]], align 8
+// LLVM-NEXT: [[NEW_STACK_V:%.*]] = getelementptr i8, ptr [[STACK_V]], i64 8
+// LLVM-NEXT: store ptr [[NEW_STACK_V]], ptr [[STACK_P]], align 8
+// LLVM-NEXT: br label %[[BB_END]]
+
+// LLVM: [[BB_END]]: ; preds = %[[BB_ON_STACK]], %[[BB_IN_REG]]
+// LLVM-NEXT: [[PHIP:%.*]] = phi ptr [ [[IN_REG_OUTPUT]], %[[BB_IN_REG]] ], [ [[STACK_V]], %[[BB_ON_STACK]] ]
+// LLVM-NEXT: [[PHIV:%.*]] = load ptr, ptr [[PHIP]], align 8
+// LLVM-NEXT: br label %[[OUT_SCOPE:.*]]
+
+// LLVM: [[OUT_SCOPE]]: ; preds = %[[BB_END]]
+// LLVM-NEXT:  ret void
diff --git a/clang/test/CIR/Incubator/CodeGen/var-arg.c b/clang/test/CIR/Incubator/CodeGen/var-arg.c
new file mode 100644
index 0000000000000..cea9cdb88d6a6
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/var-arg.c
@@ -0,0 +1,121 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=BEFORE
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare -fno-clangir-call-conv-lowering %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=AFTER
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+#include <stdarg.h>
+
+int f1(int n, ...) {
+  va_list valist;
+  va_start(valist, n);
+  int res = va_arg(valist, int);
+  va_end(valist);
+  return res;
+}
+
+// BEFORE: !rec___va_list = !cir.record<struct "__va_list" {!cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>, !s32i, !s32i}
+// BEFORE:  cir.func {{.*}} @f1(%arg0: !s32i, ...) -> !s32i
+// BEFORE:  [[RETP:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+// BEFORE:  [[RESP:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["res", init]
+// BEFORE:  cir.va.start [[VARLIST:%.*]] : !cir.ptr<!rec___va_list>
+// BEFORE:  [[TMP0:%.*]] = cir.va.arg [[VARLIST]] : (!cir.ptr<!rec___va_list>) -> !s32i
+// BEFORE:  cir.store{{.*}} [[TMP0]], [[RESP]] : !s32i, !cir.ptr<!s32i>
+// BEFORE:  cir.va.end [[VARLIST]] : !cir.ptr<!rec___va_list>
+// BEFORE:  [[RES:%.*]] = cir.load{{.*}} [[RESP]] : !cir.ptr<!s32i>, !s32i
+// BEFORE:   cir.store{{.*}} [[RES]], [[RETP]] : !s32i, !cir.ptr<!s32i>
+// BEFORE:  [[RETV:%.*]] = cir.load{{.*}} [[RETP]] : !cir.ptr<!s32i>, !s32i
+// BEFORE:   cir.return [[RETV]] : !s32i
+
+// AFTER: !rec___va_list = !cir.record<struct "__va_list" {!cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>, !s32i, !s32i}
+// AFTER:  cir.func {{.*}} @f1(%arg0: !s32i, ...) -> !s32i
+// AFTER:  [[RETP:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+// AFTER:  [[RESP:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["res", init]
+// AFTER:  cir.va.start [[VARLIST:%.*]] : !cir.ptr<!rec___va_list>
+// AFTER:  [[GR_OFFS_P:%.*]] = cir.get_member [[VARLIST]][3] {name = "gr_offs"} : !cir.ptr<!rec___va_list> -> !cir.ptr<!s32i>
+// AFTER:  [[GR_OFFS:%.*]] = cir.load{{.*}} [[GR_OFFS_P]] : !cir.ptr<!s32i>, !s32i
+// AFTER:  [[ZERO:%.*]] = cir.const #cir.int<0> : !s32i
+// AFTER:  [[CMP0:%.*]] = cir.cmp(ge, [[GR_OFFS]], [[ZERO]]) : !s32i, !cir.bool
+// AFTER-NEXT:  cir.brcond [[CMP0]] [[BB_ON_STACK:\^bb.*]], [[BB_MAY_REG:\^bb.*]]
+
+// This BB checks whether the argument can be passed in a register.
+// AFTER: [[BB_MAY_REG]]:
+// AFTER-NEXT: [[EIGHT:%.*]] = cir.const #cir.int<8> : !s32i
+// AFTER-NEXT: [[NEW_REG_OFFS:%.*]] = cir.binop(add, [[GR_OFFS]], [[EIGHT]]) : !s32i
+// AFTER-NEXT: cir.store{{.*}} [[NEW_REG_OFFS]], [[GR_OFFS_P]] : !s32i, !cir.ptr<!s32i>
+// AFTER-NEXT: [[CMP1:%.*]] = cir.cmp(le, [[NEW_REG_OFFS]], [[ZERO]]) : !s32i, !cir.bool
+// AFTER-NEXT: cir.brcond [[CMP1]] [[BB_IN_REG:\^bb.*]], [[BB_ON_STACK]]
+
+// arg is passed in register.
+// AFTER: [[BB_IN_REG]]:
+// AFTER-NEXT: [[GR_TOP_P:%.*]] = cir.get_member [[VARLIST]][1] {name = "gr_top"} : !cir.ptr<!rec___va_list> -> !cir.ptr<!cir.ptr<!void>>
+// AFTER-NEXT: [[GR_TOP:%.*]] = cir.load{{.*}} [[GR_TOP_P]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// AFTER-NEXT: [[TMP2:%.*]] = cir.cast bitcast [[GR_TOP]] : !cir.ptr<!void> -> !cir.ptr<i8>
+// AFTER-NEXT: [[TMP3:%.*]] = cir.ptr_stride [[TMP2]], [[GR_OFFS]] : (!cir.ptr<i8>, !s32i) -> !cir.ptr<i8>
+// AFTER-NEXT: [[IN_REG_OUTPUT:%.*]] = cir.cast bitcast [[TMP3]] : !cir.ptr<i8> -> !cir.ptr<!void>
+// AFTER-NEXT: cir.br [[BB_END:\^bb.*]]([[IN_REG_OUTPUT]] : !cir.ptr<!void>)
+
+// arg is passed in stack.
+// AFTER: [[BB_ON_STACK]]:
+// AFTER-NEXT: [[STACK_P:%.*]] = cir.get_member [[VARLIST]][0] {name = "stack"} : !cir.ptr<!rec___va_list> -> !cir.ptr<!cir.ptr<!void>>
+// AFTER-NEXT: [[STACK_V:%.*]] = cir.load{{.*}} [[STACK_P]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// AFTER-NEXT: [[EIGHT_IN_PTR_ARITH:%.*]]  = cir.const #cir.int<8> : !u64i
+// AFTER-NEXT: [[TMP4:%.*]] = cir.cast bitcast [[STACK_V]] : !cir.ptr<!void> -> !cir.ptr<i8>
+// AFTER-NEXT: [[TMP5:%.*]] = cir.ptr_stride [[TMP4]], [[EIGHT_IN_PTR_ARITH]] : (!cir.ptr<i8>, !u64i) -> !cir.ptr<i8>
+// AFTER-NEXT: [[NEW_STACK_V:%.*]] = cir.cast bitcast [[TMP5]] : !cir.ptr<i8> -> !cir.ptr<!void>
+// AFTER-NEXT: cir.store{{.*}} [[NEW_STACK_V]], [[STACK_P]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// AFTER-NEXT: cir.br [[BB_END]]([[STACK_V]] : !cir.ptr<!void>)
+
+// This BB is where the different paths converge. BLK_ARG is the argument address,
+// which may come from the IN_REG block (the argument was passed in a register and
+// saved in the callee stack's argument-save area),
+// or from the ON_STACK block (the argument was passed on the caller's stack).
+// AFTER-NEXT: [[BB_END]]([[BLK_ARG:%.*]]: !cir.ptr<!void>):  // 2 preds: [[BB_IN_REG]], [[BB_ON_STACK]]
+// AFTER-NEXT:  [[TMP0:%.*]] = cir.cast bitcast [[BLK_ARG]] : !cir.ptr<!void> -> !cir.ptr<!s32i>
+// AFTER-NEXT:  [[TMP1:%.*]] = cir.load{{.*}} [[TMP0]] : !cir.ptr<!s32i>, !s32i
+// AFTER:   cir.store{{.*}} [[TMP1]], [[RESP]] : !s32i, !cir.ptr<!s32i>
+// AFTER:   cir.va.end [[VARLIST]] : !cir.ptr<!rec___va_list>
+// AFTER:   [[RES:%.*]] = cir.load{{.*}} [[RESP]] : !cir.ptr<!s32i>, !s32i
+// AFTER:   cir.store{{.*}} [[RES]], [[RETP]] : !s32i, !cir.ptr<!s32i>
+// AFTER:  [[RETV:%.*]] = cir.load{{.*}} [[RETP]] : !cir.ptr<!s32i>, !s32i
+// AFTER:   cir.return [[RETV]] : !s32i
+
+// LLVM: %struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+// LLVM: define dso_local i32 @f1(i32 %0, ...)
+// LLVM: [[ARGN:%.*]] = alloca i32, i64 1, align 4
+// LLVM: [[RETP:%.*]] = alloca i32, i64 1, align 4
+// LLVM: [[RESP:%.*]] = alloca i32, i64 1, align 4
+// LLVM: call void @llvm.va_start.p0(ptr [[VARLIST:%.*]])
+// LLVM: [[GR_OFFS_P:%.*]] = getelementptr %struct.__va_list, ptr [[VARLIST]], i32 0, i32 3
+// LLVM: [[GR_OFFS:%.*]] = load i32, ptr [[GR_OFFS_P]], align 4
+// LLVM-NEXT: [[CMP0:%.*]] = icmp sge i32 [[GR_OFFS]], 0
+// LLVM-NEXT: br i1 [[CMP0]], label %[[BB_ON_STACK:.*]], label %[[BB_MAY_REG:.*]]
+
+// LLVM:  [[BB_MAY_REG]]: ;
+// LLVM: [[NEW_REG_OFFS:%.*]] = add i32 [[GR_OFFS]], 8
+// LLVM: store i32 [[NEW_REG_OFFS]], ptr [[GR_OFFS_P]], align 4
+// LLVM-NEXT: [[CMP1:%.*]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// LLVM-NEXT: br i1 [[CMP1]], label %[[BB_IN_REG:.*]], label %[[BB_ON_STACK]]
+
+// LLVM:  [[BB_IN_REG]]: ;
+// LLVM-NEXT: [[GR_TOP_P:%.*]] = getelementptr %struct.__va_list, ptr [[VARLIST]], i32 0, i32 1
+// LLVM-NEXT: [[GR_TOP:%.*]] = load ptr, ptr [[GR_TOP_P]], align 8
+// LLVM-NEXT: [[EXT64_GR_OFFS:%.*]] = sext i32 [[GR_OFFS]] to i64
+// LLVM-NEXT: [[IN_REG_OUTPUT:%.*]] = getelementptr i8, ptr [[GR_TOP]], i64 [[EXT64_GR_OFFS]]
+// LLVM-NEXT: br label %[[BB_END:.*]]
+
+// LLVM:  [[BB_ON_STACK]]: ;
+// LLVM-NEXT: [[STACK_P:%.*]] = getelementptr %struct.__va_list, ptr [[VARLIST]], i32 0, i32 0
+// LLVM-NEXT: [[STACK_V:%.*]] = load ptr, ptr [[STACK_P]], align 8
+// LLVM-NEXT: [[NEW_STACK_V:%.*]] = getelementptr i8, ptr [[STACK_V]], i64 8
+// LLVM-NEXT: store ptr [[NEW_STACK_V]], ptr [[STACK_P]], align 8
+// LLVM-NEXT: br label %[[BB_END]]
+
+// LLVM: [[BB_END]]: ; preds = %[[BB_ON_STACK]], %[[BB_IN_REG]]
+// LLVM-NEXT: [[PHIP:%.*]] = phi ptr [ [[IN_REG_OUTPUT]], %[[BB_IN_REG]] ], [ [[STACK_V]], %[[BB_ON_STACK]] ]
+// LLVM-NEXT: [[PHIV:%.*]] = load i32, ptr [[PHIP]], align 4
+// LLVM-NEXT: store i32 [[PHIV]], ptr [[RESP]], align 4
+// LLVM: call void @llvm.va_end.p0(ptr [[VARLIST]])
+// LLVM: [[RES:%.*]] = load i32, ptr [[RESP]], align 4
+// LLVM: store i32 [[RES]], ptr [[RETP]], align 4
+// LLVM: [[RETV:%.*]] = load i32, ptr [[RETP]], align 4
+// LLVM-NEXT: ret i32 [[RETV]]
diff --git a/clang/test/CIR/Incubator/CodeGen/variadic-ctor.cpp b/clang/test/CIR/Incubator/CodeGen/variadic-ctor.cpp
new file mode 100644
index 0000000000000..88c6d299358ab
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/variadic-ctor.cpp
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -std=c++20 -fclangir -emit-cir -triple x86_64-unknown-linux-gnu %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+
+class A {
+public:
+    A(void *, ...);
+};
+
+A a(nullptr, 1, "str");
+
+// CIR: cir.func {{.*}} @_ZN1AC1EPvz(!cir.ptr<!rec_A>, !cir.ptr<!void>, ...)
diff --git a/clang/test/CIR/Incubator/CodeGen/variadics.c b/clang/test/CIR/Incubator/CodeGen/variadics.c
new file mode 100644
index 0000000000000..c75198a678062
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/variadics.c
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24  -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -x c++ -std=c++20 -triple aarch64-none-linux-android24  -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir 2>&1 | FileCheck %s
+
+typedef __builtin_va_list va_list;
+
+#define va_start(ap, param) __builtin_va_start(ap, param)
+#define va_end(ap)          __builtin_va_end(ap)
+#define va_arg(ap, type)    __builtin_va_arg(ap, type)
+#define va_copy(dst, src)   __builtin_va_copy(dst, src)
+
+// CHECK: [[VALISTTYPE:!.+va_list.*]] = !cir.record<struct "{{.*}}__va_list
+
+int average(int count, ...) {
+// CHECK: cir.func {{.*}} @{{.*}}average{{.*}}(%arg0: !s32i, ...) -> !s32i
+// ARM64_CHECK: cir.func {{.*}} @{{.*}}average{{.*}}(%arg0: !s32i loc({{.+}}), ...) -> !s32i
+    va_list args, args_copy;
+    va_start(args, count);
+    // CHECK: cir.va.start %{{[0-9]+}} : !cir.ptr<[[VALISTTYPE]]>
+
+    va_copy(args_copy, args);
+    // CHECK: cir.va.copy %{{[0-9]+}} to %{{[0-9]+}} : !cir.ptr<[[VALISTTYPE]]>, !cir.ptr<[[VALISTTYPE]]>
+
+    int sum = 0;
+    for(int i = 0; i < count; i++) {
+        sum += va_arg(args, int);
+        // CHECK: %{{[0-9]+}} = cir.va.arg %{{[0-9]+}} : (!cir.ptr<[[VALISTTYPE]]>) -> !s32i
+    }
+
+    va_end(args);
+    // CHECK: cir.va.end %{{[0-9]+}} : !cir.ptr<[[VALISTTYPE]]>
+
+    return count > 0 ? sum / count : 0;
+}
+
+int test(void) {
+  return average(5, 1, 2, 3, 4, 5);
+  // CHECK: cir.call @{{.*}}average{{.*}}(%{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}) : (!s32i, !s32i, !s32i, !s32i, !s32i, !s32i) -> !s32i
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/vbase.cpp b/clang/test/CIR/Incubator/CodeGen/vbase.cpp
new file mode 100644
index 0000000000000..c54433239a57d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vbase.cpp
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+
+struct A {
+  int a;
+};
+
+struct B:  virtual A {
+  int b;
+};
+
+void ppp() { B b; }
+
+
+// Vtable definition for B
+// CIR:  cir.global constant linkonce_odr @_ZTV1B = #cir.vtable<{#cir.const_array<[#cir.ptr<12 : i64> : !cir.ptr<!u8i>, #cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1B> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 3>}>
+
+// VTT for B.
+// CIR:  cir.global constant linkonce_odr @_ZTT1B = #cir.const_array<[#cir.global_view<@_ZTV1B, [0 : i32, 3 : i32]> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 1>
+
+// CIR:  cir.global "private" external @_ZTVN10__cxxabiv121__vmi_class_type_infoE
+
+// Type info name for B
+// CIR:  cir.global constant linkonce_odr comdat @_ZTS1B = #cir.const_array<"1B" : !cir.array<!s8i x 2>> : !cir.array<!s8i x 2>
+
+// CIR:  cir.global "private" external @_ZTVN10__cxxabiv117__class_type_infoE : !cir.ptr<!cir.ptr<!u8i>>
+
+// Type info name for A
+// CIR:  cir.global constant linkonce_odr comdat @_ZTS1A = #cir.const_array<"1A" : !cir.array<!s8i x 2>> : !cir.array<!s8i x 2>
+
+// Type info A.
+// CIR:  cir.global constant external @_ZTI1A = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv117__class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS1A> : !cir.ptr<!u8i>}>
+
+// Type info B.
+// CIR:  cir.global constant external @_ZTI1B = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv121__vmi_class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS1B> : !cir.ptr<!u8i>, #cir.int<0> : !u32i, #cir.int<1> : !u32i, #cir.global_view<@_ZTI1A> : !cir.ptr<!u8i>, #cir.int<-6141> : !s64i}>
+
+// LLVM: $_ZTS1B = comdat any
+// LLVM: $_ZTS1A = comdat any
+
+// Note: GEP emitted by cir might not be the same as LLVM, due to constant folding.
+// LLVM: @_ZTV1B = linkonce_odr constant { [3 x ptr] } { [3 x ptr] [ptr inttoptr (i64 12 to ptr), ptr null, ptr @_ZTI1B] }
+// LLVM: @_ZTT1B = linkonce_odr constant [1 x ptr] [ptr getelementptr inbounds nuw (i8, ptr @_ZTV1B, i64 24)]
+// LLVM: @_ZTVN10__cxxabiv121__vmi_class_type_infoE = external global ptr
+// LLVM: @_ZTS1B = linkonce_odr constant [2 x i8] c"1B", comdat
+// LLVM: @_ZTVN10__cxxabiv117__class_type_infoE = external global ptr
+// LLVM: @_ZTS1A = linkonce_odr constant [2 x i8] c"1A", comdat
+// LLVM: @_ZTI1A = constant { ptr, ptr } { ptr getelementptr inbounds nuw (i8, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 16), ptr @_ZTS1A }
+// LLVM: @_ZTI1B = constant { ptr, ptr, i32, i32, ptr, i64 } { ptr getelementptr inbounds nuw (i8, ptr @_ZTVN10__cxxabiv121__vmi_class_type_infoE, i64 16), ptr @_ZTS1B, i32 0, i32 1, ptr @_ZTI1A, i64 -6141 }
diff --git a/clang/test/CIR/Incubator/CodeGen/vector-ext-element.cpp b/clang/test/CIR/Incubator/CodeGen/vector-ext-element.cpp
new file mode 100644
index 0000000000000..1c78365b293a7
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vector-ext-element.cpp
@@ -0,0 +1,342 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+typedef int vi2 __attribute__((ext_vector_type(2)));
+typedef int vi4 __attribute__((ext_vector_type(4)));
+
+void element_expr_from_gl() {
+  vi4 a;        // uninitialized lvalue vector source
+  int x = a.x;  // scalar swizzle: whole-vector load + constant-index extract of element 0
+  int y = a.y;  // scalar swizzle: extract of element 1
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["a"]
+// CIR: %[[X_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init]
+// CIR: %[[Y_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init]
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s64i
+// CIR: %[[ELEM_0:.*]] = cir.vec.extract %[[TMP_A]][%[[CONST_0]] : !s64i] : !cir.vector<!s32i x 4>
+// CIR: cir.store {{.*}} %[[ELEM_0]], %[[X_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s64i
+// CIR: %[[ELEM_1:.*]] = cir.vec.extract %[[TMP_A]][%[[CONST_1]] : !s64i] : !cir.vector<!s32i x 4>
+// CIR: cir.store {{.*}} %[[ELEM_1]], %[[Y_ADDR]] : !s32i, !cir.ptr<!s32i>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[X_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[Y_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[ELEM_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i64 0
+// LLVM: store i32 %[[ELEM_0]], ptr %[[X_ADDR]], align 4
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[ELEM_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i64 1
+// LLVM: store i32 %[[ELEM_1]], ptr %[[Y_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[X_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[Y_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[ELEM_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i64 0
+// OGCG: store i32 %[[ELEM_0]], ptr %[[X_ADDR]], align 4
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[ELEM_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i64 1
+// OGCG: store i32 %[[ELEM_1]], ptr %[[Y_ADDR]], align 4
+
+void element_expr_from_gl_with_vec_result() {
+  vi4 a;           // uninitialized lvalue vector source
+  vi2 b = a.xy;    // narrowing swizzle: shuffle with poison second operand to <2 x i32>
+  vi4 c = a.wzyx;  // reversing swizzle: shuffle mask [3,2,1,0]
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>, ["b", init]
+// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["c", init]
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<!s32i x 4>
+// CIR: %[[B_VALUE:.*]] = cir.vec.shuffle(%[[TMP_A]], %[[POISON]] : !cir.vector<!s32i x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!s32i x 2>
+// CIR: cir.store {{.*}} %[[B_VALUE]], %[[B_ADDR]] : !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<!s32i x 4>
+// CIR: %[[C_VALUE:.*]] = cir.vec.shuffle(%[[TMP_A]], %[[POISON]] : !cir.vector<!s32i x 4>) [#cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<1> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!s32i x 4>
+// CIR: cir.store {{.*}} %[[C_VALUE]], %[[C_ADDR]] : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[B_ADDR:.*]] = alloca <2 x i32>, i64 1, align 8
+// LLVM: %[[C_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[B_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+// LLVM: store <2 x i32> %[[B_VALUE]], ptr %[[B_ADDR]], align 8
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[C_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// LLVM: store <4 x i32> %[[C_VALUE]], ptr %[[C_ADDR]], align 16
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[B_ADDR:.*]] = alloca <2 x i32>, align 8
+// OGCG: %[[C_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[B_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+// OGCG: store <2 x i32> %[[B_VALUE]], ptr %[[B_ADDR]], align 8
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[C_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// OGCG: store <4 x i32> %[[C_VALUE]], ptr %[[C_ADDR]], align 16
+
+void element_expr_from_pointer() {
+  vi4 *a;        // uninitialized pointer base for the -> swizzle form
+  int X = a->x;  // load the pointer, load the vector, extract element 0
+  int Y = a->y;  // pointer and vector are re-loaded for each access; extract element 1
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.ptr<!cir.vector<!s32i x 4>>, !cir.ptr<!cir.ptr<!cir.vector<!s32i x 4>>>, ["a"]
+// CIR: %[[X_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["X", init]
+// CIR: %[[Y_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["Y", init]
+// CIR: %[[TMP_A_PTR:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.ptr<!cir.vector<!s32i x 4>>>, !cir.ptr<!cir.vector<!s32i x 4>>
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[TMP_A_PTR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s64i
+// CIR: %[[ELEM_0:.*]] = cir.vec.extract %[[TMP_A]][%[[CONST_0]] : !s64i] : !cir.vector<!s32i x 4>
+// CIR: cir.store {{.*}} %[[ELEM_0]], %[[X_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CIR: %[[TMP_A_PTR:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.ptr<!cir.vector<!s32i x 4>>>, !cir.ptr<!cir.vector<!s32i x 4>>
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[TMP_A_PTR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s64i
+// CIR: %[[ELEM_1:.*]] = cir.vec.extract %[[TMP_A]][%[[CONST_1]] : !s64i] : !cir.vector<!s32i x 4>
+// CIR: cir.store {{.*}} %[[ELEM_1]], %[[Y_ADDR]] : !s32i, !cir.ptr<!s32i>
+
+// LLVM: %[[A_ADDR:.*]] = alloca ptr, i64 1, align 8
+// LLVM: %[[X_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[Y_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[TMP_A_PTR:.*]] = load ptr, ptr %[[A_ADDR]], align 8
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[TMP_A_PTR]], align 16
+// LLVM: %[[ELEM_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i64 0
+// LLVM: store i32 %[[ELEM_0]], ptr %[[X_ADDR]], align 4
+// LLVM: %[[TMP_A_PTR:.*]] = load ptr, ptr %[[A_ADDR]], align 8
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[TMP_A_PTR]], align 16
+// LLVM: %[[ELEM_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i64 1
+// LLVM: store i32 %[[ELEM_1]], ptr %[[Y_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca ptr, align 8
+// OGCG: %[[X_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[Y_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[TMP_A_PTR:.*]] = load ptr, ptr %[[A_ADDR]], align 8
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[TMP_A_PTR]], align 16
+// OGCG: %[[ELEM_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i64 0
+// OGCG: store i32 %[[ELEM_0]], ptr %[[X_ADDR]], align 4
+// OGCG: %[[TMP_A_PTR:.*]] = load ptr, ptr %[[A_ADDR]], align 8
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[TMP_A_PTR]], align 16
+// OGCG: %[[ELEM_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i64 1
+// OGCG: store i32 %[[ELEM_1]], ptr %[[Y_ADDR]], align 4
+
+void element_expr_from_pointer_with_vec_result() {
+  vi4 *a;          // uninitialized pointer base for the -> swizzle form
+  vi2 b = a->xy;   // swizzle through pointer: shuffle to <2 x i32> with poison second operand
+  vi4 c = a->wzyx; // reversing swizzle: shuffle mask [3,2,1,0]
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.ptr<!cir.vector<!s32i x 4>>, !cir.ptr<!cir.ptr<!cir.vector<!s32i x 4>>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>, ["b", init]
+// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["c", init]
+// CIR: %[[TMP_A_PTR:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.ptr<!cir.vector<!s32i x 4>>>, !cir.ptr<!cir.vector<!s32i x 4>>
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[TMP_A_PTR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<!s32i x 4>
+// CIR: %[[B_VALUE:.*]] = cir.vec.shuffle(%[[TMP_A]], %[[POISON]] : !cir.vector<!s32i x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!s32i x 2>
+// CIR: cir.store {{.*}} %[[B_VALUE]], %[[B_ADDR]] : !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>
+// CIR: %[[TMP_A_PTR:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.ptr<!cir.vector<!s32i x 4>>>, !cir.ptr<!cir.vector<!s32i x 4>>
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[TMP_A_PTR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<!s32i x 4>
+// CIR: %[[C_VALUE:.*]] = cir.vec.shuffle(%[[TMP_A]], %[[POISON]] : !cir.vector<!s32i x 4>) [#cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<1> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!s32i x 4>
+// CIR: cir.store {{.*}} %[[C_VALUE]], %[[C_ADDR]] : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca ptr, i64 1, align 8
+// LLVM: %[[B_ADDR:.*]] = alloca <2 x i32>, i64 1, align 8
+// LLVM: %[[C_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[TMP_A_PTR:.*]] = load ptr, ptr %[[A_ADDR]], align 8
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[TMP_A_PTR]], align 16
+// LLVM: %[[B_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+// LLVM: store <2 x i32> %[[B_VALUE]], ptr %[[B_ADDR]], align 8
+// LLVM: %[[TMP_A_PTR:.*]] = load ptr, ptr %[[A_ADDR]], align 8
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[TMP_A_PTR]], align 16
+// LLVM: %[[C_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// LLVM: store <4 x i32> %[[C_VALUE]], ptr %[[C_ADDR]], align 16
+
+// OGCG: %[[A_ADDR:.*]] = alloca ptr, align 8
+// OGCG: %[[B_ADDR:.*]] = alloca <2 x i32>, align 8
+// OGCG: %[[C_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[TMP_A_PTR:.*]] = load ptr, ptr %[[A_ADDR]], align 8
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[TMP_A_PTR]], align 16
+// OGCG: %[[B_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+// OGCG: store <2 x i32> %[[B_VALUE]], ptr %[[B_ADDR]], align 8
+// OGCG: %[[TMP_A_PTR:.*]] = load ptr, ptr %[[A_ADDR]], align 8
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[TMP_A_PTR]], align 16
+// OGCG: %[[C_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// OGCG: store <4 x i32> %[[C_VALUE]], ptr %[[C_ADDR]], align 16
+
+void element_expr_from_rvalue() {
+  vi4 a;              // uninitialized operands for the rvalue base
+  vi4 b;
+  int x = (a + b).x;  // rvalue base is spilled to a "tmp" alloca before the extract
+  int y = (a + b).y;  // each access re-evaluates a + b into its own temporary
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["b"]
+// CIR: %[[X_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init]
+// CIR: %[[TMP_1_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["tmp"]
+// CIR: %[[Y_ADDR:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init]
+// CIR: %[[TMP_2_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["tmp"]
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[ADD_A_B:.*]] = cir.binop(add, %[[TMP_A]], %[[TMP_B]]) : !cir.vector<!s32i x 4>
+// CIR: cir.store {{.*}} %[[ADD_A_B]], %[[TMP_1_ADDR]] : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+// CIR: %[[TMP_1:.*]] = cir.load {{.*}} %[[TMP_1_ADDR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s64i
+// CIR: %[[ELEM_0:.*]] = cir.vec.extract %[[TMP_1]][%[[CONST_0]] : !s64i] : !cir.vector<!s32i x 4>
+// CIR: cir.store {{.*}} %[[ELEM_0]], %[[X_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[ADD_A_B:.*]] = cir.binop(add, %[[TMP_A]], %[[TMP_B]]) : !cir.vector<!s32i x 4>
+// CIR: cir.store {{.*}} %[[ADD_A_B]], %[[TMP_2_ADDR]] : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+// CIR: %[[TMP_2:.*]] = cir.load {{.*}} %[[TMP_2_ADDR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s64i
+// CIR: %[[ELEM_1:.*]] = cir.vec.extract %[[TMP_2]][%[[CONST_1]] : !s64i] : !cir.vector<!s32i x 4>
+// CIR: cir.store {{.*}} %[[ELEM_1]], %[[Y_ADDR]] : !s32i, !cir.ptr<!s32i>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[X_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[TMP_1_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[Y_ADDR:.*]] = alloca i32, i64 1, align 4
+// LLVM: %[[TMP_2_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// LLVM: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
+// LLVM: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_1_ADDR]], align 16
+// LLVM: %[[TMP_1:.*]] = load <4 x i32>, ptr %[[TMP_1_ADDR]], align 16
+// LLVM: %[[ELEM_0:.*]] = extractelement <4 x i32> %[[TMP_1]], i64 0
+// LLVM: store i32 %[[ELEM_0]], ptr %[[X_ADDR]], align 4
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// LLVM: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
+// LLVM: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_2_ADDR]], align 16
+// LLVM: %[[TMP_2:.*]] = load <4 x i32>, ptr %[[TMP_2_ADDR]], align 16
+// LLVM: %[[ELEM_1:.*]] = extractelement <4 x i32> %[[TMP_2]], i64 1
+// LLVM: store i32 %[[ELEM_1]], ptr %[[Y_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[X_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[TMP_1_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[Y_ADDR:.*]] = alloca i32, align 4
+// OGCG: %[[TMP_2_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// OGCG: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
+// OGCG: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_1_ADDR]], align 16
+// OGCG: %[[TMP_1:.*]] = load <4 x i32>, ptr %[[TMP_1_ADDR]], align 16
+// OGCG: %[[ELEM_0:.*]] = extractelement <4 x i32> %[[TMP_1]], i64 0
+// OGCG: store i32 %[[ELEM_0]], ptr %[[X_ADDR]], align 4
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// OGCG: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
+// OGCG: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_2_ADDR]], align 16
+// OGCG: %[[TMP_2:.*]] = load <4 x i32>, ptr %[[TMP_2_ADDR]], align 16
+// OGCG: %[[ELEM_1:.*]] = extractelement <4 x i32> %[[TMP_2]], i64 1
+// OGCG: store i32 %[[ELEM_1]], ptr %[[Y_ADDR]], align 4
+
+void element_expr_from_rvalue_with_vec_result() {
+  vi4 a;                 // uninitialized operands for the rvalue base
+  vi4 b;
+  vi2 c = (a + b).xy;    // sum spilled to a "tmp" alloca, then shuffled down to <2 x i32>
+  vi4 d = (a + b).wzyx;  // second temporary; reversing shuffle mask [3,2,1,0]
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["b"]
+// CIR: %[[C_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>, ["c", init]
+// CIR: %[[TMP_1_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["tmp"]
+// CIR: %[[D_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["d", init]
+// CIR: %[[TMP_2_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["tmp"]
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[ADD_A_B:.*]] = cir.binop(add, %[[TMP_A]], %[[TMP_B]]) : !cir.vector<!s32i x 4>
+// CIR: cir.store {{.*}} %[[ADD_A_B]], %[[TMP_1_ADDR]] : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+// CIR: %[[TMP_1:.*]] = cir.load {{.*}} %[[TMP_1_ADDR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<!s32i x 4>
+// CIR: %[[C_VALUE:.*]] = cir.vec.shuffle(%[[TMP_1]], %[[POISON]] : !cir.vector<!s32i x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!s32i x 2>
+// CIR: cir.store {{.*}} %[[C_VALUE]], %[[C_ADDR]] : !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[ADD_A_B:.*]] = cir.binop(add, %[[TMP_A]], %[[TMP_B]]) : !cir.vector<!s32i x 4>
+// CIR: cir.store {{.*}} %[[ADD_A_B]], %[[TMP_2_ADDR]] : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+// CIR: %[[TMP_2:.*]] = cir.load {{.*}} %[[TMP_2_ADDR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<!s32i x 4>
+// CIR: %[[D_VALUE:.*]] = cir.vec.shuffle(%[[TMP_2]], %[[POISON]] : !cir.vector<!s32i x 4>) [#cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<1> : !s32i, #cir.int<0> : !s32i] : !cir.vector<!s32i x 4>
+// CIR: cir.store {{.*}} %[[D_VALUE]], %[[D_ADDR]] : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[C_ADDR:.*]] = alloca <2 x i32>, i64 1, align 8
+// LLVM: %[[TMP_1_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[D_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[TMP_2_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// LLVM: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
+// LLVM: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_1_ADDR]], align 16
+// LLVM: %[[TMP_1:.*]] = load <4 x i32>, ptr %[[TMP_1_ADDR]], align 16
+// LLVM: %[[C_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_1]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+// LLVM: store <2 x i32> %[[C_VALUE]], ptr %[[C_ADDR]], align 8
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// LLVM: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
+// LLVM: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_2_ADDR]], align 16
+// LLVM: %[[TMP_2:.*]] = load <4 x i32>, ptr %[[TMP_2_ADDR]], align 16
+// LLVM: %[[D_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// LLVM: store <4 x i32> %[[D_VALUE]], ptr %[[D_ADDR]], align 16
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[C_ADDR:.*]] = alloca <2 x i32>, align 8
+// OGCG: %[[TMP_1_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[D_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[TMP_2_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// OGCG: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
+// OGCG: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_1_ADDR]], align 16
+// OGCG: %[[TMP_1:.*]] = load <4 x i32>, ptr %[[TMP_1_ADDR]], align 16
+// OGCG: %[[C_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_1]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+// OGCG: store <2 x i32> %[[C_VALUE]], ptr %[[C_ADDR]], align 8
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr %[[B_ADDR]], align 16
+// OGCG: %[[ADD_A_B:.*]] = add <4 x i32> %[[TMP_A]], %[[TMP_B]]
+// OGCG: store <4 x i32> %[[ADD_A_B]], ptr %[[TMP_2_ADDR]], align 16
+// OGCG: %[[TMP_2:.*]] = load <4 x i32>, ptr %[[TMP_2_ADDR]], align 16
+// OGCG: %[[D_VALUE:.*]] = shufflevector <4 x i32> %[[TMP_2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// OGCG: store <4 x i32> %[[D_VALUE]], ptr %[[D_ADDR]], align 16
+
+void array_subscript_expr_with_element_expr_base() {
+  vi4 a;         // uninitialized lvalue vector source
+  a.xyz[1] = 2;  // subscript on a swizzle lvalue: bitcast to !s32i*, stride to the swizzle base, then by the index
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["a"]
+// CIR: %[[CONST_2:.*]] = cir.const #cir.int<2> : !s32i
+// CIR: %[[CONST_1:.*]] = cir.const #cir.int<1> : !s32i
+// CIR: %[[A_PTR:.*]] = cir.cast bitcast %[[A_ADDR]] : !cir.ptr<!cir.vector<!s32i x 4>> -> !cir.ptr<!s32i>
+// CIR: %[[CONST_0:.*]] = cir.const #cir.int<0> : !s64i
+// CIR: %[[VEC_MEMBER_EXPR:.*]] = cir.ptr_stride %[[A_PTR]], %[[CONST_0]] : (!cir.ptr<!s32i>, !s64i) -> !cir.ptr<!s32i>
+// CIR: %[[VEC_ELEM_PTR:.*]] = cir.ptr_stride %[[VEC_MEMBER_EXPR]], %[[CONST_1]] : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+// CIR: cir.store {{.*}} %[[CONST_2]], %[[VEC_ELEM_PTR]] : !s32i, !cir.ptr<!s32i>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[VEC_MEMBER_EXPR:.*]] = getelementptr i32, ptr %[[A_ADDR]], i64 0
+// LLVM: %[[VEC_ELEM_PTR:.*]] = getelementptr i32, ptr %[[VEC_MEMBER_EXPR]], i64 1
+// LLVM: store i32 2, ptr %[[VEC_ELEM_PTR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[VEC_MEMBER_EXPR:.*]] = getelementptr inbounds i32, ptr %[[A_ADDR]], i64 0
+// OGCG: %[[VEC_ELEM_PTR:.*]] = getelementptr inbounds i32, ptr %[[VEC_MEMBER_EXPR]], i64 1
+// OGCG: store i32 2, ptr %[[VEC_ELEM_PTR]], align 4
+
diff --git a/clang/test/CIR/Incubator/CodeGen/vector.cpp b/clang/test/CIR/Incubator/CodeGen/vector.cpp
new file mode 100644
index 0000000000000..56387891c9e4d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vector.cpp
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+#include "std-cxx.h"
+
+namespace std {
+  template<typename T>
+  void vector<T>::resize(size_type __sz) {  // out-of-line body for the declaration in std-cxx.h
+    size_type __cs = size();  // current size, materialized into the "__cs" alloca
+    if (__cs) {}  // empty branch: just forces the scoped int_to_bool + cir.if lowering
+  }
+} // namespace std
+
+// CHECK: cir.func {{.*}} @_ZNSt6vectorIyE6resizeEm(
+// CHECK:   %0 = cir.alloca !cir.ptr<!rec_std3A3Avector3Cunsigned_long_long3E>, !cir.ptr<!cir.ptr<!rec_std3A3Avector3Cunsigned_long_long3E>>, ["this", init] {alignment = 8 : i64}
+// CHECK:   %1 = cir.alloca !u64i, !cir.ptr<!u64i>, ["__sz", init] {alignment = 8 : i64}
+// CHECK:   %2 = cir.alloca !u64i, !cir.ptr<!u64i>, ["__cs", init] {alignment = 8 : i64}
+// CHECK:   cir.store{{.*}} %arg0, %0 : !cir.ptr<!rec_std3A3Avector3Cunsigned_long_long3E>, !cir.ptr<!cir.ptr<!rec_std3A3Avector3Cunsigned_long_long3E>>
+// CHECK:   cir.store{{.*}} %arg1, %1 : !u64i, !cir.ptr<!u64i>
+// CHECK:   %3 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!rec_std3A3Avector3Cunsigned_long_long3E>>, !cir.ptr<!rec_std3A3Avector3Cunsigned_long_long3E>
+// CHECK:   %4 = cir.call @_ZNKSt6vectorIyE4sizeEv(%3) : (!cir.ptr<!rec_std3A3Avector3Cunsigned_long_long3E>) -> !u64i
+// CHECK:   cir.store{{.*}} %4, %2 : !u64i, !cir.ptr<!u64i>
+// CHECK:   cir.scope {
+// CHECK:     %5 = cir.load{{.*}} %2 : !cir.ptr<!u64i>, !u64i
+// CHECK:     %6 = cir.cast int_to_bool %5 : !u64i -> !cir.bool
+// CHECK:     cir.if %6 {
+// CHECK:     }
+// CHECK:   }
+// CHECK:   cir.return
+
+void m() {
+  // Instantiate vector<unsigned long long>::resize so the definition above is emitted.
+  std::vector<unsigned long long> values;
+  int requested = 43;  // int, implicitly converted to size_type at the call
+  values.resize(requested);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/vectype-ext.cpp b/clang/test/CIR/Incubator/CodeGen/vectype-ext.cpp
new file mode 100644
index 0000000000000..ff9a3656e671d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vectype-ext.cpp
@@ -0,0 +1,608 @@
+// RUN: %clang_cc1 -std=c++17 -fclangir -emit-cir -triple x86_64-unknown-linux-gnu %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -std=c++17 -fclangir -emit-llvm -triple x86_64-unknown-linux-gnu %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+typedef int vi4 __attribute__((ext_vector_type(4)));
+typedef int vi6 __attribute__((ext_vector_type(6)));
+typedef unsigned int uvi4 __attribute__((ext_vector_type(4)));
+typedef int vi3 __attribute__((ext_vector_type(3)));
+typedef int vi2 __attribute__((ext_vector_type(2)));
+typedef double vd2 __attribute__((ext_vector_type(2)));
+typedef long vl2 __attribute__((ext_vector_type(2)));
+typedef unsigned short vus2 __attribute__((ext_vector_type(2)));
+
+vi4 vec_a;
+// CIR: cir.global external @[[VEC_A:.*]] = #cir.zero : !cir.vector<!s32i x 4>
+
+// LLVM: @[[VEC_A:.*]] = global <4 x i32> zeroinitializer
+
+vi3 vec_b;
+// CIR: cir.global external @[[VEC_B:.*]] = #cir.zero : !cir.vector<!s32i x 3>
+
+// LLVM: @[[VEC_B:.*]] = global <3 x i32> zeroinitializer
+
+vi2 vec_c;
+// CIR: cir.global external @[[VEC_C:.*]] = #cir.zero : !cir.vector<!s32i x 2>
+
+// LLVM: @[[VEC_C:.*]] = global <2 x i32> zeroinitializer
+
+vd2 vec_d;
+
+// CIR: cir.global external @[[VEC_D:.*]] = #cir.zero : !cir.vector<!cir.double x 2>
+
+// LLVM: @[[VEC_D:.*]] = global <2 x double> zeroinitializer
+
+vi4 vec_e = { 1, 2, 3, 4 };
+
+// CIR: cir.global external @[[VEC_E:.*]] = #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<2> :
+// CIR-SAME: !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.vector<!s32i x 4>
+
+// LLVM: @[[VEC_E:.*]] = global <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+
+// CIR: cir.func {{.*}} {{@.*vector_int_test.*}}
+// LLVM: define dso_local void {{@.*vector_int_test.*}}
+void vector_int_test(int x) {
+
+  // Vector constant.
+  vi4 a = { 1, 2, 3, 4 };
+  // CIR: %{{[0-9]+}} = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.vector<!s32i x 4>
+  // LLVM: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr %{{[0-9]+}}, align 16
+
+  // Non-const vector initialization.
+  vi4 b = { x, 5, 6, x + 1 };
+  // CIR: %{{[0-9]+}} = cir.vec.create(%{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}} : !s32i, !s32i, !s32i, !s32i) : !cir.vector<!s32i x 4>
+  // LLVM:      %[[#X1:]] = load i32, ptr %{{[0-9]+}}, align 4
+  // LLVM-NEXT: %[[#X2:]] = load i32, ptr %{{[0-9]+}}, align 4
+  // LLVM-NEXT: %[[#SUM:]] = add nsw i32 %[[#X2]], 1
+  // LLVM-NEXT: %[[#VEC1:]] = insertelement <4 x i32> poison, i32 %[[#X1]], i64 0
+  // LLVM-NEXT: %[[#VEC2:]] = insertelement <4 x i32> %[[#VEC1]], i32 5, i64 1
+  // LLVM-NEXT: %[[#VEC3:]] = insertelement <4 x i32> %[[#VEC2]], i32 6, i64 2
+  // LLVM-NEXT: %[[#VEC4:]] = insertelement <4 x i32> %[[#VEC3]], i32 %[[#SUM]], i64 3
+  // LLVM-NEXT: store <4 x i32> %[[#VEC4]], ptr %{{[0-9]+}}, align 16
+
+  // Incomplete vector initialization.
+  vi4 bb = { x, x + 1 };
+  // CIR: %[[#zero:]] = cir.const #cir.int<0> : !s32i
+  // CIR: %{{[0-9]+}} = cir.vec.create(%{{[0-9]+}}, %{{[0-9]+}}, %[[#zero]], %[[#zero]] : !s32i, !s32i, !s32i, !s32i) : !cir.vector<!s32i x 4>
+  // LLVM:      %[[#X1:]] = load i32, ptr %{{[0-9]+}}, align 4
+  // LLVM-NEXT: %[[#X2:]] = load i32, ptr %{{[0-9]+}}, align 4
+  // LLVM-NEXT: %[[#SUM:]] = add nsw i32 %[[#X2]], 1
+  // LLVM-NEXT: %[[#VEC1:]] = insertelement <4 x i32> poison, i32 %[[#X1]], i64 0
+  // LLVM-NEXT: %[[#VEC2:]] = insertelement <4 x i32> %[[#VEC1]], i32 %[[#SUM]], i64 1
+  // LLVM-NEXT: %[[#VEC3:]] = insertelement <4 x i32> %[[#VEC2]], i32 0, i64 2
+  // LLVM-NEXT: %[[#VEC4:]] = insertelement <4 x i32> %[[#VEC3]], i32 0, i64 3
+  // LLVM-NEXT: store <4 x i32> %[[#VEC4]], ptr %{{[0-9]+}}, align 16
+
+
+  // Scalar to vector conversion, a.k.a. vector splat.  Only valid as an
+  // operand of a binary operator, not as a regular conversion.
+  bb = a + 7;
+  // CIR: %[[#seven:]] = cir.const #cir.int<7> : !s32i
+  // CIR: %{{[0-9]+}} = cir.vec.splat %[[#seven]] : !s32i, !cir.vector<!s32i x 4>
+  // LLVM:      %[[#A:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#BB:]] = add <4 x i32> %[[#A]], splat (i32 7)
+  // LLVM-NEXT: store <4 x i32> %[[#BB]], ptr %{{[0-9]+}}, align 16
+
+  // Vector to vector conversion
+  vd2 bbb = { };
+  bb = (vi4)bbb;
+  // CIR: %{{[0-9]+}} = cir.cast bitcast %{{[0-9]+}} : !cir.vector<!cir.double x 2> -> !cir.vector<!s32i x 4>
+  // LLVM: %{{[0-9]+}} = bitcast <2 x double> %{{[0-9]+}} to <4 x i32>
+
+  // Extract element
+  int c = a[x];
+  // CIR: %{{[0-9]+}} = cir.vec.extract %{{[0-9]+}}[%{{[0-9]+}} : !s32i] : !cir.vector<!s32i x 4>
+  // LLVM:      %[[#A:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#X:]] = load i32, ptr %{{[0-9]+}}, align 4
+  // LLVM-NEXT: %[[#EXT:]] = extractelement <4 x i32> %[[#A]], i32 %[[#X]]
+  // LLVM-NEXT: store i32 %[[#EXT]], ptr %{{[0-9]+}}, align 4
+
+  // Insert element
+  a[x] = x;
+  // CIR: %[[#LOADEDVI:]] = cir.load{{.*}} %[[#STORAGEVI:]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+  // CIR: %[[#UPDATEDVI:]] = cir.vec.insert %{{[0-9]+}}, %[[#LOADEDVI]][%{{[0-9]+}} : !s32i] : !cir.vector<!s32i x 4>
+  // CIR: cir.store{{.*}} %[[#UPDATEDVI]], %[[#STORAGEVI]] : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+  // LLVM:      %[[#X1:]] = load i32, ptr %{{[0-9]+}}, align 4
+  // LLVM-NEXT: %[[#X2:]] = load i32, ptr %{{[0-9]+}}, align 4
+  // LLVM-NEXT: %[[#A:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#INS:]] = insertelement <4 x i32> %[[#A]], i32 %[[#X1]], i32 %[[#X2]]
+  // LLVM-NEXT: store <4 x i32> %[[#INS]], ptr %{{[0-9]+}}, align 16
+
+  // Compound assignment
+  a[x] += a[0];
+  // CIR: %[[#RHSCA:]] = cir.vec.extract %{{[0-9]+}}[%{{[0-9]+}} : !s32i] : !cir.vector<!s32i x 4>
+  // CIR: %[[#LHSCA:]] = cir.vec.extract %{{[0-9]+}}[%{{[0-9]+}} : !s32i] : !cir.vector<!s32i x 4>
+  // CIR: %[[#SUMCA:]] = cir.binop(add, %[[#LHSCA]], %[[#RHSCA]]) nsw : !s32i
+  // CIR: cir.vec.insert %[[#SUMCA]], %{{[0-9]+}}[%{{[0-9]+}} : !s32i] : !cir.vector<!s32i x 4>
+  // LLVM:      %[[#A1:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#RHSCA:]] = extractelement <4 x i32> %[[#A1]], i32 0
+  // LLVM-NEXT: %[[#X:]] = load i32, ptr %{{[0-9]+}}, align 4
+  // LLVM-NEXT: %[[#A2:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#LHSCA:]] = extractelement <4 x i32> %[[#A2]], i32 %[[#X]]
+  // LLVM-NEXT: %[[#SUMCA:]] = add nsw i32 %[[#LHSCA]], %[[#RHSCA]]
+  // LLVM-NEXT: %[[#A3:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#RES:]] = insertelement <4 x i32> %[[#A3]], i32 %[[#SUMCA]], i32 %[[#X]]
+  // LLVM-NEXT: store <4 x i32> %[[#RES]], ptr %{{[0-9]+}}, align 16
+
+  // Binary arithmetic operations
+  vi4 d = a + b;
+  // CIR: %{{[0-9]+}} = cir.binop(add, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>
+  // LLVM: %{{[0-9]+}} = add <4 x i32> %{{[0-9]+}}, %{{[0-9]+}}
+  vi4 e = a - b;
+  // CIR: %{{[0-9]+}} = cir.binop(sub, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>
+  // LLVM: %{{[0-9]+}} = sub <4 x i32> %{{[0-9]+}}, %{{[0-9]+}}
+  vi4 f = a * b;
+  // CIR: %{{[0-9]+}} = cir.binop(mul, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>
+  // LLVM: %{{[0-9]+}} = mul <4 x i32> %{{[0-9]+}}, %{{[0-9]+}}
+  vi4 g = a / b;
+  // CIR: %{{[0-9]+}} = cir.binop(div, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>
+  // LLVM: %{{[0-9]+}} = sdiv <4 x i32> %{{[0-9]+}}, %{{[0-9]+}}
+  vi4 h = a % b;
+  // CIR: %{{[0-9]+}} = cir.binop(rem, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>
+  // LLVM: %{{[0-9]+}} = srem <4 x i32> %{{[0-9]+}}, %{{[0-9]+}}
+  vi4 i = a & b;
+  // CIR: %{{[0-9]+}} = cir.binop(and, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>
+  // LLVM: %{{[0-9]+}} = and <4 x i32> %{{[0-9]+}}, %{{[0-9]+}}
+  vi4 j = a | b;
+  // CIR: %{{[0-9]+}} = cir.binop(or, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>
+  // LLVM: %{{[0-9]+}} = or <4 x i32> %{{[0-9]+}}, %{{[0-9]+}}
+  vi4 k = a ^ b;
+  // CIR: %{{[0-9]+}} = cir.binop(xor, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>
+  // LLVM: %{{[0-9]+}} = xor <4 x i32> %{{[0-9]+}}, %{{[0-9]+}}
+
+  // Unary arithmetic operations
+  vi4 l = +a;
+  // CIR: %{{[0-9]+}} = cir.unary(plus, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  // LLVM:      %[[#VAL:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: store <4 x i32> %[[#VAL]], ptr %{{[0-9]+}}, align 16
+  vi4 m = -a;
+  // CIR: %{{[0-9]+}} = cir.unary(minus, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  // LLVM:      %[[#VAL:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#RES:]] = sub <4 x i32> zeroinitializer, %[[#VAL]]
+  // LLVM-NEXT: store <4 x i32> %[[#RES]], ptr %{{[0-9]+}}, align 16
+  vi4 n = ~a;
+  // CIR: %{{[0-9]+}} = cir.unary(not, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  // LLVM:      %[[#VAL:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#RES:]] = xor <4 x i32> %[[#VAL]], splat (i32 -1)
+  // LLVM-NEXT: store <4 x i32> %[[#RES]], ptr %{{[0-9]+}}, align 16
+
+  // TODO: Ternary conditional operator
+
+  // Comparisons
+  vi4 o = a == b;
+  // CIR: %{{[0-9]+}} = cir.vec.cmp(eq, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  // LLVM: %[[#RES:]] = icmp eq <4 x i32> %{{[0-9]+}}, %{{[0-9]+}}
+  // LLVM-NEXT: %[[#EXT:]] = sext <4 x i1> %[[#RES]] to <4 x i32>
+  vi4 p = a != b;
+  // CIR: %{{[0-9]+}} = cir.vec.cmp(ne, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  // LLVM: %[[#RES:]] = icmp ne <4 x i32> %{{[0-9]+}}, %{{[0-9]+}}
+  // LLVM-NEXT: %[[#EXT:]] = sext <4 x i1> %[[#RES]] to <4 x i32>
+  vi4 q = a < b;
+  // CIR: %{{[0-9]+}} = cir.vec.cmp(lt, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  // LLVM: %[[#RES:]] = icmp slt <4 x i32> %{{[0-9]+}}, %{{[0-9]+}}
+  // LLVM-NEXT: %[[#EXT:]] = sext <4 x i1> %[[#RES]] to <4 x i32>
+  vi4 r = a > b;
+  // CIR: %{{[0-9]+}} = cir.vec.cmp(gt, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  // LLVM: %[[#RES:]] = icmp sgt <4 x i32> %{{[0-9]+}}, %{{[0-9]+}}
+  // LLVM-NEXT: %[[#EXT:]] = sext <4 x i1> %[[#RES]] to <4 x i32>
+  vi4 s = a <= b;
+  // CIR: %{{[0-9]+}} = cir.vec.cmp(le, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  // LLVM: %[[#RES:]] = icmp sle <4 x i32> %{{[0-9]+}}, %{{[0-9]+}}
+  // LLVM-NEXT: %[[#EXT:]] = sext <4 x i1> %[[#RES]] to <4 x i32>
+  vi4 t = a >= b;
+  // CIR: %{{[0-9]+}} = cir.vec.cmp(ge, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  // LLVM: %[[#RES:]] = icmp sge <4 x i32> %{{[0-9]+}}, %{{[0-9]+}}
+  // LLVM-NEXT: %[[#EXT:]] = sext <4 x i1> %[[#RES]] to <4 x i32>
+
+  // __builtin_shufflevector
+  vi4 u = __builtin_shufflevector(a, b, 7, 5, 3, 1);
+  // CIR: %{{[0-9]+}} = cir.vec.shuffle(%{{[0-9]+}}, %{{[0-9]+}} : !cir.vector<!s32i x 4>) [#cir.int<7> : !s64i, #cir.int<5> : !s64i, #cir.int<3> : !s64i, #cir.int<1> : !s64i] : !cir.vector<!s32i x 4>
+
+  // LLVM:      %[[#A:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#B:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#SHFL:]] = shufflevector <4 x i32> %[[#A]], <4 x i32> %[[#B]], <4 x i32> <i32 7, i32 5, i32 3, i32 1>
+  // LLVM-NEXT: store <4 x i32> %[[#SHFL]], ptr %{{[0-9]+}}, align 16
+
+  vi4 v = __builtin_shufflevector(a, b);
+  // CIR: %{{[0-9]+}} = cir.vec.shuffle.dynamic %{{[0-9]+}} : !cir.vector<!s32i x 4>, %{{[0-9]+}} : !cir.vector<!s32i x 4>
+
+  // LLVM:      %[[#A:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#B:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#IDXMOD:]] = and <4 x i32> %[[#B]], splat (i32 3)
+  // LLVM-NEXT: %[[#IDX0:]] = extractelement <4 x i32> %[[#IDXMOD]], i64 0
+  // LLVM-NEXT: %[[#EXT1:]] = extractelement <4 x i32> %[[#A]], i32 %[[#IDX0]]
+  // LLVM-NEXT: %[[#INS1:]] = insertelement <4 x i32> undef, i32 %[[#EXT1]], i64 0
+  // LLVM-NEXT: %[[#IDX1:]] = extractelement <4 x i32> %[[#IDXMOD]], i64 1
+  // LLVM-NEXT: %[[#EXT2:]] = extractelement <4 x i32> %[[#A]], i32 %[[#IDX1]]
+  // LLVM-NEXT: %[[#INS2:]] = insertelement <4 x i32> %[[#INS1]], i32 %[[#EXT2]], i64 1
+  // LLVM-NEXT: %[[#IDX2:]] = extractelement <4 x i32> %[[#IDXMOD]], i64 2
+  // LLVM-NEXT: %[[#EXT3:]] = extractelement <4 x i32> %[[#A]], i32 %[[#IDX2]]
+  // LLVM-NEXT: %[[#INS3:]] = insertelement <4 x i32> %[[#INS2]], i32 %[[#EXT3]], i64 2
+  // LLVM-NEXT: %[[#IDX3:]] = extractelement <4 x i32> %[[#IDXMOD]], i64 3
+  // LLVM-NEXT: %[[#EXT4:]] = extractelement <4 x i32> %[[#A]], i32 %[[#IDX3]]
+  // LLVM-NEXT: %[[#INS4:]] = insertelement <4 x i32> %[[#INS3]], i32 %[[#EXT4]], i64 3
+  // LLVM-NEXT: store <4 x i32> %[[#INS4]], ptr %{{[0-9]+}}, align 16
+}
+
+// CIR: cir.func {{.*}} {{@.*vector_double_test.*}}
+// LLVM: define dso_local void {{@.*vector_double_test.*}}
+void vector_double_test(int x, double y) {
+  // Vector constant.
+  vd2 a = { 1.5, 2.5 };
+  // CIR: %{{[0-9]+}} = cir.const #cir.const_vector<[#cir.fp<1.500000e+00> : !cir.double, #cir.fp<2.500000e+00> : !cir.double]> : !cir.vector<!cir.double x 2>
+
+  // LLVM: store <2 x double> <double 1.500000e+00, double 2.500000e+00>, ptr %{{[0-9]+}}, align 16
+
+  // Non-const vector initialization.
+  vd2 b = { y, y + 1.0 };
+  // CIR: %{{[0-9]+}} = cir.vec.create(%{{[0-9]+}}, %{{[0-9]+}} : !cir.double, !cir.double) : !cir.vector<!cir.double x 2>
+
+  // LLVM:      %[[#Y1:]] = load double, ptr %{{[0-9]+}}, align 8
+  // LLVM-NEXT: %[[#Y2:]] = load double, ptr %{{[0-9]+}}, align 8
+  // LLVM-NEXT: %[[#SUM:]] = fadd double %[[#Y2]], 1.000000e+00
+  // LLVM-NEXT: %[[#VEC1:]] = insertelement <2 x double> poison, double %[[#Y1]], i64 0
+  // LLVM-NEXT: %[[#VEC2:]] = insertelement <2 x double> %[[#VEC1]], double %[[#SUM]], i64 1
+  // LLVM-NEXT: store <2 x double> %[[#VEC2]], ptr %{{[0-9]+}}, align 16
+
+  // Incomplete vector initialization
+  vd2 bb = { y };
+  // CIR: %[[#dzero:]] = cir.const #cir.fp<0.000000e+00> : !cir.double
+  // CIR: %{{[0-9]+}} = cir.vec.create(%{{[0-9]+}}, %[[#dzero]] : !cir.double, !cir.double) : !cir.vector<!cir.double x 2>
+
+  // LLVM:      %[[#Y1:]] = load double, ptr %{{[0-9]+}}, align 8
+  // LLVM-NEXT: %[[#VEC1:]] = insertelement <2 x double> poison, double %[[#Y1]], i64 0
+  // LLVM-NEXT: %[[#VEC2:]] = insertelement <2 x double> %[[#VEC1]], double 0.000000e+00, i64 1
+  // LLVM-NEXT: store <2 x double> %[[#VEC2]], ptr %{{[0-9]+}}, align 16
+
+  // Scalar to vector conversion, a.k.a. vector splat.  Only valid as an
+  // operand of a binary operator, not as a regular conversion.
+  bb = a + 2.5;
+  // CIR: %[[#twohalf:]] = cir.const #cir.fp<2.500000e+00> : !cir.double
+  // CIR: %{{[0-9]+}} = cir.vec.splat %[[#twohalf]] : !cir.double, !cir.vector<!cir.double x 2>
+
+  // LLVM:      %[[#A:]] = load <2 x double>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#BB:]] = fadd <2 x double> %[[#A]], splat (double 2.500000e+00)
+  // LLVM-NEXT: store <2 x double> %[[#BB]], ptr %{{[0-9]+}}, align 16
+
+  // Extract element
+  double c = a[x];
+  // CIR: %{{[0-9]+}} = cir.vec.extract %{{[0-9]+}}[%{{[0-9]+}} : !s32i] : !cir.vector<!cir.double x 2>
+  // LLVM: %{{[0-9]+}} = extractelement <2 x double> %{{[0-9]+}}, i32 %{{[0-9]+}}
+
+  // Insert element
+  a[x] = y;
+  // CIR: %[[#LOADEDVF:]] = cir.load{{.*}} %[[#STORAGEVF:]] : !cir.ptr<!cir.vector<!cir.double x 2>>, !cir.vector<!cir.double x 2>
+  // CIR: %[[#UPDATEDVF:]] = cir.vec.insert %{{[0-9]+}}, %[[#LOADEDVF]][%{{[0-9]+}} : !s32i] : !cir.vector<!cir.double x 2>
+  // CIR: cir.store{{.*}} %[[#UPDATEDVF]], %[[#STORAGEVF]] : !cir.vector<!cir.double x 2>, !cir.ptr<!cir.vector<!cir.double x 2>>
+
+  // LLVM:      %[[#Y:]] = load double, ptr %{{[0-9]+}}, align 8
+  // LLVM-NEXT: %[[#X:]] = load i32, ptr %{{[0-9]+}}, align 4
+  // LLVM-NEXT: %[[#A:]] = load <2 x double>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#INS:]] = insertelement <2 x double> %[[#A]], double %[[#Y]], i32 %[[#X]]
+  // LLVM-NEXT: store <2 x double> %[[#INS]], ptr %{{[0-9]+}}, align 16
+
+  // Binary arithmetic operations
+  vd2 d = a + b;
+  // CIR: %{{[0-9]+}} = cir.binop(add, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>
+  // LLVM: %{{[0-9]+}} = fadd <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+  vd2 e = a - b;
+  // CIR: %{{[0-9]+}} = cir.binop(sub, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>
+  // LLVM: %{{[0-9]+}} = fsub <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+  vd2 f = a * b;
+  // CIR: %{{[0-9]+}} = cir.binop(mul, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>
+  // LLVM: %{{[0-9]+}} = fmul <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+  vd2 g = a / b;
+  // CIR: %{{[0-9]+}} = cir.binop(div, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>
+  // LLVM: %{{[0-9]+}} = fdiv <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+
+  // Unary arithmetic operations
+  vd2 l = +a;
+  // CIR: %{{[0-9]+}} = cir.unary(plus, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!cir.double x 2>
+  // LLVM:      %[[#VAL:]] = load <2 x double>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: store <2 x double> %[[#VAL]], ptr %{{[0-9]+}}, align 16
+  vd2 m = -a;
+  // CIR: %{{[0-9]+}} = cir.unary(minus, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!cir.double x 2>
+  // LLVM:      %[[#VAL:]] = load <2 x double>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#RES:]] = fneg <2 x double> %[[#VAL]]
+  // LLVM-NEXT: store <2 x double> %[[#RES]], ptr %{{[0-9]+}}, align 16
+
+  // Comparisons
+  vl2 o = a == b;
+  // CIR: %{{[0-9]+}} = cir.vec.cmp(eq, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  // LLVM: %[[#RES:]] = fcmp oeq <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+  // LLVM-NEXT: sext <2 x i1> %[[#RES]] to <2 x i64>
+  vl2 p = a != b;
+  // CIR: %{{[0-9]+}} = cir.vec.cmp(ne, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  // LLVM: %[[#RES:]] = fcmp une <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+  // LLVM-NEXT: sext <2 x i1> %[[#RES]] to <2 x i64>
+  vl2 q = a < b;
+  // CIR: %{{[0-9]+}} = cir.vec.cmp(lt, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  // LLVM: %[[#RES:]] = fcmp olt <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+  // LLVM-NEXT: sext <2 x i1> %[[#RES]] to <2 x i64>
+  vl2 r = a > b;
+  // CIR: %{{[0-9]+}} = cir.vec.cmp(gt, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  // LLVM: %[[#RES:]] = fcmp ogt <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+  // LLVM-NEXT: sext <2 x i1> %[[#RES]] to <2 x i64>
+  vl2 s = a <= b;
+  // CIR: %{{[0-9]+}} = cir.vec.cmp(le, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  // LLVM: %[[#RES:]] = fcmp ole <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+  // LLVM-NEXT: sext <2 x i1> %[[#RES]] to <2 x i64>
+  vl2 t = a >= b;
+  // CIR: %{{[0-9]+}} = cir.vec.cmp(ge, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  // LLVM: %[[#RES:]] = fcmp oge <2 x double> %{{[0-9]+}}, %{{[0-9]+}}
+  // LLVM-NEXT: sext <2 x i1> %[[#RES]] to <2 x i64>
+
+  // __builtin_convertvector
+  vus2 w = __builtin_convertvector(a, vus2);
+  // CIR: %{{[0-9]+}} = cir.cast float_to_int %{{[0-9]+}} : !cir.vector<!cir.double x 2> -> !cir.vector<!u16i x 2>
+  // LLVM: %{{[0-9]+}} = fptoui <2 x double> %{{[0-9]+}} to <2 x i16>
+}
+
+// CIR: cir.func {{.*}} {{@.*test_load.*}}
+// LLVM: define dso_local void {{@.*test_load.*}}
+void test_load() {
+  vi4 a = { 1, 2, 3, 4 };
+
+  vi2 b;
+
+  b = a.wz;
+  // CIR:      %[[#LOAD1:]] = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+  // CIR:      %[[#POISON1:]] = cir.const #cir.poison : !cir.vector<!s32i x 4>
+  // CIR-NEXT: %[[#SHUFFLE1:]] = cir.vec.shuffle(%[[#LOAD1]], %[[#POISON1]] : !cir.vector<!s32i x 4>) [#cir.int<3> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!s32i x 2>
+  // CIR-NEXT: cir.store{{.*}} %[[#SHUFFLE1]], %{{[0-9]+}} : !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>
+
+  // LLVM:      %[[#LOAD1:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#SHUFFLE1:]] = shufflevector <4 x i32> %[[#LOAD1]], <4 x i32> poison, <2 x i32> <i32 3, i32 2>
+  // LLVM-NEXT: store <2 x i32> %[[#SHUFFLE1]], ptr %{{[0-9]+}}, align 8
+
+  int one_elem_load = a.s2;
+  // CIR-NEXT: %[[#LOAD8:]] = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: %[[#EXTRACT_INDEX:]] = cir.const #cir.int<2> : !s64i
+  // CIR-NEXT: %[[#EXTRACT1:]] = cir.vec.extract %[[#LOAD8]][%[[#EXTRACT_INDEX]] : !s64i] : !cir.vector<!s32i x 4>
+  // CIR-NEXT: cir.store{{.*}} %[[#EXTRACT1]], %{{[0-9]+}} : !s32i, !cir.ptr<!s32i>
+
+  // LLVM-NEXT: %[[#LOAD8:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#EXTRACT1:]] = extractelement <4 x i32> %[[#LOAD8]], i64 2
+  // LLVM-NEXT: store i32 %[[#EXTRACT1]], ptr %{{[0-9]+}}, align 4
+
+}
+
+// CIR: cir.func {{.*}} {{@.*test_store.*}}
+// LLVM: define dso_local void {{@.*test_store.*}}
+void test_store() {
+  vi4 a;
+  // CIR: %[[#PVECA:]] = cir.alloca !cir.vector<!s32i x 4>
+  // LLVM: %[[#PVECA:]] = alloca <4 x i32>
+
+  vi2 b = {1, 2};
+  // CIR-NEXT: %[[#PVECB:]] = cir.alloca !cir.vector<!s32i x 2>
+  // LLVM-NEXT: %[[#PVECB:]] = alloca <2 x i32>
+
+  vi3 c = {};
+  // CIR-NEXT: %[[#PVECC:]] = cir.alloca !cir.vector<!s32i x 3>
+  // LLVM-NEXT: %[[#PVECC:]] = alloca <3 x i32>
+
+  a.xy = b;
+  // CIR:      %[[#LOAD4RHS:]] = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!cir.vector<!s32i x 2>>, !cir.vector<!s32i x 2>
+  // CIR-NEXT: %[[#LOAD5LHS:]] = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: %[[#POISON2:]] = cir.const #cir.poison : !cir.vector<!s32i x 2>
+  // CIR-NEXT: %[[#SHUFFLE5:]] = cir.vec.shuffle(%[[#LOAD4RHS]], %[[#POISON2]] : !cir.vector<!s32i x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<-1> : !s32i, #cir.int<-1> : !s32i] : !cir.vector<!s32i x 4>
+  // CIR-NEXT: %[[#SHUFFLE6:]] = cir.vec.shuffle(%[[#LOAD5LHS]], %[[#SHUFFLE5]] : !cir.vector<!s32i x 4>) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s32i x 4>
+  // CIR-NEXT: cir.store{{.*}} %[[#SHUFFLE6]], %{{[0-9]+}} : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+
+  // LLVM:      %[[#LOAD4RHS:]] = load <2 x i32>, ptr %{{[0-9]+}}, align 8
+  // LLVM-NEXT: %[[#LOAD5LHS:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#SHUFFLE5:]] = shufflevector <2 x i32> %[[#LOAD4RHS]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+  // LLVM-NEXT: %[[#SHUFFLE6:]] = shufflevector <4 x i32> %[[#LOAD5LHS]], <4 x i32> %[[#SHUFFLE5]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  // LLVM-NEXT: store <4 x i32> %[[#SHUFFLE6]], ptr %{{[0-9]+}}, align 16
+
+  // load single element
+  a.s0 = 1;
+  // CIR-NEXT: cir.const #cir.int<1>
+  // CIR-NEXT: %[[#LOAD7:]] = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: %[[#INSERT_INDEX:]] = cir.const #cir.int<0> : !s64i
+  // CIR-NEXT: %[[#INSERT1:]] = cir.vec.insert %{{[0-9]+}}, %[[#LOAD7]][%[[#INSERT_INDEX]] : !s64i] : !cir.vector<!s32i x 4>
+  // CIR-NEXT: cir.store{{.*}} %[[#INSERT1]], %{{[0-9]+}} : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+
+  // LLVM-NEXT: %[[#LOAD7:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#INSERT1:]] = insertelement <4 x i32> %[[#LOAD7]], i32 1, i64 0
+  // LLVM-NEXT: store <4 x i32> %[[#INSERT1]], ptr %{{[0-9]+}}, align 16
+
+  // extend length from 2 to 4, then merge two vectors
+  a.lo = b;
+  // CIR:      %[[#VECB:]] = cir.load{{.*}} %[[#PVECB]] : !cir.ptr<!cir.vector<!s32i x 2>>, !cir.vector<!s32i x 2>
+  // CIR-NEXT: %[[#VECA:]] = cir.load{{.*}} %[[#PVECA]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: %[[#POISON3:]] = cir.const #cir.poison : !cir.vector<!s32i x 2>
+  // CIR-NEXT: %[[#EXTVECB:]] = cir.vec.shuffle(%[[#VECB]], %[[#POISON3]] : !cir.vector<!s32i x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<-1> : !s32i, #cir.int<-1> : !s32i] : !cir.vector<!s32i x 4>
+  // CIR-NEXT: %[[#RESULT:]] = cir.vec.shuffle(%[[#VECA]], %[[#EXTVECB]] : !cir.vector<!s32i x 4>) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s32i x 4>
+  // CIR-NEXT: cir.store{{.*}} %[[#RESULT]], %[[#PVECA]] : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+
+  // LLVM:      %[[#VECB:]] = load <2 x i32>, ptr %[[#PVECB]], align 8
+  // LLVM-NEXT: %[[#VECA:]] = load <4 x i32>, ptr %[[#PVECA]], align 16
+  // LLVM-NEXT: %[[#EXTVECB:]] = shufflevector <2 x i32> %[[#VECB]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+  // LLVM-NEXT: %[[#RESULT:]] = shufflevector <4 x i32> %[[#VECA]], <4 x i32> %[[#EXTVECB]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  // LLVM-NEXT: store <4 x i32> %[[#RESULT]], ptr %[[#PVECA]], align 16
+
+  // OpenCL C Specification 6.3.7. Vector Components
+  // The suffixes .lo (or .even) and .hi (or .odd) for a 3-component vector type
+  // operate as if the 3-component vector type is a 4-component vector type with
+  // the value in the w component undefined.
+  b = c.hi;
+
+  // CIR-NEXT: %[[#VECC:]] = cir.load{{.*}} %[[#PVECC]] : !cir.ptr<!cir.vector<!s32i x 3>>, !cir.vector<!s32i x 3>
+  // CIR-NEXT: %[[#POISON4:]] = cir.const #cir.poison : !cir.vector<!s32i x 3>
+  // CIR-NEXT: %[[#HIPART:]] = cir.vec.shuffle(%[[#VECC]], %[[#POISON4]] : !cir.vector<!s32i x 3>) [#cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s32i x 2>
+  // CIR-NEXT: cir.store{{.*}} %[[#HIPART]], %[[#PVECB]] : !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>
+
+  // LLVM-NEXT: %[[#VECC:]] = load <3 x i32>, ptr %[[#PVECC]], align 16
+  // LLVM-NEXT: %[[#HIPART:]] = shufflevector <3 x i32> %[[#VECC]], <3 x i32> poison, <2 x i32> <i32 2, i32 3>
+  // LLVM-NEXT: store <2 x i32> %[[#HIPART]], ptr %[[#PVECB]], align 8
+
+  // c.hi is c[2, 3], in which 3 should be ignored in CIRGen for store
+  c.hi = b;
+
+  // CIR-NEXT: %[[#VECB:]] = cir.load{{.*}} %[[#PVECB]] : !cir.ptr<!cir.vector<!s32i x 2>>, !cir.vector<!s32i x 2>
+  // CIR-NEXT: %[[#VECC:]] = cir.load{{.*}} %[[#PVECC]] : !cir.ptr<!cir.vector<!s32i x 3>>, !cir.vector<!s32i x 3>
+  // CIR-NEXT: %[[#POISON5:]] = cir.const #cir.poison : !cir.vector<!s32i x 2>
+  // CIR-NEXT: %[[#EXTVECB:]] = cir.vec.shuffle(%[[#VECB]], %[[#POISON5]] : !cir.vector<!s32i x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<-1> : !s32i] : !cir.vector<!s32i x 3>
+  // CIR-NEXT: %[[#RESULT:]] = cir.vec.shuffle(%[[#VECC]], %[[#EXTVECB]] : !cir.vector<!s32i x 3>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s32i x 3>
+  // CIR-NEXT: cir.store{{.*}} %[[#RESULT]], %[[#PVECC]] : !cir.vector<!s32i x 3>, !cir.ptr<!cir.vector<!s32i x 3>>
+
+  // LLVM-NEXT: %[[#VECB:]] = load <2 x i32>, ptr %[[#PVECB]], align 8
+  // LLVM-NEXT: %[[#VECC:]] = load <3 x i32>, ptr %[[#PVECC]], align 16
+  // LLVM-NEXT: %[[#EXTVECB:]] = shufflevector <2 x i32> %[[#VECB]], <2 x i32> poison, <3 x i32> <i32 0, i32 1, i32 poison>
+  // LLVM-NEXT: %[[#RESULT:]] = shufflevector <3 x i32> %[[#VECC]], <3 x i32> %[[#EXTVECB]], <3 x i32> <i32 0, i32 1, i32 3>
+  // LLVM-NEXT: store <3 x i32> %[[#RESULT]], ptr %[[#PVECC]], align 16
+
+}
+
+// CIR: cir.func {{.*}} {{@.*test_build_lvalue.*}}
+// LLVM: define dso_local void {{@.*test_build_lvalue.*}}
+void test_build_lvalue() {
+  // special cases only
+
+  vi4 *pv, v;
+
+  // CIR-NEXT: %[[#ALLOCAPV:]] = cir.alloca !cir.ptr<!cir.vector<!s32i x 4>>, !cir.ptr<!cir.ptr<!cir.vector<!s32i x 4>>>, ["pv"] {alignment = 8 : i64}
+  // CIR-NEXT: %[[#ALLOCAV:]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["v"] {alignment = 16 : i64}
+  // CIR-NEXT: %[[#ALLOCAS:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["s", init] {alignment = 4 : i64}
+  // CIR-NEXT: %[[#ALLOCATMP:]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["tmp"] {alignment = 16 : i64}
+  // CIR-NEXT: %[[#ALLOCAR:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["r", init] {alignment = 4 : i64}
+
+  // LLVM-NEXT: %[[#ALLOCAPV:]] = alloca ptr, i64 1, align 8
+  // LLVM-NEXT: %[[#ALLOCAV:]] = alloca <4 x i32>, i64 1, align 16
+  // LLVM-NEXT: %[[#ALLOCAS:]] = alloca i32, i64 1, align 4
+  // LLVM-NEXT: %[[#ALLOCATMP:]] = alloca <4 x i32>, i64 1, align 16
+  // LLVM-NEXT: %[[#ALLOCAR:]] = alloca i32, i64 1, align 4
+
+  pv->x = 99;
+  // CIR-NEXT: %[[#VAL:]] = cir.const #cir.int<99> : !s32i
+  // CIR-NEXT: %[[#PV:]] = cir.load{{.*}} %[[#ALLOCAPV]] : !cir.ptr<!cir.ptr<!cir.vector<!s32i x 4>>>, !cir.ptr<!cir.vector<!s32i x 4>>
+  // CIR-NEXT: %[[#V:]] = cir.load{{.*}} %[[#PV]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: %[[#IDX:]] = cir.const #cir.int<0> : !s64i
+  // CIR-NEXT: %[[#RESULT:]] = cir.vec.insert %[[#VAL]], %[[#V]][%[[#IDX]] : !s64i] : !cir.vector<!s32i x 4>
+  // CIR-NEXT: cir.store{{.*}} %[[#RESULT]], %[[#PV]] : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+
+  // LLVM-NEXT: %[[#PV:]] = load ptr, ptr %[[#ALLOCAPV]], align 8
+  // LLVM-NEXT: %[[#V:]] = load <4 x i32>, ptr %[[#PV]], align 16
+  // LLVM-NEXT: %[[#RESULT:]] = insertelement <4 x i32> %[[#V]], i32 99, i64 0
+  // LLVM-NEXT: store <4 x i32> %[[#RESULT]], ptr %[[#PV]], align 16
+
+  int s = (v+v).x;
+
+  // CIR-NEXT: %[[#LOAD1:]] = cir.load{{.*}} %[[#ALLOCAV]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: %[[#LOAD2:]] = cir.load{{.*}} %[[#ALLOCAV]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: %[[#SUM:]] = cir.binop(add, %[[#LOAD1]], %[[#LOAD2]]) : !cir.vector<!s32i x 4>
+  // CIR-NEXT: cir.store{{.*}} %[[#SUM]], %[[#ALLOCATMP]] : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+  // CIR-NEXT: %[[#TMP:]] = cir.load{{.*}} %[[#ALLOCATMP]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: %[[#IDX:]] = cir.const #cir.int<0> : !s64i
+  // CIR-NEXT: %[[#RESULT:]] = cir.vec.extract %[[#TMP]][%[[#IDX]] : !s64i] : !cir.vector<!s32i x 4>
+  // CIR-NEXT: cir.store{{.*}} %[[#RESULT]], %[[#ALLOCAS]] : !s32i, !cir.ptr<!s32i>
+
+  // LLVM-NEXT: %[[#LOAD1:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#LOAD2:]] = load <4 x i32>, ptr %{{[0-9]+}}, align 16
+  // LLVM-NEXT: %[[#SUM:]] = add <4 x i32> %[[#LOAD1]], %[[#LOAD2]]
+  // LLVM-NEXT: store <4 x i32> %[[#SUM]], ptr %[[#ALLOCATMP]], align 16
+  // LLVM-NEXT: %[[#TMP:]] = load <4 x i32>, ptr %[[#ALLOCATMP]], align 16
+  // LLVM-NEXT: %[[#RESULT:]] = extractelement <4 x i32> %[[#TMP]], i64 0
+  // LLVM-NEXT: store i32 %[[#RESULT]], ptr %[[#ALLOCAS]], align 4
+
+  int r = v.xy.x;
+  // CIR-NEXT: %[[#V:]] = cir.load{{.*}} %[[#ALLOCAV]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: %[[#IDX:]] = cir.const #cir.int<0> : !s64i
+  // CIR-NEXT: %[[#RESULT:]] = cir.vec.extract %[[#V]][%[[#IDX]] : !s64i] : !cir.vector<!s32i x 4>
+  // CIR-NEXT: cir.store{{.*}} %[[#RESULT]], %[[#ALLOCAR]] : !s32i, !cir.ptr<!s32i>
+
+  // LLVM-NEXT: %[[#V:]] = load <4 x i32>, ptr %[[#ALLOCAV]], align 16
+  // LLVM-NEXT: %[[#RESULT:]] = extractelement <4 x i32> %[[#V]], i64 0
+  // LLVM-NEXT: store i32 %[[#RESULT]], ptr %[[#ALLOCAR]], align 4
+
+}
+
+// CIR: cir.func {{.*}} {{@.*test_vec3.*}}
+// LLVM: define dso_local void {{@.*test_vec3.*}}
+void test_vec3() {
+  vi3 v = {};
+  // CIR-NEXT: %[[#PV:]] = cir.alloca !cir.vector<!s32i x 3>, !cir.ptr<!cir.vector<!s32i x 3>>, ["v", init] {alignment = 16 : i64}
+  // CIR-NEXT: %[[#VVAL:]] = cir.const #cir.const_vector<[#cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<!s32i x 3>
+  // CIR-NEXT: cir.store{{.*}} %[[#VVAL]], %[[#PV]] : !cir.vector<!s32i x 3>, !cir.ptr<!cir.vector<!s32i x 3>>
+
+  // LLVM-NEXT: %[[#PV:]] = alloca <3 x i32>, i64 1, align 16
+  // LLVM-NEXT: store <3 x i32> zeroinitializer, ptr %[[#PV]], align 16
+
+  v + 1;
+  // CIR-NEXT: %[[#PV4:]] = cir.cast bitcast %[[#PV]] : !cir.ptr<!cir.vector<!s32i x 3>> -> !cir.ptr<!cir.vector<!s32i x 4>>
+  // CIR-NEXT: %[[#V4:]] = cir.load{{.*}} %[[#PV4]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: %[[#POISON6:]] = cir.const #cir.poison : !cir.vector<!s32i x 4>
+  // CIR-NEXT: %[[#V3:]] = cir.vec.shuffle(%[[#V4]], %[[#POISON6]] : !cir.vector<!s32i x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!s32i x 3>
+  // CIR:      %[[#RES:]] = cir.binop(add, %[[#V3]], %{{[0-9]+}}) : !cir.vector<!s32i x 3>
+
+  // LLVM-NEXT: %[[#V4:]] = load <4 x i32>, ptr %[[#PV:]], align 16
+  // LLVM-NEXT: %[[#V3:]] = shufflevector <4 x i32> %[[#V4]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+  // LLVM-NEXT: %[[#RES:]] = add <3 x i32> %[[#V3]], splat (i32 1)
+
+}
+
+void vector_integers_shifts_test() {
+  vi4 a = {1, 2, 3, 4};
+  uvi4 b = {5u, 6u, 7u, 8u};
+
+  vi4 shl = a << b;
+  // CIR:  %{{[0-9]+}} = cir.shift(left, %{{[0-9]+}} : !cir.vector<!s32i x 4>, %{{[0-9]+}} : !cir.vector<!u32i x 4>) -> !cir.vector<!s32i x 4>
+  uvi4 shr = b >> a;
+  // CIR: %{{[0-9]+}} = cir.shift(right, %{{[0-9]+}} : !cir.vector<!u32i x 4>, %{{[0-9]+}} : !cir.vector<!s32i x 4>) -> !cir.vector<!u32i x 4>
+}
+
+void vector_shuffle_dynamic_mask_test() {
+  vi6 a;
+  vi6 b;
+  vi6 r = __builtin_shufflevector(a, b);
+
+  // CIR: %{{[0-9]+}} = cir.vec.shuffle.dynamic %{{[0-9]+}} : !cir.vector<!s32i x 6>, %{{[0-9]+}} : !cir.vector<!s32i x 6>
+
+  // LLVM: {{.*}} = and <6 x i32> {{.*}}, splat (i32 7)
+}
+
+// Test for arrays of 3-component extended vectors
+// This documents how vec3 arrays are handled in memory accesses
+// Original CodeGen treats vec3 as vec4 for aligned memory access
+// See issue #685
+// CIR: cir.func {{.*}} {{@.*test_vec3_array.*}}
+// LLVM: define dso_local void {{@.*test_vec3_array.*}}
+void test_vec3_array() {
+  vi3 arr[4] = {};
+  // CIR: cir.alloca !cir.array<!cir.vector<!s32i x 3> x 4>, !cir.ptr<!cir.array<!cir.vector<!s32i x 3> x 4>>, ["arr", init]
+  // LLVM: alloca [4 x <3 x i32>], i64 1, align 16
+
+  vi3 *ptr = &arr[0];
+  // CIR: cir.get_element{{.*}}!cir.array<!cir.vector<!s32i x 3> x 4>
+  // LLVM: getelementptr [4 x <3 x i32>]
+  
+  // Key behavior: Loading from array element shows vec3->vec4 optimization
+  arr[0] + arr[1];
+  // CIR: %[[#PTR0:]] = cir.get_element{{.*}}!cir.ptr<!cir.vector<!s32i x 3>>
+  // CIR-NEXT: %[[#PTR0_V4:]] = cir.cast bitcast %[[#PTR0]] : !cir.ptr<!cir.vector<!s32i x 3>> -> !cir.ptr<!cir.vector<!s32i x 4>>
+  // CIR-NEXT: %[[#V4_0:]] = cir.load{{.*}}%[[#PTR0_V4]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: %[[#POISON0:]] = cir.const #cir.poison : !cir.vector<!s32i x 4>
+  // CIR-NEXT: %[[#V3_0:]] = cir.vec.shuffle(%[[#V4_0]], %[[#POISON0]] : !cir.vector<!s32i x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!s32i x 3>
+  
+  // LLVM: %[[#GEP0:]] = getelementptr [4 x <3 x i32>], ptr %{{.+}}, i32 0, i64 0
+  // LLVM-NEXT: %[[#LOAD_V4_0:]] = load <4 x i32>, ptr %[[#GEP0]], align 16
+  // LLVM-NEXT: %[[#LOAD_V3_0:]] = shufflevector <4 x i32> %[[#LOAD_V4_0]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+
+  // Same pattern for arr[1]
+  // CIR: %[[#PTR1:]] = cir.get_element{{.*}}!cir.ptr<!cir.vector<!s32i x 3>>
+  // CIR-NEXT: %[[#PTR1_V4:]] = cir.cast bitcast %[[#PTR1]] : !cir.ptr<!cir.vector<!s32i x 3>> -> !cir.ptr<!cir.vector<!s32i x 4>>
+  // CIR-NEXT: %[[#V4_1:]] = cir.load{{.*}}%[[#PTR1_V4]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: %[[#POISON1:]] = cir.const #cir.poison : !cir.vector<!s32i x 4>
+  // CIR-NEXT: %[[#V3_1:]] = cir.vec.shuffle(%[[#V4_1]], %[[#POISON1]] : !cir.vector<!s32i x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!s32i x 3>
+  // CIR: cir.binop(add, %[[#V3_0]], %[[#V3_1]]) : !cir.vector<!s32i x 3>
+  
+  // LLVM: %[[#GEP1:]] = getelementptr [4 x <3 x i32>], ptr %{{.+}}, i32 0, i64 1
+  // LLVM-NEXT: %[[#LOAD_V4_1:]] = load <4 x i32>, ptr %[[#GEP1]], align 16
+  // LLVM-NEXT: %[[#LOAD_V3_1:]] = shufflevector <4 x i32> %[[#LOAD_V4_1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
+  // LLVM: add <3 x i32> %[[#LOAD_V3_0]], %[[#LOAD_V3_1]]
+  
+  // Note: Array element stores (arr[i] = value) are not yet implemented (NYI at CIRGenExpr.cpp:640)
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/vectype-issized.c b/clang/test/CIR/Incubator/CodeGen/vectype-issized.c
new file mode 100644
index 0000000000000..380ed7a13f286
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vectype-issized.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24  -fclangir -emit-cir -target-feature +neon %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android24  -fclangir -emit-llvm -target-feature +neon %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+typedef __attribute__((neon_vector_type(8))) short  c;
+void d() { c a[8]; }
+
+// CIR-LABEL: d
+// CIR: {{%.*}} = cir.alloca !cir.array<!cir.vector<!s16i x 8> x 8>,
+// CIR-SAME: !cir.ptr<!cir.array<!cir.vector<!s16i x 8> x 8>>, ["a"]
+// CIR-SAME: {alignment = 16 : i64}
+
+// LLVM-LABEL: d
+// LLVM: {{%.*}} = alloca [8 x <8 x i16>], i64 1, align 16
diff --git a/clang/test/CIR/Incubator/CodeGen/vectype.cpp b/clang/test/CIR/Incubator/CodeGen/vectype.cpp
new file mode 100644
index 0000000000000..1f2defbc15cdc
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vectype.cpp
@@ -0,0 +1,248 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s
+
+typedef int vi4 __attribute__((vector_size(16)));
+typedef unsigned int uvi4 __attribute__((vector_size(16)));
+typedef double vd2 __attribute__((vector_size(16)));
+typedef long long vll2 __attribute__((vector_size(16)));
+typedef unsigned short vus2 __attribute__((vector_size(4)));
+typedef float vf4 __attribute__((vector_size(16)));
+
+vi4 vec_a;
+// CHECK: cir.global external @[[VEC_A:.*]] = #cir.zero : !cir.vector<!s32i x 4>
+
+vd2 b;
+// CHECK: cir.global external @[[VEC_B:.*]] = #cir.zero : !cir.vector<!cir.double x 2>
+
+vll2 c;
+// CHECK: cir.global external @[[VEC_C:.*]] = #cir.zero : !cir.vector<!s64i x 2>
+
+vi4 d = { 1, 2, 3, 4 };
+
+// CHECK: cir.global external @[[VEC_D:.*]] = #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<2> :
+// CHECK-SAME: !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.vector<!s32i x 4>
+
+void vector_int_test(int x, unsigned short usx) {
+
+  // Vector constant.
+  vi4 a = { 1, 2, 3, 4 };
+  // CHECK: %{{[0-9]+}} = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.vector<!s32i x 4>
+
+  // Non-const vector initialization.
+  vi4 b = { x, 5, 6, x + 1 };
+  // CHECK: %{{[0-9]+}} = cir.vec.create(%{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}} : !s32i, !s32i, !s32i, !s32i) : !cir.vector<!s32i x 4>
+
+  // Incomplete vector initialization.
+  vi4 bb = { x, x + 1 };
+  // CHECK: %[[#zero:]] = cir.const #cir.int<0> : !s32i
+  // CHECK: %{{[0-9]+}} = cir.vec.create(%{{[0-9]+}}, %{{[0-9]+}}, %[[#zero]], %[[#zero]] : !s32i, !s32i, !s32i, !s32i) : !cir.vector<!s32i x 4>
+
+  // Scalar to vector conversion, a.k.a. vector splat.  Only valid as an
+  // operand of a binary operator, not as a regular conversion.
+  bb = a + 7;
+  // CHECK: %[[#seven:]] = cir.const #cir.int<7> : !s32i
+  // CHECK: %{{[0-9]+}} = cir.vec.splat %[[#seven]] : !s32i, !cir.vector<!s32i x 4>
+
+  // Vector to vector conversion
+  vd2 bbb = { };
+  bb = (vi4)bbb;
+  // CHECK: %{{[0-9]+}} = cir.cast bitcast %{{[0-9]+}} : !cir.vector<!cir.double x 2> -> !cir.vector<!s32i x 4>
+
+  // Extract element
+  int c = a[x];
+  // CHECK: %{{[0-9]+}} = cir.vec.extract %{{[0-9]+}}[%{{[0-9]+}} : !s32i] : !cir.vector<!s32i x 4>
+
+  // Insert element
+  a[x] = x;
+  // CHECK: %[[#LOADEDVI:]] = cir.load{{.*}} %[[#STORAGEVI:]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+  // CHECK: %[[#UPDATEDVI:]] = cir.vec.insert %{{[0-9]+}}, %[[#LOADEDVI]][%{{[0-9]+}} : !s32i] : !cir.vector<!s32i x 4>
+  // CHECK: cir.store{{.*}} %[[#UPDATEDVI]], %[[#STORAGEVI]] : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+
+  // Compound assignment
+  a[x] += a[0];
+  // CHECK: %[[#LOADCA1:]] = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+  // CHECK: %[[#RHSCA:]] = cir.vec.extract %[[#LOADCA1]][%{{[0-9]+}} : !s32i] : !cir.vector<!s32i x 4>
+  // CHECK: %[[#LOADCAIDX2:]] = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!s32i>, !s32i
+  // CHECK: %[[#LOADCAVEC3:]] = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+  // CHECK: %[[#LHSCA:]] = cir.vec.extract %[[#LOADCAVEC3]][%[[#LOADCAIDX2]] : !s32i] : !cir.vector<!s32i x 4>
+  // CHECK: %[[#SUMCA:]] = cir.binop(add, %[[#LHSCA]], %[[#RHSCA]]) nsw : !s32i
+  // CHECK: %[[#LOADCAVEC4:]] = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+  // CHECK: %[[#RESULTCAVEC:]] = cir.vec.insert %[[#SUMCA]], %[[#LOADCAVEC4]][%[[#LOADCAIDX2]] : !s32i] : !cir.vector<!s32i x 4>
+  // CHECK: cir.store{{.*}} %[[#RESULTCAVEC]], %{{[0-9]+}} : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+
+  // Binary arithmetic operations
+  vi4 d = a + b;
+  // CHECK: %{{[0-9]+}} = cir.binop(add, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>
+  vi4 e = a - b;
+  // CHECK: %{{[0-9]+}} = cir.binop(sub, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>
+  vi4 f = a * b;
+  // CHECK: %{{[0-9]+}} = cir.binop(mul, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>
+  vi4 g = a / b;
+  // CHECK: %{{[0-9]+}} = cir.binop(div, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>
+  vi4 h = a % b;
+  // CHECK: %{{[0-9]+}} = cir.binop(rem, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>
+  vi4 i = a & b;
+  // CHECK: %{{[0-9]+}} = cir.binop(and, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>
+  vi4 j = a | b;
+  // CHECK: %{{[0-9]+}} = cir.binop(or, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>
+  vi4 k = a ^ b;
+  // CHECK: %{{[0-9]+}} = cir.binop(xor, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>
+
+  // Unary arithmetic operations
+  vi4 l = +a;
+  // CHECK: %{{[0-9]+}} = cir.unary(plus, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  vi4 m = -a;
+  // CHECK: %{{[0-9]+}} = cir.unary(minus, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  vi4 n = ~a;
+  // CHECK: %{{[0-9]+}} = cir.unary(not, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+
+  // Ternary conditional operator
+  vi4 tc = a ? b : d;
+  // CHECK: %{{[0-9]+}} = cir.vec.ternary(%{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+
+  // Comparisons
+  vi4 o = a == b;
+  // CHECK: %{{[0-9]+}} = cir.vec.cmp(eq, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  vi4 p = a != b;
+  // CHECK: %{{[0-9]+}} = cir.vec.cmp(ne, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  vi4 q = a < b;
+  // CHECK: %{{[0-9]+}} = cir.vec.cmp(lt, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  vi4 r = a > b;
+  // CHECK: %{{[0-9]+}} = cir.vec.cmp(gt, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  vi4 s = a <= b;
+  // CHECK: %{{[0-9]+}} = cir.vec.cmp(le, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  vi4 t = a >= b;
+  // CHECK: %{{[0-9]+}} = cir.vec.cmp(ge, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+
+  // __builtin_shufflevector
+  vi4 u = __builtin_shufflevector(a, b, 7, 5, 3, 1);
+  // CHECK: %{{[0-9]+}} = cir.vec.shuffle(%{{[0-9]+}}, %{{[0-9]+}} : !cir.vector<!s32i x 4>) [#cir.int<7> : !s64i, #cir.int<5> : !s64i, #cir.int<3> : !s64i, #cir.int<1> : !s64i] : !cir.vector<!s32i x 4>
+  vi4 v = __builtin_shufflevector(a, b);
+  // CHECK: %{{[0-9]+}} = cir.vec.shuffle.dynamic %{{[0-9]+}} : !cir.vector<!s32i x 4>, %{{[0-9]+}} : !cir.vector<!s32i x 4>
+
+  // Shifts
+  vi4 w = a << b;
+  // CHECK: %{{[0-9]+}} = cir.shift(left, {{%.*}} : !cir.vector<!s32i x 4>,
+  // CHECK-SAME: {{%.*}} : !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+  vi4 y = a >> b;
+  // CHECK: %{{[0-9]+}} = cir.shift(right, {{%.*}} : !cir.vector<!s32i x 4>,
+  // CHECK-SAME: {{%.*}} : !cir.vector<!s32i x 4>) -> !cir.vector<!s32i x 4>
+
+  vus2 z = { usx, usx };
+  // CHECK: %{{[0-9]+}} = cir.vec.create(%{{[0-9]+}}, %{{[0-9]+}} : !u16i, !u16i) : !cir.vector<!u16i x 2>
+  vus2 zamt = { 3, 4 };
+  // CHECK: %{{[0-9]+}} = cir.const #cir.const_vector<[#cir.int<3> : !u16i, #cir.int<4> : !u16i]> : !cir.vector<!u16i x 2>
+  vus2 zzz = z >> zamt;
+  // CHECK: %{{[0-9]+}} = cir.shift(right, {{%.*}} : !cir.vector<!u16i x 2>,
+  // CHECK-SAME: {{%.*}} : !cir.vector<!u16i x 2>) -> !cir.vector<!u16i x 2>
+
+  // Vector to scalar conversion
+  unsigned int zi = (unsigned int)z;
+  // CHECK: %{{[0-9]+}} = cir.cast bitcast {{%.*}} : !cir.vector<!u16i x 2> -> !u32i
+
+  // Scalar to vector conversion
+  vus2 zz = (vus2)zi;
+  // CHECK: %{{[0-9]+}} = cir.cast bitcast {{%.*}} : !u32i -> !cir.vector<!u16i x 2>
+
+  // Vector to vector conversion
+  vll2 aaa = (vll2)a;
+  // CHECK: %{{[0-9]+}} = cir.cast bitcast {{%.*}} : !cir.vector<!s32i x 4> -> !cir.vector<!s64i x 2>
+}
+
+void vector_double_test(int x, double y) {
+  // Vector constant.
+  vd2 a = { 1.5, 2.5 };
+  // CHECK: %{{[0-9]+}} = cir.const #cir.const_vector<[#cir.fp<1.500000e+00> : !cir.double, #cir.fp<2.500000e+00> : !cir.double]> : !cir.vector<!cir.double x 2>
+
+  // Non-const vector initialization.
+  vd2 b = { y, y + 1.0 };
+  // CHECK: %{{[0-9]+}} = cir.vec.create(%{{[0-9]+}}, %{{[0-9]+}} : !cir.double, !cir.double) : !cir.vector<!cir.double x 2>
+
+  // Incomplete vector initialization
+  vd2 bb = { y };
+  // CHECK: %[[#dzero:]] = cir.const #cir.fp<0.000000e+00> : !cir.double
+  // CHECK: %{{[0-9]+}} = cir.vec.create(%{{[0-9]+}}, %[[#dzero]] : !cir.double, !cir.double) : !cir.vector<!cir.double x 2>
+
+  // Scalar to vector conversion, a.k.a. vector splat.  Only valid as an
+  // operand of a binary operator, not as a regular conversion.
+  bb = a + 2.5;
+  // CHECK: %[[#twohalf:]] = cir.const #cir.fp<2.500000e+00> : !cir.double
+  // CHECK: %{{[0-9]+}} = cir.vec.splat %[[#twohalf]] : !cir.double, !cir.vector<!cir.double x 2>
+
+  // Extract element
+  double c = a[x];
+  // CHECK: %{{[0-9]+}} = cir.vec.extract %{{[0-9]+}}[%{{[0-9]+}} : !s32i] : !cir.vector<!cir.double x 2>
+
+  // Insert element
+  a[x] = y;
+  // CHECK: %[[#LOADEDVF:]] = cir.load{{.*}} %[[#STORAGEVF:]] : !cir.ptr<!cir.vector<!cir.double x 2>>, !cir.vector<!cir.double x 2>
+  // CHECK: %[[#UPDATEDVF:]] = cir.vec.insert %{{[0-9]+}}, %[[#LOADEDVF]][%{{[0-9]+}} : !s32i] : !cir.vector<!cir.double x 2>
+  // CHECK: cir.store{{.*}} %[[#UPDATEDVF]], %[[#STORAGEVF]] : !cir.vector<!cir.double x 2>, !cir.ptr<!cir.vector<!cir.double x 2>>
+
+  // Binary arithmetic operations
+  vd2 d = a + b;
+  // CHECK: %{{[0-9]+}} = cir.binop(add, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>
+  vd2 e = a - b;
+  // CHECK: %{{[0-9]+}} = cir.binop(sub, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>
+  vd2 f = a * b;
+  // CHECK: %{{[0-9]+}} = cir.binop(mul, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>
+  vd2 g = a / b;
+  // CHECK: %{{[0-9]+}} = cir.binop(div, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>
+
+  // Unary arithmetic operations
+  vd2 l = +a;
+  // CHECK: %{{[0-9]+}} = cir.unary(plus, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!cir.double x 2>
+  vd2 m = -a;
+  // CHECK: %{{[0-9]+}} = cir.unary(minus, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!cir.double x 2>
+
+  // Comparisons
+  vll2 o = a == b;
+  // CHECK: %{{[0-9]+}} = cir.vec.cmp(eq, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  vll2 p = a != b;
+  // CHECK: %{{[0-9]+}} = cir.vec.cmp(ne, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  vll2 q = a < b;
+  // CHECK: %{{[0-9]+}} = cir.vec.cmp(lt, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  vll2 r = a > b;
+  // CHECK: %{{[0-9]+}} = cir.vec.cmp(gt, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  vll2 s = a <= b;
+  // CHECK: %{{[0-9]+}} = cir.vec.cmp(le, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  vll2 t = a >= b;
+  // CHECK: %{{[0-9]+}} = cir.vec.cmp(ge, %{{[0-9]+}}, %{{[0-9]+}}) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+
+  // __builtin_convertvector
+  vus2 w = __builtin_convertvector(a, vus2);
+  // CHECK: %{{[0-9]+}} = cir.cast float_to_int %{{[0-9]+}} : !cir.vector<!cir.double x 2> -> !cir.vector<!u16i x 2>
+}
+
+void vector_integers_shifts_test() {
+  vi4 a = {1, 2, 3, 4};
+  uvi4 b = {5u, 6u, 7u, 8u};
+
+  vi4 shl = a << b;
+  // CHECK:  %{{[0-9]+}} = cir.shift(left, %{{[0-9]+}} : !cir.vector<!s32i x 4>, %{{[0-9]+}} : !cir.vector<!u32i x 4>) -> !cir.vector<!s32i x 4>
+  uvi4 shr = b >> a;
+  // CHECK: %{{[0-9]+}} = cir.shift(right, %{{[0-9]+}} : !cir.vector<!u32i x 4>, %{{[0-9]+}} : !cir.vector<!s32i x 4>) -> !cir.vector<!u32i x 4>
+}
+
+void logical_not() {
+   vi4 a;
+   vi4 b = !a;
+}
+
+// CHECK: %[[A_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["a"]
+// CHECK: %[[B_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["b", init]
+// CHECK: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
+// CHECK: %[[CONST_V0:.*]] = cir.const #cir.zero : !cir.vector<!s32i x 4>
+// CHECK: %[[RESULT:.*]] = cir.vec.cmp(eq, %[[TMP_A]], %[[CONST_V0]]) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+// CHECK: cir.store{{.*}} %[[RESULT]], %[[B_ADDR]] : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
+
+void logical_not_float() {
+  vf4 a;
+  vi4 b = !a;
+}
+
+// CHECK: %[[A_ADDR:.*]] = cir.alloca !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>, ["a"]
+// CHECK: %[[B_ADDR:.*]] = cir.alloca !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>, ["b", init]
+// CHECK: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<!cir.float x 4>>, !cir.vector<!cir.float x 4>
+// CHECK: %[[CONST_V0:.*]] = cir.const #cir.zero : !cir.vector<!cir.float x 4>
+// CHECK: %[[RESULT:.*]] = cir.vec.cmp(eq, %[[TMP_A]], %[[CONST_V0]]) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
+// CHECK: cir.store{{.*}} %[[RESULT]], %[[B_ADDR]] : !cir.vector<!s32i x 4>, !cir.ptr<!cir.vector<!s32i x 4>>
diff --git a/clang/test/CIR/Incubator/CodeGen/verbose-trap.cpp b/clang/test/CIR/Incubator/CodeGen/verbose-trap.cpp
new file mode 100644
index 0000000000000..94db33a656c7f
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/verbose-trap.cpp
@@ -0,0 +1,65 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:   -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir \
+// RUN:   -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu \
+// RUN:   -emit-llvm %s -o %t.ogcg.ll
+// RUN: FileCheck --input-file=%t.ogcg.ll %s --check-prefix=OGCG
+
+// Test basic verbose trap with simple string literals
+void test_basic() {
+  __builtin_verbose_trap("Security", "Buffer overflow detected");
+  // CIR: cir.trap
+  // LLVM: call void @llvm.trap()
+  // OGCG: call void @llvm.trap()
+}
+
+// Test with different category and message
+void test_different_messages() {
+  __builtin_verbose_trap("Assertion", "x != nullptr");
+  // CIR: cir.trap
+  // LLVM: call void @llvm.trap()
+  // OGCG: call void @llvm.trap()
+}
+
+// Test with constexpr string pointers
+constexpr const char* kCategory = "Performance";
+constexpr const char* kMessage = "Unexpected slow path";
+
+void test_constexpr() {
+  __builtin_verbose_trap(kCategory, kMessage);
+  // CIR: cir.trap
+  // LLVM: call void @llvm.trap()
+  // OGCG: call void @llvm.trap()
+}
+
+// Test that trap acts as a terminator (code after is unreachable)
+void test_terminator() {
+  __builtin_verbose_trap("Error", "Invalid state");
+  // CIR: cir.trap
+  // LLVM: call void @llvm.trap()
+  // OGCG: call void @llvm.trap()
+
+  // The following code should still be in the IR but unreachable
+  int x = 42; // CIR: cir.store
+  (void)x;
+}
+
+// Test multiple traps in the same function
+void test_multiple_traps(bool condition) {
+  if (condition) {
+    __builtin_verbose_trap("Branch1", "First trap");
+    // CIR: cir.trap
+    // LLVM: call void @llvm.trap()
+    // OGCG: call void @llvm.trap()
+  } else {
+    __builtin_verbose_trap("Branch2", "Second trap");
+    // CIR: cir.trap
+    // LLVM: call void @llvm.trap()
+    // OGCG: call void @llvm.trap()
+  }
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/virtual-base-cast.cpp b/clang/test/CIR/Incubator/CodeGen/virtual-base-cast.cpp
new file mode 100644
index 0000000000000..bd16c960c6d46
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/virtual-base-cast.cpp
@@ -0,0 +1,61 @@
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -std=c++20 -mconstructor-aliases -O0 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -std=c++20 -mconstructor-aliases -O0 -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+struct A { int a; virtual int aa(); };
+struct B { int b; virtual int bb(); };
+struct C : virtual A, virtual B { int c; virtual int aa(); virtual int bb(); };
+struct AA { int a; virtual int aa(); };
+struct BB { int b; virtual int bb(); };
+struct CC : AA, BB { virtual int aa(); virtual int bb(); virtual int cc(); };
+struct D : virtual C, virtual CC { int e; };
+
+D* x;
+
+A* a() { return x; }
+// CIR-LABEL: @_Z1av()
+
+// This uses the vtable to get the offset to the base object. The offset from
+// the vptr to the base object offset in the vtable is a compile-time constant.
+// CIR: %[[X_ADDR:.*]] = cir.get_global @x : !cir.ptr<!cir.ptr<!rec_D>>
+// CIR: %[[X:.*]] = cir.load{{.*}} %[[X_ADDR]]
+// CIR: %[[X_VPTR_ADDR:.*]] = cir.vtable.get_vptr %[[X]] : !cir.ptr<!rec_D> -> !cir.ptr<!cir.vptr>
+// CIR: %[[X_VPTR_BASE:.*]] = cir.load{{.*}} %[[X_VPTR_ADDR]] : !cir.ptr<!cir.vptr>, !cir.vptr
+// CIR: %[[X_BASE_I8PTR:.*]] = cir.cast bitcast %[[X_VPTR_BASE]] : !cir.vptr -> !cir.ptr<!u8i>
+// CIR:  %[[OFFSET_OFFSET:.*]] = cir.const #cir.int<-32> : !s64i
+// CIR:  %[[OFFSET_PTR:.*]] = cir.ptr_stride %[[X_BASE_I8PTR]], %[[OFFSET_OFFSET]] : (!cir.ptr<!u8i>, !s64i) -> !cir.ptr<!u8i>
+// CIR:  %[[OFFSET_PTR_CAST:.*]] = cir.cast bitcast %[[OFFSET_PTR]] : !cir.ptr<!u8i> -> !cir.ptr<!s64i>
+// CIR:  %[[OFFSET:.*]] = cir.load{{.*}} %[[OFFSET_PTR_CAST]] : !cir.ptr<!s64i>, !s64i
+// CIR:  %[[VBASE_ADDR:.*]] = cir.ptr_stride {{.*}}, %[[OFFSET]] : (!cir.ptr<!u8i>, !s64i) -> !cir.ptr<!u8i>
+// CIR:  cir.cast bitcast %[[VBASE_ADDR]] : !cir.ptr<!u8i> -> !cir.ptr<!rec_D>
+
+// FIXME: this version should include null check.
+// LLVM-LABEL: @_Z1av()
+// LLVM:  %[[OFFSET_OFFSET:.*]] = getelementptr i8, ptr {{.*}}, i64 -32
+// LLVM:  %[[OFFSET_PTR:.*]] = load i64, ptr %[[OFFSET_OFFSET]], align 8
+// LLVM:  %[[VBASE_ADDR:.*]] = getelementptr i8, ptr {{.*}}, i64 %[[OFFSET_PTR]]
+// LLVM:  store ptr %[[VBASE_ADDR]], ptr {{.*}}, align 8
+
+B* b() { return x; }
+BB* c() { return x; }
+
+// Put the vbptr at a non-zero offset inside a non-virtual base.
+struct E { int e; };
+struct F : E, D { int f; };
+
+F* y;
+
+BB* d() { return y; }
+// CIR-LABEL: @_Z1dv
+// CIR: %[[OFFSET:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!s64i>, !s64i
+// CIR: %[[ADJUST:.*]] = cir.const #cir.int<16> : !s64i
+// CIR: cir.binop(add, %[[OFFSET]], %[[ADJUST]]) : !s64i
+
+// LLVM-LABEL: @_Z1dv
+// LLVM: %[[OFFSET_OFFSET:.*]] = getelementptr i8, ptr {{.*}}, i64 -48
+// LLVM: %[[OFFSET_PTR:.*]] = load i64, ptr %[[OFFSET_OFFSET]], align 8
+// LLVM: %[[ADJUST:.*]] = add i64 %[[OFFSET_PTR]], 16
+// LLVM: %[[VBASE_ADDR:.*]] = getelementptr i8, ptr {{.*}}, i64 %[[ADJUST]]
+// LLVM: store ptr %[[VBASE_ADDR]],
diff --git a/clang/test/CIR/Incubator/CodeGen/virtual-destructor-calls.cpp b/clang/test/CIR/Incubator/CodeGen/virtual-destructor-calls.cpp
new file mode 100644
index 0000000000000..df0e681e37a18
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/virtual-destructor-calls.cpp
@@ -0,0 +1,92 @@
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -std=c++20 -mconstructor-aliases -O0 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -std=c++20 -mconstructor-aliases -O1 -fclangir -emit-cir %s -o %t-o1.cir
+// RUN: FileCheck --check-prefix=CIR_O1 --input-file=%t-o1.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -std=c++20 -mconstructor-aliases -O0 -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// FIXME: LLVM IR dialect does not yet support function ptr globals, which precludes
+// a lot of the proper semantics for properly representing alias functions in LLVM
+// (see the note on LLVM_O1 below).
+
+struct Member {
+  ~Member();
+};
+
+struct A {
+  virtual ~A();
+};
+
+struct B : A {
+  Member m;
+  virtual ~B();
+};
+
+// Aliases are inserted before the function definitions in LLVM IR
+// FIXME: These should have unnamed_addr set.
+// LLVM: @_ZN1BD1Ev = alias void (ptr), ptr @_ZN1BD2Ev
+// LLVM: @_ZN1CD1Ev = alias void (ptr), ptr @_ZN1CD2Ev
+
+// Base dtor: actually calls A's base dtor.
+// CIR: cir.func {{.*}} @_ZN1BD2Ev
+// CIR:   cir.call @_ZN6MemberD1Ev
+// CIR:   cir.call @_ZN1AD2Ev
+// LLVM: define{{.*}} void @_ZN1BD2Ev(ptr
+// LLVM: call void @_ZN6MemberD1Ev
+// LLVM: call void @_ZN1AD2Ev
+
+// Complete dtor: just an alias because there are no virtual bases.
+// CIR: cir.func {{.*}} @_ZN1BD1Ev(!cir.ptr<!rec_B>) special_member<#cir.cxx_dtor<!rec_B>> alias(@_ZN1BD2Ev)
+
+// Deleting dtor: defers to the complete dtor.
+// LLVM: define{{.*}} void @_ZN1BD0Ev(ptr
+// LLVM: call void @_ZN1BD1Ev
+// LLVM: call void @_ZdlPv
+
+// (aliases from C)
+// CIR: cir.func {{.*}} @_ZN1CD2Ev(%arg0: !cir.ptr<!rec_C>{{.*}})) {{.*}} {
+// CIR: cir.func {{.*}} @_ZN1CD1Ev(!cir.ptr<!rec_C>) special_member<#cir.cxx_dtor<!rec_C>> alias(@_ZN1CD2Ev)
+
+// CIR_O1-NOT: cir.func {{.*}} @_ZN1CD2Ev(%arg0: !cir.ptr<!rec_C>{{.*}})) {{.*}} {
+// CIR_O1: cir.func private dso_local @_ZN1CD2Ev(!cir.ptr<!rec_C>) special_member<#cir.cxx_dtor<!rec_C>> alias(@_ZN1BD2Ev)
+// CIR_O1: cir.func private dso_local @_ZN1CD1Ev(!cir.ptr<!rec_C>) special_member<#cir.cxx_dtor<!rec_C>> alias(@_ZN1CD2Ev)
+
+// FIXME: LLVM output should be: @_ZN1CD2Ev ={{.*}} unnamed_addr alias {{.*}} @_ZN1BD2Ev
+// LLVM: define dso_local void @_ZN1CD2Ev(ptr
+// FIXME: note that LLVM_O1 cannot be tested because the canonicalizers running
+// on top of LLVM IR dialect delete _ZN1CD2Ev in its current form (a function
+// declaration) since it's not used in the TU.
+
+B::~B() { }
+
+struct C : B {
+  ~C();
+};
+
+C::~C() { }
+
+// Complete dtor: just an alias (checked above).
+
+// Deleting dtor: defers to the complete dtor.
+// CIR: cir.func {{.*}} @_ZN1CD0Ev
+// CIR: cir.call @_ZN1CD1Ev
+// CIR: cir.call @_ZdlPvm
+// LLVM: define{{.*}} void @_ZN1CD0Ev(ptr
+// LLVM: call void @_ZN1CD1Ev
+// LLVM: call void @_ZdlPv
+
+// Base dtor: just an alias to B's base dtor.
+
+namespace PR12798 {
+  // A qualified call to a base class destructor should not undergo virtual
+  // dispatch. Template instantiation used to lose the qualifier.
+  struct A { virtual ~A(); };
+  template<typename T> void f(T *p) { p->A::~A(); }
+
+  // CIR: cir.func {{.*}} @_ZN7PR127981fINS_1AEEEvPT_
+  // CIR: cir.call @_ZN7PR127981AD1Ev
+  // LLVM: define {{.*}} @_ZN7PR127981fINS_1AEEEvPT_(
+  // LLVM: call void @_ZN7PR127981AD1Ev(
+  template void f(A*);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/virtual-destructor-explicit-unqualified-call.cpp b/clang/test/CIR/Incubator/CodeGen/virtual-destructor-explicit-unqualified-call.cpp
new file mode 100644
index 0000000000000..99f9570e6844b
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/virtual-destructor-explicit-unqualified-call.cpp
@@ -0,0 +1,58 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-llvm %s -o %t.cir
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -emit-llvm %s -o %t.cir
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.cir %s
+
+class A {
+  virtual ~A();
+  A B(A);
+};
+A A::B(A) {
+  // CIR-LABEL:   cir.func {{.*}} @_ZN1A1BES_(
+  // CIR-SAME:      %[[THIS_ARG:.*]]: !cir.ptr<!rec_A>
+  // CIR-NEXT:           %[[THIS_VAR:.*]] = cir.alloca !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>
+  // CIR:                cir.store %[[THIS_ARG]], %[[THIS_VAR]] : !cir.ptr<!rec_A>, !cir.ptr<!cir.ptr<!rec_A>>
+  // CIR:                %[[THIS:.*]] = cir.load %[[THIS_VAR]] : !cir.ptr<!cir.ptr<!rec_A>>, !cir.ptr<!rec_A>
+  // CIR-NEXT:           %[[VPTR_PTR:.*]] = cir.vtable.get_vptr %[[THIS]] : !cir.ptr<!rec_A> -> !cir.ptr<!cir.vptr>
+  // CIR-NEXT:           %[[VPTR:.*]] = cir.load align(8) %[[VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr
+  // CIR-NEXT:           %[[DTOR_PTR:.*]] = cir.vtable.get_virtual_fn_addr %[[VPTR]][0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_A>)>>>
+  // CIR-NEXT:           %[[DTOR:.*]] = cir.load align(8) %[[DTOR_PTR]] : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_A>)>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_A>)>>
+  // CIR-NEXT:           cir.call %[[DTOR]](%[[THIS]]) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_A>)>>, !cir.ptr<!rec_A>) -> ()
+  // CIR-NEXT:           cir.trap
+  // CIR-NEXT:         }
+
+
+  // LLVM-LABEL:   define dso_local %class.A @_ZN1A1BES_(
+  // LLVM-SAME:      ptr %[[THIS_ARG:[0-9]+]],
+  // LLVM-NEXT:          %[[THIS_VAR:.*]] = alloca ptr, i64 1, align 8
+  // LLVM:               store ptr %[[THIS_ARG]], ptr %[[THIS_VAR]], align 8
+  // LLVM:               %[[THIS:.*]] = load ptr, ptr %[[THIS_VAR]], align 8
+  // LLVM-NEXT:          %[[VTABLE_PTR:.*]] = load ptr, ptr %[[THIS]], align 8
+  // LLVM-NEXT:          %[[VIRT_DTOR_ADDR:.*]] = getelementptr inbounds ptr, ptr %[[VTABLE_PTR]], i32 0
+  // LLVM-NEXT:          %[[DTOR:.*]] = load ptr, ptr %[[VIRT_DTOR_ADDR]], align 8
+  // LLVM-NEXT:          call void %[[DTOR]](ptr %[[THIS]])
+  // LLVM-NEXT:          call void @llvm.trap()
+  // LLVM-NEXT:          unreachable
+  // LLVM-NEXT:        }
+
+  
+  // OGCG-LABEL:   define dso_local void @_ZN1A1BES_(
+  // OGCG-SAME:      ptr {{.*}}%[[THIS_ARG:.*]],
+  // OGCG:               %[[VAL_0:.*]] = alloca ptr, align 8
+  // OGCG-NEXT:          %[[THIS_VAR:.*]] = alloca ptr, align 8
+  // OGCG:               store ptr %[[THIS_ARG]], ptr %[[THIS_VAR]], align 8
+  // OGCG:               %[[THIS:.*]] = load ptr, ptr %[[THIS_VAR]], align 8
+  // OGCG-NEXT:          %[[VTABLE:.*]] = load ptr, ptr %[[THIS]], align 8
+  // OGCG-NEXT:          %[[VIRT_DTOR_ADDR:.*]] = getelementptr inbounds ptr, ptr %[[VTABLE]], i64 0
+  // OGCG-NEXT:          %[[DTOR:.*]] = load ptr, ptr %[[VIRT_DTOR_ADDR]], align 8
+  // OGCG-NEXT:          call void %[[DTOR]](ptr noundef nonnull align 8 dereferenceable(8) %[[THIS]]) #2
+  // OGCG-NEXT:          call void @llvm.trap()
+  // OGCG-NEXT:          unreachable
+  // OGCG-NEXT:        }
+
+  this->~A();
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/virtual-function-calls.cpp b/clang/test/CIR/Incubator/CodeGen/virtual-function-calls.cpp
new file mode 100644
index 0000000000000..09f48f7c16d23
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/virtual-function-calls.cpp
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+struct A {
+  virtual void f(char);
+};
+
+void f1(A *a) {
+  a->f('c');
+}
+
+// CIR: cir.func{{.*}} @_Z2f1P1A(%arg0: !cir.ptr<!rec_A> {{.*}})
+// CIR:   %[[A_ADDR:.*]] = cir.alloca !cir.ptr<!rec_A>
+// CIR:   cir.store %arg0, %[[A_ADDR]]
+// CIR:   %[[A:.*]] = cir.load{{.*}} %[[A_ADDR]]
+// CIR:   %[[C_LITERAL:.*]] = cir.const #cir.int<99> : !s8i
+// CIR:   %[[VPTR_ADDR:.*]] = cir.vtable.get_vptr %[[A]] : !cir.ptr<!rec_A> -> !cir.ptr<!cir.vptr>
+// CIR:   %[[VPTR:.*]] = cir.load{{.*}} %[[VPTR_ADDR]] : !cir.ptr<!cir.vptr>, !cir.vptr
+// CIR:   %[[FN_PTR_PTR:.*]] = cir.vtable.get_virtual_fn_addr %[[VPTR]][0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_A>, !s8i)>>>
+// CIR:   %[[FN_PTR:.*]] = cir.load{{.*}} %[[FN_PTR_PTR]] : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_A>, !s8i)>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_A>, !s8i)>>
+// CIR:   cir.call %[[FN_PTR]](%[[A]], %[[C_LITERAL]]) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_A>, !s8i)>>, !cir.ptr<!rec_A>, !s8i) -> ()
+
+// LLVM: define{{.*}} void @_Z2f1P1A(ptr %[[ARG0:.*]])
+// LLVM:   %[[A_ADDR:.*]] = alloca ptr
+// LLVM:   store ptr %[[ARG0]], ptr %[[A_ADDR]]
+// LLVM:   %[[A:.*]] = load ptr, ptr %[[A_ADDR]]
+// LLVM:   %[[VPTR:.*]] = load ptr, ptr %[[A]]
+// LLVM:   %[[FN_PTR_PTR:.*]] = getelementptr inbounds ptr, ptr %[[VPTR]], i32 0
+// LLVM:   %[[FN_PTR:.*]] = load ptr, ptr %[[FN_PTR_PTR]]
+// LLVM:   call void %[[FN_PTR]](ptr %[[A]], i8 99)
+
+struct B : virtual A {
+  virtual void f();
+};
+
+void f2(B * b) {
+  b->f();
+}
+
+// CIR: cir.func{{.*}} @_Z2f2P1B(%arg0: !cir.ptr<!rec_B> {{.*}})
+// CIR:   %[[B_ADDR:.*]] = cir.alloca !cir.ptr<!rec_B>
+// CIR:   cir.store %arg0, %[[B_ADDR]]
+// CIR:   %[[B:.*]] = cir.load{{.*}} %[[B_ADDR]]
+// CIR:   %[[VPTR_ADDR:.*]] = cir.vtable.get_vptr %[[B]] : !cir.ptr<!rec_B> -> !cir.ptr<!cir.vptr>
+// CIR:   %[[VPTR:.*]] = cir.load{{.*}} %[[VPTR_ADDR]] : !cir.ptr<!cir.vptr>, !cir.vptr
+// CIR:   %[[FN_PTR_PTR:.*]] = cir.vtable.get_virtual_fn_addr %[[VPTR]][1] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_B>)>>>
+// CIR:   %[[FN_PTR:.*]] = cir.load{{.*}} %[[FN_PTR_PTR]] : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_B>)>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_B>)>>
+// CIR:   cir.call %[[FN_PTR]](%[[B]]) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_B>)>>, !cir.ptr<!rec_B>) -> ()
+
+// LLVM: define{{.*}} void @_Z2f2P1B(ptr %[[ARG0:.*]])
+// LLVM:   %[[B_ADDR:.*]] = alloca ptr
+// LLVM:   store ptr %[[ARG0]], ptr %[[B_ADDR]]
+// LLVM:   %[[B:.*]] = load ptr, ptr %[[B_ADDR]]
+// LLVM:   %[[VPTR:.*]] = load ptr, ptr %[[B]]
+// LLVM:   %[[FN_PTR_PTR:.*]] = getelementptr inbounds ptr, ptr %[[VPTR]], i32 1
+// LLVM:   %[[FN_PTR:.*]] = load ptr, ptr %[[FN_PTR_PTR]]
+// LLVM:   call void %[[FN_PTR]](ptr %[[B]])
diff --git a/clang/test/CIR/Incubator/CodeGen/visibility-attribute.c b/clang/test/CIR/Incubator/CodeGen/visibility-attribute.c
new file mode 100644
index 0000000000000..087e7938f8152
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/visibility-attribute.c
@@ -0,0 +1,55 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck %s -input-file=%t.cir -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck %s -input-file=%t.ll -check-prefix=LLVM
+
+extern int glob_default;
+// CIR: cir.global "private" external @glob_default : !s32i
+// LLVM: @glob_default = external global i32
+
+extern int __attribute__((__visibility__("hidden"))) glob_hidden;
+// CIR: cir.global "private" hidden external @glob_hidden : !s32i
+// LLVM: @glob_hidden = external hidden global i32
+
+extern int __attribute__((__visibility__("protected"))) glob_protected;
+// CIR: cir.global "private" protected external @glob_protected : !s32i
+// LLVM: @glob_protected = external protected global i32
+
+int call_glob()
+{
+  return glob_default + glob_hidden + glob_protected;
+}
+
+void foo_default();
+// CIR: cir.func {{.*}} private @foo_default(...)
+// LLVM: declare void @foo_default(...)
+
+void __attribute__((__visibility__("hidden"))) foo_hidden();
+// CIR: cir.func {{.*}} private hidden @foo_hidden(...)
+// LLVM: declare hidden void @foo_hidden(...)
+
+void __attribute__((__visibility__("protected"))) foo_protected();
+// CIR: cir.func {{.*}} private protected @foo_protected(...)
+// LLVM: declare protected void @foo_protected(...)
+
+static void static_foo_default() {}
+// CIR: cir.func {{.*}} internal private {{.*}} @static_foo_default()
+// LLVM: define internal void @static_foo_default()
+
+static void __attribute__((__visibility__("hidden"))) static_foo_hidden() {}
+// CIR: cir.func {{.*}} internal private {{.*}} @static_foo_hidden()
+// LLVM: define internal void @static_foo_hidden()
+
+static void __attribute__((__visibility__("protected"))) static_foo_protected() {}
+// CIR: cir.func {{.*}} internal private {{.*}} @static_foo_protected()
+// LLVM: define internal void @static_foo_protected()
+
+void call_foo()
+{
+  foo_default();
+  foo_hidden();
+  foo_protected();
+  static_foo_default();
+  static_foo_hidden();
+  static_foo_protected();
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/vla.c b/clang/test/CIR/Incubator/CodeGen/vla.c
new file mode 100644
index 0000000000000..9eb733b30faa4
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vla.c
@@ -0,0 +1,191 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CHECK --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// CHECK:  cir.func {{.*}} @f0(%arg0: !s32i
+// CHECK:    [[TMP0:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["len", init] {alignment = 4 : i64}
+// CHECK:    [[TMP1:%.*]] = cir.alloca !cir.ptr<!u8i>, !cir.ptr<!cir.ptr<!u8i>>, ["saved_stack"] {alignment = 8 : i64}
+// CHECK:    cir.store{{.*}} %arg0, [[TMP0]] : !s32i, !cir.ptr<!s32i>
+// CHECK:    [[TMP2:%.*]] = cir.load{{.*}} [[TMP0]] : !cir.ptr<!s32i>, !s32i
+// CHECK:    [[TMP3:%.*]] = cir.cast integral [[TMP2]] : !s32i -> !u64i
+// CHECK:    [[TMP4:%.*]] = cir.stack_save : !cir.ptr<!u8i>
+// CHECK:    cir.store{{.*}} [[TMP4]], [[TMP1]] : !cir.ptr<!u8i>, !cir.ptr<!cir.ptr<!u8i>>
+// CHECK:    [[TMP5:%.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, [[TMP3]] : !u64i, ["vla"] {alignment = 16 : i64}
+// CHECK:    [[TMP6:%.*]] = cir.load{{.*}} [[TMP1]] : !cir.ptr<!cir.ptr<!u8i>>, !cir.ptr<!u8i>
+// CHECK:    cir.stack_restore [[TMP6]] : !cir.ptr<!u8i>
+void f0(int len) {
+    int a[len];
+}
+
+//     CHECK: cir.func {{.*}} @f1
+// CHECK-NOT:   cir.stack_save
+// CHECK-NOT:   cir.stack_restore
+//     CHECK:   cir.return
+int f1(int n) {
+  return sizeof(int[n]);
+}
+
+// CHECK: cir.func {{.*}} @f2
+// CHECK:   cir.stack_save
+// FIXME(cir): stack_restore is not emitted here yet; once fixed, check: cir.stack_restore
+// CHECK:   cir.return
+int f2(int x) {
+  int vla[x];
+  return vla[x-1];
+}
+
+// CHECK: cir.func {{.*}} @f3
+// CHECK:   cir.stack_save
+// CHECK:   cir.stack_restore
+// CHECK:   cir.return
+void f3(int count) {
+  int a[count];
+
+  do {  } while (0);
+  if (a[0] != 3) {}
+}
+
+
+//     CHECK: cir.func {{.*}} @f4
+// CHECK-NOT:   cir.stack_save
+// CHECK-NOT:   cir.stack_restore
+//     CHECK:   cir.return
+void f4(int count) {
+  // Make sure we emit sizes correctly in some obscure cases
+  int (*a[5])[count];
+  int (*b)[][count];
+}
+
+// FIXME(cir): the test is commented due to stack_restore operation
+// is not emitted for the if branch
+// void f5(unsigned x) {
+//   while (1) {
+//     char s[x];
+//     if (x > 5) //: stack restore here is missed
+//       break;
+//   }
+// }
+
+// Check no errors happen
+void function1(short width, int data[][width]) {}
+void function2(short width, int data[][width][width]) {}
+void f6(void) {
+     int bork[4][13][15];
+
+     function1(1, bork[2]);
+     function2(1, bork);
+}
+
+static int GLOB;
+int f7(int n)
+{
+  GLOB = 0;
+  char b[1][n+3];
+
+  __typeof__(b[GLOB++]) c;
+  return GLOB;
+}
+
+double f8(int n, double (*p)[n][5]) {
+    return p[1][2][3];
+}
+
+int f9(unsigned n, char (*p)[n][n+1][6]) {
+    __typeof(p) p2 = (p + n/2) - n/4;
+
+  return p2 - p;
+}
+
+long f10(int n) {
+    int (*p)[n];
+    int (*q)[n];
+    return q - p;
+}
+// CHECK-LABEL: cir.func {{.*}} @f10
+// CHECK: %[[Q_VAL:[0-9]+]] = cir.load {{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK: %[[P_VAL:[0-9]+]] = cir.load {{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK: %[[PTRDIFF:[0-9]+]] = cir.ptr_diff %[[Q_VAL]], %[[P_VAL]] : !cir.ptr<!s32i> -> !s64i
+// CHECK: %[[N_S64:[0-9]+]] = cir.cast integral %{{.*}} : !u64i -> !s64i
+// CHECK: %[[DIV:[0-9]+]] = cir.binop(div, %[[PTRDIFF]], %[[N_S64]]) : !s64i
+
+// LLVM-LABEL: @f10(
+// LLVM: %[[QI:[0-9]+]] = ptrtoint ptr %{{.*}} to i64
+// LLVM: %[[PI:[0-9]+]] = ptrtoint ptr %{{.*}} to i64
+// LLVM: %[[DIFF_BYTES:[0-9]+]] = sub i64 %[[QI]], %[[PI]]
+// LLVM: %[[PTRDIFF_INTS:[0-9]+]] = sdiv i64 %[[DIFF_BYTES]], 4
+// LLVM: %[[RESULT:[0-9]+]] = sdiv i64 %[[PTRDIFF_INTS]], %{{.*}}
+
+// OGCG-LABEL: @f10(
+// OGCG: %{{.*}} = ptrtoint ptr %{{.*}} to i64
+// OGCG: %{{.*}} = ptrtoint ptr %{{.*}} to i64
+// OGCG: %{{.*}} = sub i64 %{{.*}}, %{{.*}}
+// OGCG: %{{.*}} = mul nuw i64 4, %{{.*}}
+// OGCG: %{{.*}} = sdiv exact i64 %{{.*}}, %{{.*}}
+
+long f11(int n, int m) {
+    int (*p)[n][m];
+    int (*q)[n][m];
+    return q - p;
+}
+// CHECK-LABEL: cir.func {{.*}} @f11
+
+// # allocas
+// CHECK: %[[N_ADDR:[0-9]+]] = cir.alloca !s32i, !cir.ptr<!s32i>
+// CHECK: %[[M_ADDR:[0-9]+]] = cir.alloca !s32i, !cir.ptr<!s32i>
+// CHECK: %[[RET:[0-9]+]] = cir.alloca !s64i, !cir.ptr<!s64i>
+// CHECK: %[[P:[0-9]+]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// CHECK: %[[Q:[0-9]+]] = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+
+// # store n, m
+// CHECK: cir.store %arg0, %[[N_ADDR]] : !s32i, !cir.ptr<!s32i>
+// CHECK: cir.store %arg1, %[[M_ADDR]] : !s32i, !cir.ptr<!s32i>
+
+// # load n and cast to u64
+// CHECK: %[[N_LOAD:[0-9]+]] = cir.load {{.*}} %[[N_ADDR]] : !cir.ptr<!s32i>, !s32i
+// CHECK: %[[N_U64:[0-9]+]] = cir.cast integral %[[N_LOAD]] : !s32i -> !u64i
+
+// # load m and cast to u64
+// CHECK: %[[M_LOAD:[0-9]+]] = cir.load {{.*}} %[[M_ADDR]] : !cir.ptr<!s32i>, !s32i
+// CHECK: %[[M_U64:[0-9]+]] = cir.cast integral %[[M_LOAD]] : !s32i -> !u64i
+
+// # load q and p
+// CHECK: %[[Q_VAL:[0-9]+]] = cir.load {{.*}} %[[Q]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// CHECK: %[[P_VAL:[0-9]+]] = cir.load {{.*}} %[[P]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// 
+// # ptrdiff → (byte_diff / 4)
+// CHECK: %[[PTRDIFF:[0-9]+]] = cir.ptr_diff %[[Q_VAL]], %[[P_VAL]] : !cir.ptr<!s32i> -> !s64i
+
+// # compute n*m
+// CHECK: %[[NM_U64:[0-9]+]] = cir.binop(mul, %[[N_U64]], %[[M_U64]]) : !u64i
+// CHECK: %[[NM_S64:[0-9]+]] = cir.cast integral %[[NM_U64]] : !u64i -> !s64i
+
+// # divide ptrdiff_ints by (n*m)
+// CHECK: %[[RESULT:[0-9]+]] = cir.binop(div, %[[PTRDIFF]], %[[NM_S64]]) : !s64i
+
+// # store + return
+// CHECK: cir.store{{.*}} %[[RESULT]], %[[RET]] : !s64i, !cir.ptr<!s64i>
+// CHECK: %[[RETVAL:[0-9]+]] = cir.load{{.*}} %[[RET]] : !cir.ptr<!s64i>, !s64i
+// CHECK: cir.return %[[RETVAL]] : !s64i
+
+
+// LLVM-LABEL: @f11(
+// # load q and p
+// LLVM: %[[QI:[0-9]+]] = ptrtoint ptr %{{.*}} to i64
+// LLVM: %[[PI:[0-9]+]] = ptrtoint ptr %{{.*}} to i64
+// LLVM: %[[DIFF_BYTES:[0-9]+]] = sub i64 %[[QI]], %[[PI]]
+// LLVM: %[[PTRDIFF_INTS:[0-9]+]] = sdiv i64 %[[DIFF_BYTES]], 4
+// LLVM: %[[NM:[0-9]+]] = mul i64 %{{.*}}, %{{.*}}
+// LLVM: %[[RESULT:[0-9]+]] = sdiv i64 %[[PTRDIFF_INTS]], %[[NM]]
+
+// OGCG-LABEL: @f11(
+// OGCG: %{{.*}} = ptrtoint ptr %{{.*}} to i64
+// OGCG: %{{.*}} = ptrtoint ptr %{{.*}} to i64
+// OGCG: %{{.*}} = sub i64 %{{.*}}, %{{.*}}
+// OGCG: %{{.*}} = mul nuw i64 %{{.*}}, %{{.*}}
+// OGCG: %{{.*}} = mul nuw i64 4, %{{.*}}
+// OGCG: %{{.*}} = sdiv exact i64 %{{.*}}, %{{.*}}
diff --git a/clang/test/CIR/Incubator/CodeGen/volatile.cpp b/clang/test/CIR/Incubator/CodeGen/volatile.cpp
new file mode 100644
index 0000000000000..e5c414958a819
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/volatile.cpp
@@ -0,0 +1,70 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+int test_load(volatile int *ptr) {
+  return *ptr;
+}
+
+// CHECK: cir.func {{.*}} @_Z9test_loadPVi
+// CHECK:   %{{.+}} = cir.load volatile
+
+void test_store(volatile int *ptr) {
+  *ptr = 42;
+}
+
+// CHECK: cir.func {{.*}} @_Z10test_storePVi
+// CHECK:   cir.store volatile
+
+struct Foo {
+  int x;
+  volatile int y;
+  volatile int z: 4;
+};
+
+int test_load_field1(volatile Foo *ptr) {
+  return ptr->x;
+}
+
+// CHECK: cir.func {{.*}} @_Z16test_load_field1PV3Foo
+// CHECK:   %[[MemberAddr:.*]] = cir.get_member
+// CHECK:   %{{.+}} = cir.load volatile{{.*}} %[[MemberAddr]]
+
+int test_load_field2(Foo *ptr) {
+  return ptr->y;
+}
+
+// CHECK: cir.func {{.*}} @_Z16test_load_field2P3Foo
+// CHECK:   %[[MemberAddr:.+]] = cir.get_member
+// CHECK:   %{{.+}} = cir.load volatile{{.*}} %[[MemberAddr]]
+
+int test_load_field3(Foo *ptr) {
+  return ptr->z;
+}
+
+// CHECK: cir.func {{.*}} @_Z16test_load_field3P3Foo
+// CHECK:   %[[MemberAddr:.+]] = cir.get_member
+// CHECK:   %{{.+}} = cir.get_bitfield align(4) (#bfi_z, %[[MemberAddr:.+]] {is_volatile}
+
+void test_store_field1(volatile Foo *ptr) {
+  ptr->x = 42;
+}
+
+// CHECK: cir.func {{.*}} @_Z17test_store_field1PV3Foo
+// CHECK:   %[[MemberAddr:.+]] = cir.get_member
+// CHECK:   cir.store volatile{{.*}} %{{.+}}, %[[MemberAddr]]
+
+void test_store_field2(Foo *ptr) {
+  ptr->y = 42;
+}
+
+// CHECK: cir.func {{.*}} @_Z17test_store_field2P3Foo
+// CHECK:   %[[MemberAddr:.+]] = cir.get_member
+// CHECK:   cir.store volatile{{.*}} %{{.+}}, %[[MemberAddr]]
+
+void test_store_field3(Foo *ptr) {
+  ptr->z = 4;
+}
+
+// CHECK: cir.func {{.*}} @_Z17test_store_field3P3Foo
+// CHECK:   %[[MemberAddr:.+]] = cir.get_member
+// CHECK:   cir.set_bitfield align(4) (#bfi_z, %[[MemberAddr:.+]] : !cir.ptr<!u8i>, %1 : !s32i) {is_volatile}
diff --git a/clang/test/CIR/Incubator/CodeGen/vtable-available-externally.cpp b/clang/test/CIR/Incubator/CodeGen/vtable-available-externally.cpp
new file mode 100644
index 0000000000000..d1bc900729cd7
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vtable-available-externally.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 %s -I%S -I%S/../Inputs -triple x86_64-unknown-linux-gnu -std=c++98 -O0 -disable-llvm-passes -emit-cir -o %t
+// RUN: FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK %s < %t
+// RUN: %clang_cc1 %s -I%S -I%S/../Inputs -triple x86_64-unknown-linux-gnu -std=c++98 -O2 -disable-llvm-passes -emit-cir -o %t.opt
+// RUN: FileCheck -allow-deprecated-dag-overlap --check-prefix=CHECK-FORCE-EMIT %s < %t.opt
+
+#include <typeinfo>
+
+// CHECK: cir.global{{.*}} external @_ZTV1A
+// CHECK-FORCE-EMIT-DAG: cir.global{{.*}} available_externally @_ZTV1A
+struct A {
+  A();
+  virtual void f();
+  virtual ~A() { }
+};
+
+A::A() { }
+
+void f(A* a) {
+  a->f();
+};
+void g() {
+  A a;
+  f(&a);
+}
diff --git a/clang/test/CIR/Incubator/CodeGen/vtable-comdat-divergence.cpp b/clang/test/CIR/Incubator/CodeGen/vtable-comdat-divergence.cpp
new file mode 100644
index 0000000000000..41f7229ae10ed
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vtable-comdat-divergence.cpp
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t-codegen.ll
+// RUN: FileCheck --check-prefix=CIR --input-file=%t-cir.ll %s
+// RUN: FileCheck --check-prefix=CODEGEN --input-file=%t-codegen.ll %s
+
+// XFAIL: *
+
+// This test documents a divergence between CIR and CodeGen:
+// CIR does not emit 'comdat' attribute on vtables.
+// This is a bug that needs to be fixed.
+//
+// Expected (CodeGen):
+//   @_ZTV4Base = linkonce_odr unnamed_addr constant { [3 x ptr] } ... comdat
+//
+// Actual (CIR):
+//   @_ZTV4Base = linkonce_odr global { [3 x ptr] } ...
+//
+// The vtable should be marked with 'comdat' because:
+// 1. Enables proper handling of linkonce_odr definitions across translation units
+// 2. Ensures the linker can safely discard duplicate vtable definitions
+// 3. Required for correct C++ semantics with inline/template classes
+// 4. CodeGen has always emitted them with this attribute
+
+class Base {
+public:
+  virtual void foo() {}
+};
+
+void test() {
+  Base b;
+  b.foo();
+}
+
+// Both should emit comdat attribute
+// CIR: @_ZTV4Base = {{.*}}, comdat
+// CODEGEN: @_ZTV4Base = {{.*}}, comdat
diff --git a/clang/test/CIR/Incubator/CodeGen/vtable-emission.cpp b/clang/test/CIR/Incubator/CodeGen/vtable-emission.cpp
new file mode 100644
index 0000000000000..fe915cfb6e675
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vtable-emission.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir  -emit-llvm -o - %s \
+// RUN: | opt -S -passes=instcombine,mem2reg,simplifycfg -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+struct S {
+  virtual void key();
+  virtual void nonKey() {}
+} sobj;
+
+void S::key() {}
+
+// CHECK-DAG: !rec_anon_struct1 = !cir.record<struct  {!cir.array<!cir.ptr<!u8i> x 4>}>
+// CHECK-DAG: !rec_anon_struct2 = !cir.record<struct  {!cir.ptr<!rec_anon_struct1>}>
+
+// The definition of the key function should result in the vtable being emitted.
+// CHECK: cir.global constant external @_ZTV1S = #cir.vtable
+// LLVM: @_ZTV1S = constant { [4 x ptr] } { [4 x ptr]
+// LLVM-SAME: [ptr null, ptr @_ZTI1S, ptr @_ZN1S3keyEv, ptr @_ZN1S6nonKeyEv] }, align 8
+
+// Note: GEP emitted by cir might not be the same as LLVM, due to constant folding.
+// CHECK: cir.global external @sobj = #cir.const_record
+// CHECK-SAME: <{#cir.global_view<@_ZTV1S, [0 : i32, 2 : i32]> :
+// CHECK-SAME: !cir.ptr<!rec_anon_struct1>}> : !rec_anon_struct2 {alignment = 8 : i64}
+// LLVM: @sobj = global { ptr } { ptr getelementptr inbounds nuw
+// LLVM-SAME: (i8, ptr @_ZTV1S, i64 16) }, align 8
+
+// The reference from the vtable should result in nonKey being emitted.
+// CHECK: cir.func {{.*}} @_ZN1S6nonKeyEv({{.*}} {
diff --git a/clang/test/CIR/Incubator/CodeGen/vtable-rtti.cpp b/clang/test/CIR/Incubator/CodeGen/vtable-rtti.cpp
new file mode 100644
index 0000000000000..cb56a4c9c9ac8
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vtable-rtti.cpp
@@ -0,0 +1,104 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -fno-rtti -mconstructor-aliases -emit-cir %s -o %t2.cir
+// RUN: FileCheck --input-file=%t2.cir --check-prefix=RTTI_DISABLED %s
+
+class A
+{
+public:
+    A() noexcept {}
+    A(const A&) noexcept = default;
+
+    virtual ~A() noexcept;
+    virtual const char* quack() const noexcept;
+};
+
+class B : public A
+{
+public:
+    virtual ~B() noexcept {}
+};
+
+// Type info B.
+// CHECK-DAG: ![[TypeInfoB:rec_.*]] = !cir.record<struct {!cir.ptr<!u8i>, !cir.ptr<!u8i>, !cir.ptr<!u8i>}>
+
+// vtable for A type
+// CHECK-DAG: ![[VPtrTypeA:rec_.*]] = !cir.record<struct {!cir.array<!cir.ptr<!u8i> x 5>}>
+// RTTI_DISABLED-DAG: ![[VPtrTypeA:rec_.*]] = !cir.record<struct {!cir.array<!cir.ptr<!u8i> x 5>}>
+
+// Class A
+// CHECK-DAG: ![[ClassA:rec_.*]] = !cir.record<class "A" {!cir.vptr} #cir.record.decl.ast>
+// RTTI_DISABLED-DAG: ![[ClassA:rec_.*]] = !cir.record<class "A" {!cir.vptr} #cir.record.decl.ast>
+
+// Class B
+// CHECK-DAG: ![[ClassB:rec_.*]] = !cir.record<class "B" {![[ClassA]]}>
+// RTTI_DISABLED-DAG: ![[ClassB:rec_.*]] = !cir.record<class "B" {![[ClassA]]}>
+
+// B ctor => @B::B()
+// Calls @A::A() and initialize __vptr with address of B's vtable.
+//
+// CHECK: cir.func {{.*}} @_ZN1BC2Ev(%arg0: !cir.ptr<![[ClassB]]>
+// RTTI_DISABLED: cir.func {{.*}} @_ZN1BC2Ev(%arg0: !cir.ptr<![[ClassB]]>
+
+// CHECK:   %0 = cir.alloca !cir.ptr<![[ClassB]]>, !cir.ptr<!cir.ptr<![[ClassB]]>>, ["this", init] {alignment = 8 : i64}
+// CHECK:   cir.store{{.*}} %arg0, %0 : !cir.ptr<![[ClassB]]>, !cir.ptr<!cir.ptr<![[ClassB]]>>
+// CHECK:   %1 = cir.load %0 : !cir.ptr<!cir.ptr<![[ClassB]]>>, !cir.ptr<![[ClassB]]>
+// CHECK:   %2 = cir.base_class_addr %1 : !cir.ptr<![[ClassB]]> nonnull [0] -> !cir.ptr<![[ClassA]]>
+// CHECK:   cir.call @_ZN1AC2Ev(%2) : (!cir.ptr<![[ClassA]]>) -> ()
+// CHECK:   %3 = cir.vtable.address_point(@_ZTV1B, address_point = <index = 0, offset = 2>) : !cir.vptr
+// CHECK:   %4 = cir.vtable.get_vptr %1 : !cir.ptr<!rec_B> -> !cir.ptr<!cir.vptr>
+// CHECK:   cir.store{{.*}} %3, %4 : !cir.vptr, !cir.ptr<!cir.vptr>
+// CHECK:   cir.return
+// CHECK: }
+
+// foo - zero initialize object B and call ctor (@B::B())
+//
+// CHECK: cir.func {{.*}} @_Z3foov()
+// CHECK:   cir.scope {
+// CHECK:     %0 = cir.alloca !rec_B, !cir.ptr<!rec_B>, ["agg.tmp.ensured"] {alignment = 8 : i64}
+// CHECK:     %1 = cir.const #cir.zero : ![[ClassB]]
+// CHECK:     cir.store{{.*}} %1, %0 : ![[ClassB]], !cir.ptr<![[ClassB]]>
+// CHECK:     cir.call @_ZN1BC2Ev(%0) : (!cir.ptr<![[ClassB]]>) -> ()
+// CHECK:   }
+// CHECK:   cir.return
+// CHECK: }
+
+// Vtable definition for A
+// CHECK: cir.global "private" constant external @_ZTV1A : ![[VPtrTypeA]] {alignment = 8 : i64}
+
+// A ctor => @A::A()
+// Calls @A::A() and initialize __vptr with address of A's vtable
+//
+// CHECK:  cir.func {{.*}} @_ZN1AC2Ev(%arg0: !cir.ptr<![[ClassA]]>
+// CHECK:    %0 = cir.alloca !cir.ptr<![[ClassA]]>, !cir.ptr<!cir.ptr<![[ClassA]]>>, ["this", init] {alignment = 8 : i64}
+// CHECK:    cir.store{{.*}} %arg0, %0 : !cir.ptr<![[ClassA]]>, !cir.ptr<!cir.ptr<![[ClassA]]>>
+// CHECK:    %1 = cir.load %0 : !cir.ptr<!cir.ptr<![[ClassA]]>>, !cir.ptr<![[ClassA]]>
+// CHECK:    %2 = cir.vtable.address_point(@_ZTV1A, address_point = <index = 0, offset = 2>) : !cir.vptr
+// CHECK:    %3 = cir.vtable.get_vptr %1 : !cir.ptr<!rec_A> -> !cir.ptr<!cir.vptr>
+// CHECK:    cir.store{{.*}} %2, %3 : !cir.vptr, !cir.ptr<!cir.vptr>
+// CHECK:    cir.return
+// CHECK:  }
+
+// vtable for B
+// CHECK:   cir.global constant linkonce_odr @_ZTV1B = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1B> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1BD2Ev> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1BD0Ev> : !cir.ptr<!u8i>, #cir.global_view<@_ZNK1A5quackEv> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 5>}> : ![[VPtrTypeA]]
+// RTTI_DISABLED:   cir.global constant linkonce_odr @_ZTV1B = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1BD2Ev> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1BD0Ev> : !cir.ptr<!u8i>, #cir.global_view<@_ZNK1A5quackEv> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 5>}> : ![[VPtrTypeA]]
+
+// vtable for __cxxabiv1::__si_class_type_info
+// CHECK:   cir.global "private" external @_ZTVN10__cxxabiv120__si_class_type_infoE : !cir.ptr<!cir.ptr<!u8i>>
+// RTTI_DISABLED-NOT:   cir.global "private" external @_ZTVN10__cxxabiv120__si_class_type_infoE : !cir.ptr<!cir.ptr<!u8i>>
+
+// typeinfo name for B
+// CHECK:   cir.global constant linkonce_odr comdat @_ZTS1B = #cir.const_array<"1B" : !cir.array<!s8i x 2>> : !cir.array<!s8i x 2> {alignment = 1 : i64}
+// RTTI_DISABLED-NOT: cir.global linkonce_odr @_ZTS1B
+
+// typeinfo for A
+// CHECK:   cir.global "private" constant external @_ZTI1A : !cir.ptr<!u8i>
+// RTTI_DISABLED-NOT:   cir.global "private" constant external @_ZTI1A : !cir.ptr<!u8i>
+
+// typeinfo for B
+// CHECK: cir.global constant external @_ZTI1B = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv120__si_class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS1B> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1A> : !cir.ptr<!u8i>}> : ![[TypeInfoB]]
+// RTTI_DISABLED-NOT: cir.global constant external @_ZTI1B
+
+// Checks for dtors in dtors.cpp
+
+void foo() { B(); }
diff --git a/clang/test/CIR/Incubator/CodeGen/vtable-thunk-compare-codegen.cpp b/clang/test/CIR/Incubator/CodeGen/vtable-thunk-compare-codegen.cpp
new file mode 100644
index 0000000000000..bb1c6fcbda98c
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vtable-thunk-compare-codegen.cpp
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.og.ll
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.og.ll %s
+
+// Test that CIR thunk generation matches original CodeGen behavior
+
+class Base1 {
+public:
+  virtual void foo() {}
+};
+
+class Base2 {
+public:
+  virtual void bar() {}
+};
+
+class Derived : public Base1, public Base2 {
+public:
+  void bar() override {}
+};
+
+void test() {
+  Derived d;
+  Base2* b2 = &d;
+  b2->bar();
+}
+
+// Check CIR thunk in vtable
+// CIR: cir.global {{.*}}linkonce_odr @_ZTV7Derived = #cir.vtable<{{.*}}@_ZThn{{[0-9]+}}_N7Derived3barEv
+
+// Check CIR thunk function
+// CIR: cir.func {{.*}} @_ZThn{{[0-9]+}}_N7Derived3barEv
+// CIR:   cir.ptr_stride
+// CIR:   cir.call @_ZN7Derived3barEv
+
+// Check LLVM thunk in vtable (from CIR)
+// LLVM-DAG: @_ZTV7Derived = linkonce_odr constant {{.*}} @_ZThn{{[0-9]+}}_N7Derived3barEv
+
+// Check LLVM thunk function (from CIR)
+// LLVM-DAG: define linkonce_odr void @_ZThn{{[0-9]+}}_N7Derived3barEv
+
+// Check original CodeGen LLVM output matches
+// OGCG-DAG: @_ZTV7Derived = linkonce_odr unnamed_addr constant {{.*}} @_ZThn{{[0-9]+}}_N7Derived3barEv
+// OGCG-DAG: define linkonce_odr void @_ZThn{{[0-9]+}}_N7Derived3barEv
diff --git a/clang/test/CIR/Incubator/CodeGen/vtable-thunk-destructor.cpp b/clang/test/CIR/Incubator/CodeGen/vtable-thunk-destructor.cpp
new file mode 100644
index 0000000000000..eaf5ab6da2120
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vtable-thunk-destructor.cpp
@@ -0,0 +1,86 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ogcg.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ogcg.ll %s
+
+// Test thunk generation for virtual destructors in multiple inheritance
+
+class Base1 {
+public:
+  virtual ~Base1() {}
+  int x;
+};
+
+class Base2 {
+public:
+  virtual ~Base2() {}
+  int y;
+};
+
+class Derived : public Base1, public Base2 {
+public:
+  ~Derived() override {}
+};
+
+void test() {
+  Base2* b2 = new Derived();
+  delete b2;  // Uses destructor thunk
+}
+
+// ============================================================================
+// Destructor Thunks
+// ============================================================================
+
+// Derived's destructor needs thunks when called through Base2* because
+// Base2 is at offset 16 in Derived (after Base1's vtable + data)
+// The Itanium ABI generates multiple destructor variants:
+// - D2 (base object destructor)
+// - D1 (complete object destructor)
+// - D0 (deleting destructor)
+
+// Check for complete destructor thunk (D1) - appears first in output
+// CIR: cir.func {{.*}}comdat linkonce_odr @_ZThn16_N7DerivedD1Ev
+// CIR: cir.ptr_stride
+// CIR: cir.call @_ZN7DerivedD1Ev
+
+// Check for deleting destructor thunk (D0) - appears second in output
+// CIR: cir.func {{.*}}comdat linkonce_odr @_ZThn16_N7DerivedD0Ev
+// CIR: cir.ptr_stride
+// CIR: cir.call @_ZN7DerivedD0Ev
+
+// ============================================================================
+// VTable Structure
+// ============================================================================
+
+// Check that vtable contains destructor thunks
+//     LLVM: @_ZTV7Derived = linkonce_odr constant
+// LLVM-DAG: @_ZThn16_N7DerivedD1Ev
+// LLVM-DAG: @_ZThn16_N7DerivedD0Ev
+
+//     OGCG: @_ZTV7Derived = linkonce_odr {{.*}} constant
+// OGCG-DAG: @_ZThn16_N7DerivedD1Ev
+// OGCG-DAG: @_ZThn16_N7DerivedD0Ev
+
+// ============================================================================
+// Thunk Implementation
+// ============================================================================
+
+// Complete destructor thunk (D1)
+// LLVM-LABEL: define linkonce_odr void @_ZThn16_N7DerivedD1Ev
+//       LLVM: getelementptr i8, ptr %{{[0-9]+}}, i64 -16
+//       LLVM: call void @_ZN7DerivedD1Ev
+
+// OGCG-LABEL: define linkonce_odr void @_ZThn16_N7DerivedD1Ev
+//       OGCG: getelementptr inbounds i8, ptr %{{.*}}, i64 -16
+//       OGCG: call void @_ZN7DerivedD1Ev
+
+// Deleting destructor thunk (D0)
+// LLVM-LABEL: define linkonce_odr void @_ZThn16_N7DerivedD0Ev
+//       LLVM: getelementptr i8, ptr %{{[0-9]+}}, i64 -16
+//       LLVM: call void @_ZN7DerivedD0Ev
+
+// OGCG-LABEL: define linkonce_odr void @_ZThn16_N7DerivedD0Ev
+//       OGCG: getelementptr inbounds i8, ptr %{{.*}}, i64 -16
+//       OGCG: call void @_ZN7DerivedD0Ev
diff --git a/clang/test/CIR/Incubator/CodeGen/vtable-thunk-edge-cases.cpp b/clang/test/CIR/Incubator/CodeGen/vtable-thunk-edge-cases.cpp
new file mode 100644
index 0000000000000..1105b38e6600d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vtable-thunk-edge-cases.cpp
@@ -0,0 +1,178 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ogcg.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ogcg.ll %s
+
+// Test edge cases for thunk generation:
+// 1. Deep inheritance hierarchies
+// 2. Empty base optimization affecting offsets
+// 3. Multiple overridden methods requiring thunks with the same base offset
+// 4. Thunk implementations matching between CIR lowering and OGCG
+
+// ============================================================================
+// Test 1: Deep Inheritance Hierarchy
+// ============================================================================
+
+class Level0 {
+public:
+  virtual void method0() {}
+};
+
+class Level1 : public Level0 {
+public:
+  virtual void method1() {}
+  int data1;
+};
+
+class Level2A : public Level1 {
+public:
+  virtual void method2a() {}
+  int data2a;
+};
+
+class Level2B {
+public:
+  virtual void method2b() {}
+  int data2b;
+};
+
+class DeepDerived : public Level2A, public Level2B {
+public:
+  void method2b() override {}
+};
+
+void testDeep() {
+  DeepDerived d;
+  Level2B* b = &d;
+  b->method2b();  // Needs thunk due to Level2B offset
+}
+
+// Check thunk for deep hierarchy
+// CIR: cir.func {{.*}}comdat linkonce_odr @_ZThn{{[0-9]+}}_N11DeepDerived8method2bEv
+
+//      LLVM: @_ZTV11DeepDerived = linkonce_odr constant
+// LLVM-SAME: @_ZThn{{[0-9]+}}_N11DeepDerived8method2bEv
+
+//      OGCG: @_ZTV11DeepDerived = linkonce_odr {{.*}} constant
+// OGCG-SAME: @_ZThn{{[0-9]+}}_N11DeepDerived8method2bEv
+
+// ============================================================================
+// Test 2: Empty Base Optimization
+// ============================================================================
+
+// A base with no data members (but a vptr, since it is polymorphic) still shifts the next base
+class EmptyBase {
+public:
+  virtual void emptyMethod() {}
+};
+
+class NonEmptyBase {
+public:
+  virtual void nonEmptyMethod() {}
+  int data;
+};
+
+class EmptyDerived : public EmptyBase, public NonEmptyBase {
+public:
+  void nonEmptyMethod() override {}
+};
+
+void testEmpty() {
+  EmptyDerived d;
+  NonEmptyBase* b = &d;
+  b->nonEmptyMethod();  // Needs thunk, offset affected by empty base
+}
+
+// Check thunk with empty base
+// CIR: cir.func {{.*}}comdat linkonce_odr @_ZThn{{[0-9]+}}_N12EmptyDerived14nonEmptyMethodEv
+
+//      LLVM: @_ZTV12EmptyDerived = linkonce_odr constant
+// LLVM-SAME: @_ZThn{{[0-9]+}}_N12EmptyDerived14nonEmptyMethodEv
+
+//      OGCG: @_ZTV12EmptyDerived = linkonce_odr {{.*}} constant
+// OGCG-SAME: @_ZThn{{[0-9]+}}_N12EmptyDerived14nonEmptyMethodEv
+
+// ============================================================================
+// Test 3: Multiple Methods Requiring Different Thunk Offsets
+// ============================================================================
+
+class MultiBase1 {
+public:
+  virtual void method1() {}
+  int data1;
+};
+
+class MultiBase2 {
+public:
+  virtual void method2a() {}
+  virtual void method2b() {}
+  int data2;
+};
+
+class MultiDerived : public MultiBase1, public MultiBase2 {
+public:
+  void method2a() override {}
+  void method2b() override {}
+};
+
+void testMulti() {
+  MultiDerived d;
+  MultiBase2* b = &d;
+  b->method2a();  // Both need same thunk offset
+  b->method2b();
+}
+
+// Check multiple thunks with same offset
+// CIR: cir.func {{.*}}comdat linkonce_odr @_ZThn{{[0-9]+}}_N12MultiDerived8method2aEv
+// CIR: cir.func {{.*}}comdat linkonce_odr @_ZThn{{[0-9]+}}_N12MultiDerived8method2bEv
+
+//     LLVM: @_ZTV12MultiDerived = linkonce_odr constant
+// LLVM-DAG: @_ZThn{{[0-9]+}}_N12MultiDerived8method2aEv
+// LLVM-DAG: @_ZThn{{[0-9]+}}_N12MultiDerived8method2bEv
+
+//     OGCG: @_ZTV12MultiDerived = linkonce_odr {{.*}} constant
+// OGCG-DAG: @_ZThn{{[0-9]+}}_N12MultiDerived8method2aEv
+// OGCG-DAG: @_ZThn{{[0-9]+}}_N12MultiDerived8method2bEv
+
+// ============================================================================
+// Thunk Implementation Checks
+// ============================================================================
+
+// Verify thunk implementations match between CIR lowering and OGCG
+
+// Deep hierarchy thunk
+// LLVM-LABEL: define linkonce_odr void @_ZThn{{[0-9]+}}_N11DeepDerived8method2bEv
+//       LLVM: getelementptr i8, ptr %{{[0-9]+}}, i64 -{{[0-9]+}}
+//       LLVM: call void @_ZN11DeepDerived8method2bEv
+
+// OGCG-LABEL: define linkonce_odr void @_ZThn{{[0-9]+}}_N11DeepDerived8method2bEv
+//       OGCG: getelementptr inbounds i8, ptr %{{.*}}, i64 -{{[0-9]+}}
+//       OGCG: call void @_ZN11DeepDerived8method2bEv
+
+// Empty base thunk
+// LLVM-LABEL: define linkonce_odr void @_ZThn{{[0-9]+}}_N12EmptyDerived14nonEmptyMethodEv
+//       LLVM: getelementptr i8, ptr %{{[0-9]+}}, i64 -{{[0-9]+}}
+//       LLVM: call void @_ZN12EmptyDerived14nonEmptyMethodEv
+
+// OGCG-LABEL: define linkonce_odr void @_ZThn{{[0-9]+}}_N12EmptyDerived14nonEmptyMethodEv
+//       OGCG: getelementptr inbounds i8, ptr %{{.*}}, i64 -{{[0-9]+}}
+//       OGCG: call void @_ZN12EmptyDerived14nonEmptyMethodEv
+
+// Multiple methods thunks
+// LLVM-LABEL: define linkonce_odr void @_ZThn{{[0-9]+}}_N12MultiDerived8method2aEv
+//       LLVM: getelementptr i8, ptr %{{[0-9]+}}, i64 -{{[0-9]+}}
+//       LLVM: call void @_ZN12MultiDerived8method2aEv
+
+// OGCG-LABEL: define linkonce_odr void @_ZThn{{[0-9]+}}_N12MultiDerived8method2aEv
+//       OGCG: getelementptr inbounds i8, ptr %{{.*}}, i64 -{{[0-9]+}}
+//       OGCG: call void @_ZN12MultiDerived8method2aEv
+
+// LLVM-LABEL: define linkonce_odr void @_ZThn{{[0-9]+}}_N12MultiDerived8method2bEv
+//       LLVM: getelementptr i8, ptr %{{[0-9]+}}, i64 -{{[0-9]+}}
+//       LLVM: call void @_ZN12MultiDerived8method2bEv
+
+// OGCG-LABEL: define linkonce_odr void @_ZThn{{[0-9]+}}_N12MultiDerived8method2bEv
+//       OGCG: getelementptr inbounds i8, ptr %{{.*}}, i64 -{{[0-9]+}}
+//       OGCG: call void @_ZN12MultiDerived8method2bEv
diff --git a/clang/test/CIR/Incubator/CodeGen/vtable-thunk-multibase.cpp b/clang/test/CIR/Incubator/CodeGen/vtable-thunk-multibase.cpp
new file mode 100644
index 0000000000000..46a29260ce1b8
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vtable-thunk-multibase.cpp
@@ -0,0 +1,67 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ogcg.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ogcg.ll %s
+
+// Test thunk generation with multiple base classes
+// This validates thunks for void-returning methods (no return adjustment).
+// Full covariant return adjustment for pointer-returning methods is NYI.
+
+class Base1 {
+public:
+  virtual void foo() {}
+};
+
+class Base2 {
+public:
+  virtual void bar() {}
+  int data;
+};
+
+class Derived : public Base1, public Base2 {
+public:
+  void bar() override {}
+};
+
+void test() {
+  Derived d;
+  Base2* b2 = &d;
+  b2->bar();  // Needs this-adjusting thunk (no return adjustment)
+}
+
+// ============================================================================
+// CIR Output - Thunk with This-Adjustment Only
+// ============================================================================
+
+// Derived::bar() needs a thunk when called through Base2* because
+// Base2 is at offset 8 in Derived (after Base1's vtable pointer)
+
+// CIR: cir.func {{.*}}comdat linkonce_odr @_ZThn8_N7Derived3barEv
+// CIR: cir.ptr_stride
+// CIR: cir.call @_ZN7Derived3barEv
+
+// ============================================================================
+// VTable Structure - Both CIR and OGCG
+// ============================================================================
+
+// Check that vtable contains the thunk
+//      LLVM: @_ZTV7Derived = linkonce_odr constant
+// LLVM-SAME: @_ZThn8_N7Derived3barEv
+
+//      OGCG: @_ZTV7Derived = linkonce_odr {{.*}} constant
+// OGCG-SAME: @_ZThn8_N7Derived3barEv
+
+// ============================================================================
+// Thunk Implementation - LLVM Lowering vs OGCG
+// ============================================================================
+
+// CIR lowering should produce this-adjustment (no return adjustment for void)
+// LLVM-LABEL: define linkonce_odr void @_ZThn8_N7Derived3barEv
+//       LLVM: getelementptr i8, ptr %{{[0-9]+}}, i64 -8
+//       LLVM: call void @_ZN7Derived3barEv
+
+// OGCG-LABEL: define linkonce_odr void @_ZThn8_N7Derived3barEv
+//       OGCG: getelementptr inbounds i8, ptr %{{.*}}, i64 -8
+//       OGCG: call void @_ZN7Derived3barEv
diff --git a/clang/test/CIR/Incubator/CodeGen/vtable-thunk-virtual-inheritance.cpp b/clang/test/CIR/Incubator/CodeGen/vtable-thunk-virtual-inheritance.cpp
new file mode 100644
index 0000000000000..51bbbd3b3b780
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vtable-thunk-virtual-inheritance.cpp
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ogcg.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ogcg.ll %s
+
+// Test thunk generation with virtual inheritance (diamond problem)
+
+class Base {
+public:
+  virtual void method() {}
+  int a;
+};
+
+class Left : public virtual Base {
+public:
+  virtual void leftMethod() {}
+  int b;
+};
+
+class Right : public virtual Base {
+public:
+  virtual void rightMethod() {}
+  int c;
+};
+
+class Diamond : public Left, public Right {
+public:
+  void leftMethod() override {}
+  void rightMethod() override {}
+};
+
+void test() {
+  Diamond d;
+  Left* l = &d;
+  Right* r = &d;
+  l->leftMethod();
+  r->rightMethod();
+}
+
+// ============================================================================
+// CIR Output - Thunk Generation
+// ============================================================================
+
+// Diamond's rightMethod needs a thunk because Right is at offset 16
+// leftMethod doesn't need a thunk because Left is at offset 0
+// CIR: cir.func {{.*}}comdat linkonce_odr @_ZThn16_N7Diamond11rightMethodEv
+// CIR: cir.ptr_stride
+// CIR: cir.call @_ZN7Diamond11rightMethodEv
+
+// ============================================================================
+// VTable Structure - Both CIR and OGCG
+// ============================================================================
+
+// Check that vtable contains the thunk reference at the correct position
+//      LLVM: @_ZTV7Diamond = linkonce_odr constant
+// LLVM-SAME: @_ZThn16_N7Diamond11rightMethodEv
+
+//      OGCG: @_ZTV7Diamond = linkonce_odr {{.*}} constant
+// OGCG-SAME: @_ZThn16_N7Diamond11rightMethodEv
+
+// ============================================================================
+// Thunk Implementation - LLVM Lowering vs OGCG
+// ============================================================================
+
+// CIR lowering should produce the same this-pointer adjustment as OGCG
+// LLVM-LABEL: define linkonce_odr void @_ZThn16_N7Diamond11rightMethodEv
+//      LLVM: %[[VAR1:[0-9]+]] = getelementptr i8, ptr %{{[0-9]+}}, i64 -16
+//      LLVM: call void @_ZN7Diamond11rightMethodEv(ptr %[[VAR1]])
+
+// OGCG-LABEL: define linkonce_odr void @_ZThn16_N7Diamond11rightMethodEv
+//      OGCG: %[[VAR2:[0-9]+]] = getelementptr inbounds i8, ptr %{{.*}}, i64 -16
+//      OGCG: call void @_ZN7Diamond11rightMethodEv(ptr {{.*}} %[[VAR2]])
diff --git a/clang/test/CIR/Incubator/CodeGen/vtable-thunk.cpp b/clang/test/CIR/Incubator/CodeGen/vtable-thunk.cpp
new file mode 100644
index 0000000000000..0355eaf45951d
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vtable-thunk.cpp
@@ -0,0 +1,111 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// Test basic thunk generation for multiple inheritance with non-virtual thunks
+
+class Base1 {
+public:
+  virtual void foo() {}
+  int x;
+};
+
+class Base2 {
+public:
+  virtual void bar() {}
+  int y;
+};
+
+class Derived : public Base1, public Base2 {
+public:
+  void bar() override {}
+};
+
+void test() {
+  Derived d;
+  Base2* b2 = &d;
+  b2->bar();
+}
+
+// ============================================================================
+// CIR VTable Structure
+// ============================================================================
+
+// Check thunk is in vtable
+// CIR: cir.global constant linkonce_odr @_ZTV7Derived = #cir.vtable
+// CIR: #cir.global_view<@_ZThn16_N7Derived3barEv>
+
+// ============================================================================
+// CIR Thunk Function Generation
+// ============================================================================
+
+// Check that thunk function is generated with:
+// - comdat attribute (for deduplication across TUs)
+// - linkonce_odr linkage (one definition rule, discardable)
+// - correct mangling (_ZThn<offset>_<original_name>)
+// CIR: cir.func {{.*}}comdat linkonce_odr @_ZThn16_N7Derived3barEv
+
+// ============================================================================
+// CIR Thunk Implementation - This Pointer Adjustment
+// ============================================================================
+
+// The thunk should:
+// 1. Adjust the 'this' pointer by the offset (-16 bytes)
+// 2. Call the actual implementation with the adjusted pointer
+
+// CIR: cir.ptr_stride
+// CIR: cir.call @_ZN7Derived3barEv
+
+// ============================================================================
+// LLVM IR Output Validation
+// ============================================================================
+
+//      LLVM: @_ZTV7Derived = linkonce_odr constant
+// LLVM-SAME: @_ZThn16_N7Derived3barEv
+
+//      LLVM: define linkonce_odr void @_ZThn16_N7Derived3barEv
+// LLVM-SAME: ptr
+
+// ============================================================================
+// Test Multiple Base Classes (Different Offsets)
+// ============================================================================
+
+class A {
+public:
+  virtual void methodA() {}
+  long long a;  // 8 bytes
+};
+
+class B {
+public:
+  virtual void methodB() {}
+  long long b;  // 8 bytes
+};
+
+class C {
+public:
+  virtual void methodC() {}
+  long long c;  // 8 bytes
+};
+
+class Multi : public A, public B, public C {
+public:
+  void methodB() override {}
+  void methodC() override {}
+};
+
+void test_multi() {
+  Multi m;
+  B* pb = &m;
+  C* pc = &m;
+  pb->methodB();
+  pc->methodC();
+}
+
+// Different thunks for different offsets
+// Offset to B should be 16 (A's vptr + a)
+// CIR: cir.func {{.*}}comdat linkonce_odr @_ZThn16_N5Multi7methodBEv
+
+// Offset to C should be 32 (A's vptr + a + B's vptr + b)
+// CIR: cir.func {{.*}}comdat linkonce_odr @_ZThn32_N5Multi7methodCEv
diff --git a/clang/test/CIR/Incubator/CodeGen/vtable-unnamed-addr-divergence.cpp b/clang/test/CIR/Incubator/CodeGen/vtable-unnamed-addr-divergence.cpp
new file mode 100644
index 0000000000000..446359743c754
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vtable-unnamed-addr-divergence.cpp
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t-codegen.ll
+// RUN: FileCheck --check-prefix=CIR --input-file=%t-cir.ll %s
+// RUN: FileCheck --check-prefix=CODEGEN --input-file=%t-codegen.ll %s
+
+// XFAIL: *
+
+// This test documents a divergence between CIR and CodeGen:
+// CIR does not emit 'unnamed_addr' attribute on vtables.
+// This is a bug that needs to be fixed.
+//
+// Expected (CodeGen):
+//   @_ZTV4Base = linkonce_odr unnamed_addr constant { [3 x ptr] } ...
+//
+// Actual (CIR):
+//   @_ZTV4Base = linkonce_odr global { [3 x ptr] } ...
+//
+// The vtable should be marked as 'unnamed_addr' because:
+// 1. The address of a vtable is never taken or used for identity comparison
+// 2. This allows the linker to merge duplicate vtables across translation units
+// 3. Reduces binary size and improves performance
+// 4. CodeGen has always emitted them with this attribute
+
+class Base {
+public:
+  virtual void foo() {}
+};
+
+void test() {
+  Base b;
+  b.foo();
+}
+
+// Both should emit unnamed_addr attribute
+// CIR: @_ZTV4Base = linkonce_odr unnamed_addr
+// CODEGEN: @_ZTV4Base = linkonce_odr unnamed_addr
diff --git a/clang/test/CIR/Incubator/CodeGen/vtt.cpp b/clang/test/CIR/Incubator/CodeGen/vtt.cpp
new file mode 100644
index 0000000000000..676cf210ceee6
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/vtt.cpp
@@ -0,0 +1,185 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll  %s
+
+class A {
+public:
+  int a;
+  virtual void v() {}
+};
+
+class B : public virtual A {
+public:
+  int b;
+  virtual void w();
+};
+
+class C : public virtual A {
+public:
+  long c;
+  virtual void x() {}
+};
+
+class D : public B, public C {
+public:
+  long d;
+  virtual void y() {}
+};
+
+
+int f() {
+  B *b = new D ();
+  return 0;
+}
+
+// Vtable of Class A
+// CIR: cir.global constant linkonce_odr @_ZTV1A = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1A> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1A1vEv> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 3>}> : !rec_anon_struct3 {alignment = 8 : i64}
+
+// Class A constructor
+// CIR: cir.func {{.*}} @_ZN1AC2Ev(%arg0: !cir.ptr<!rec_A>
+// CIR:   %{{[0-9]+}} = cir.vtable.address_point(@_ZTV1A, address_point = <index = 0, offset = 2>) : !cir.vptr
+// CIR:   %{{[0-9]+}} = cir.vtable.get_vptr %{{.*}} : !cir.ptr<!rec_A> -> !cir.ptr<!cir.vptr>
+// CIR:   cir.store{{.*}} %{{[0-9]+}}, %{{[0-9]+}} : !cir.vptr, !cir.ptr<!cir.vptr>
+// CIR: }
+
+// Vtable of Class D
+// CIR: cir.global constant linkonce_odr @_ZTV1D = #cir.vtable<{#cir.const_array<[#cir.ptr<40 : i64> : !cir.ptr<!u8i>, #cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1D> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1B1wEv> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1D1yEv> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 5>, #cir.const_array<[#cir.ptr<24 : i64> : !cir.ptr<!u8i>, #cir.ptr<-16 : i64> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1D> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1C1xEv> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 4>, #cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.ptr<-40 : i64> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1D> : !cir.ptr<!u8i>, #cir.global_view<@_ZN1A1vEv> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 4>}> : !rec_anon_struct4 {alignment = 8 : i64}
+// VTT of class D
+// CIR: cir.global constant linkonce_odr @_ZTT1D = #cir.const_array<[#cir.global_view<@_ZTV1D, [0 : i32, 3 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTC1D0_1B, [0 : i32, 3 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTC1D0_1B, [1 : i32, 3 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTC1D16_1C, [0 : i32, 3 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTC1D16_1C, [1 : i32, 3 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTV1D, [2 : i32, 3 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTV1D, [1 : i32, 3 : i32]> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 7> {alignment = 8 : i64}
+
+// Class B constructor
+// CIR: cir.func {{.*}} @_ZN1BC2Ev(%arg0: !cir.ptr<!rec_B>
+// CIR:   %{{[0-9]+}} = cir.vtt.address_point %{{[0-9]+}} : !cir.ptr<!cir.ptr<!void>>, offset = 0 -> !cir.ptr<!cir.ptr<!void>>
+// CIR:   %{{[0-9]+}} = cir.cast bitcast %{{[0-9]+}} : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!cir.vptr>
+// CIR:   %{{[0-9]+}} = cir.load align(8) %{{[0-9]+}} : !cir.ptr<!cir.vptr>, !cir.vptr
+// CIR:   %{{[0-9]+}} = cir.vtable.get_vptr %{{[0-9]+}} : !cir.ptr<!rec_B> -> !cir.ptr<!cir.vptr>
+// CIR:   cir.store{{.*}} %{{[0-9]+}}, %{{[0-9]+}} : !cir.vptr, !cir.ptr<!cir.vptr>
+
+// CIR:   %{{[0-9]+}} = cir.vtt.address_point %{{[0-9]+}} : !cir.ptr<!cir.ptr<!void>>, offset = 1 -> !cir.ptr<!cir.ptr<!void>>
+// CIR:   %{{[0-9]+}} = cir.cast bitcast %{{[0-9]+}} : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!cir.vptr>
+// CIR:   %{{[0-9]+}} = cir.load align(8) %{{[0-9]+}} : !cir.ptr<!cir.vptr>, !cir.vptr
+// CIR:   %{{[0-9]+}} = cir.vtable.get_vptr %{{.*}} : !cir.ptr<!rec_B> -> !cir.ptr<!cir.vptr>
+// CIR:   %{{[0-9]+}} = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!cir.vptr>, !cir.vptr
+// CIR:   %{{[0-9]+}} = cir.cast bitcast %{{[0-9]+}} : !cir.vptr -> !cir.ptr<!u8i>
+// CIR:   %{{[0-9]+}} = cir.const #cir.int<-24> : !s64i
+// CIR:   %{{[0-9]+}} = cir.ptr_stride %{{[0-9]+}}, %{{[0-9]+}} : (!cir.ptr<!u8i>, !s64i) -> !cir.ptr<!u8i>
+// CIR:   %{{[0-9]+}} = cir.cast bitcast %{{[0-9]+}} : !cir.ptr<!u8i> -> !cir.ptr<!s64i>
+// CIR:   %{{[0-9]+}} = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!s64i>, !s64i
+// CIR:   %{{[0-9]+}} = cir.ptr_stride %{{[0-9]+}}, %{{[0-9]+}} : (!cir.ptr<!u8i>, !s64i) -> !cir.ptr<!u8i>
+// CIR:   %{{[0-9]+}} = cir.vtable.get_vptr %{{[0-9]+}} : !cir.ptr<!rec_B> -> !cir.ptr<!cir.vptr>
+// CIR:   cir.store{{.*}} %{{[0-9]+}}, %{{[0-9]+}} : !cir.vptr, !cir.ptr<!cir.vptr>
+// CIR: }
+
+// LLVM-LABEL: @_ZN1BC2Ev
+// LLVM:   %[[THIS_ADDR:.*]] = alloca ptr, i64 1, align 8
+// LLVM:   %[[VTT_ADDR:.*]] = alloca ptr, i64 1, align 8
+// LLVM:   %[[THIS:.*]] = load ptr, ptr %[[THIS_ADDR]], align 8
+// LLVM:   %[[VTT:.*]] = load ptr, ptr %[[VTT_ADDR]], align 8
+// LLVM:   %[[V:.*]] = load ptr, ptr %[[VTT]], align 8
+// LLVM:   store ptr %[[V]], ptr %[[THIS]], align 8
+// LLVM:   getelementptr inbounds ptr, ptr %[[VTT]], i32 1
+// LLVM:   ret void
+// LLVM: }
+
+// Class C constructor
+// CIR: cir.func {{.*}} @_ZN1CC2Ev(%arg0: !cir.ptr<!rec_C>
+// CIR:   %{{[0-9]+}} = cir.vtt.address_point %{{[0-9]+}} : !cir.ptr<!cir.ptr<!void>>, offset = 0 -> !cir.ptr<!cir.ptr<!void>>
+// CIR:   %{{[0-9]+}} = cir.cast bitcast %{{[0-9]+}} : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!cir.vptr>
+// CIR:   %{{[0-9]+}} = cir.load align(8) %{{[0-9]+}} : !cir.ptr<!cir.vptr>, !cir.vptr
+// CIR:   %{{[0-9]+}} = cir.vtable.get_vptr %{{[0-9]+}} : !cir.ptr<!rec_C> -> !cir.ptr<!cir.vptr>
+// CIR:   cir.store{{.*}} %{{[0-9]+}}, %{{[0-9]+}} : !cir.vptr, !cir.ptr<!cir.vptr>
+
+// CIR:   %{{[0-9]+}} = cir.vtt.address_point %{{[0-9]+}} : !cir.ptr<!cir.ptr<!void>>, offset = 1 -> !cir.ptr<!cir.ptr<!void>>
+// CIR:   %{{[0-9]+}} = cir.cast bitcast %{{[0-9]+}} : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!cir.vptr>
+// CIR:   %{{[0-9]+}} = cir.load align(8) %{{[0-9]+}} : !cir.ptr<!cir.vptr>, !cir.vptr
+// CIR:   %{{[0-9]+}} = cir.vtable.get_vptr %{{[0-9]+}} : !cir.ptr<!rec_C> -> !cir.ptr<!cir.vptr>
+// CIR:   %{{[0-9]+}} = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!cir.vptr>, !cir.vptr
+// CIR:   %{{[0-9]+}} = cir.cast bitcast %{{[0-9]+}} : !cir.vptr -> !cir.ptr<!u8i>
+// CIR:   %{{[0-9]+}} = cir.const #cir.int<-24> : !s64i
+// CIR:   %{{[0-9]+}} = cir.ptr_stride %{{[0-9]+}}, %{{[0-9]+}} : (!cir.ptr<!u8i>, !s64i) -> !cir.ptr<!u8i>
+// CIR:   %{{[0-9]+}} = cir.cast bitcast %{{[0-9]+}} : !cir.ptr<!u8i> -> !cir.ptr<!s64i>
+// CIR:   %{{[0-9]+}} = cir.load{{.*}} %{{[0-9]+}} : !cir.ptr<!s64i>, !s64i
+// CIR:   %{{[0-9]+}} = cir.ptr_stride %{{[0-9]+}}, %{{[0-9]+}} : (!cir.ptr<!u8i>, !s64i) -> !cir.ptr<!u8i>
+// CIR:   %{{[0-9]+}} = cir.cast bitcast %{{[0-9]+}} : !cir.ptr<!u8i> -> !cir.ptr<!rec_C>
+// CIR:   %{{[0-9]+}} = cir.vtable.get_vptr %{{[0-9]+}} : !cir.ptr<!rec_C> -> !cir.ptr<!cir.vptr>
+// CIR:   cir.store{{.*}} %{{[0-9]+}}, %{{[0-9]+}} : !cir.vptr, !cir.ptr<!cir.vptr>
+// CIR: }
+
+// Class D constructor
+// CIR: cir.func {{.*}} @_ZN1DC1Ev(%arg0: !cir.ptr<!rec_D>
+// CIR:   %{{[0-9]+}} = cir.alloca !cir.ptr<!rec_D>, !cir.ptr<!cir.ptr<!rec_D>>, ["this", init] {alignment = 8 : i64}
+// CIR:   cir.store{{.*}} %arg0, %{{[0-9]+}} : !cir.ptr<!rec_D>, !cir.ptr<!cir.ptr<!rec_D>>
+// CIR:   %[[D_PTR:.*]] = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!rec_D>>, !cir.ptr<!rec_D>
+// CIR:   %[[A_PTR:.*]] = cir.base_class_addr %[[D_PTR]] : !cir.ptr<!rec_D> nonnull [40] -> !cir.ptr<!rec_A>
+// CIR:   cir.call @_ZN1AC2Ev(%[[A_PTR]]) : (!cir.ptr<!rec_A>) -> ()
+
+// CIR:   %[[B_PTR:.*]] = cir.base_class_addr %[[D_PTR]] : !cir.ptr<!rec_D> nonnull [0] -> !cir.ptr<!rec_B>
+// CIR:   %[[VTT_D_TO_B:.*]] = cir.vtt.address_point @_ZTT1D, offset = 1 -> !cir.ptr<!cir.ptr<!void>>
+// CIR:   cir.call @_ZN1BC2Ev(%[[B_PTR]], %[[VTT_D_TO_B]]) : (!cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!void>>) -> ()
+
+// CIR:   %[[C_PTR:.*]] = cir.base_class_addr %1 : !cir.ptr<!rec_D> nonnull [16] -> !cir.ptr<!rec_C>
+// CIR:   %[[VTT_D_TO_C:.*]] = cir.vtt.address_point @_ZTT1D, offset = 3 -> !cir.ptr<!cir.ptr<!void>>
+// CIR:   cir.call @_ZN1CC2Ev(%[[C_PTR]], %[[VTT_D_TO_C]]) : (!cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!void>>) -> ()
+
+// CIR:   %{{[0-9]+}} = cir.vtable.address_point(@_ZTV1D, address_point = <index = 0, offset = 3>) : !cir.vptr
+// CIR:   %{{[0-9]+}} = cir.vtable.get_vptr %{{[0-9]+}} : !cir.ptr<!rec_D> -> !cir.ptr<!cir.vptr>
+// CIR:   cir.store{{.*}} %{{[0-9]+}}, %{{[0-9]+}} : !cir.vptr, !cir.ptr<!cir.vptr>
+// CIR:   %{{[0-9]+}} = cir.vtable.address_point(@_ZTV1D, address_point = <index = 2, offset = 3>) : !cir.vptr
+
+// CIR:   %{{[0-9]+}} = cir.base_class_addr %{{[0-9]+}} : !cir.ptr<!rec_D> nonnull [40] -> !cir.ptr<!rec_A>
+// CIR:   %{{[0-9]+}} = cir.vtable.get_vptr %{{[0-9]+}} : !cir.ptr<!rec_A> -> !cir.ptr<!cir.vptr>
+// CIR:   cir.store{{.*}} %{{[0-9]+}}, %{{[0-9]+}} : !cir.vptr, !cir.ptr<!cir.vptr>
+// CIR:   %{{[0-9]+}} = cir.vtable.address_point(@_ZTV1D, address_point = <index = 1, offset = 3>) : !cir.vptr
+
+// CIR:   cir.base_class_addr %{{[0-9]+}} : !cir.ptr<!rec_D> nonnull [16] -> !cir.ptr<!rec_C>
+// CIR:   %{{[0-9]+}} = cir.vtable.get_vptr %{{[0-9]+}} : !cir.ptr<!rec_C> -> !cir.ptr<!cir.vptr>
+// CIR:   cir.store{{.*}} %{{[0-9]+}}, %{{[0-9]+}} : !cir.vptr, !cir.ptr<!cir.vptr>
+// CIR:   cir.return
+// CIR: }
+
+// Note: the GEPs emitted by CIR lowering may differ from classic CodeGen's due to constant folding.
+// LLVM-LABEL: @_ZN1DC1Ev
+// LLVM:   %2 = alloca ptr, i64 1, align 8
+// LLVM:   store ptr %0, ptr %2, align 8
+// LLVM:   %[[THIS:.*]] = load ptr, ptr %2, align 8
+// LLVM:   %[[BASE_A:.*]] = getelementptr i8, ptr %[[THIS]], i32 40
+// LLVM:   call void @_ZN1AC2Ev(ptr %[[BASE_A]])
+// LLVM:   call void @_ZN1BC2Ev(ptr %[[THIS]], ptr getelementptr inbounds nuw (i8, ptr @_ZTT1D, i64 8))
+// LLVM:   %[[BASE_C:.*]] = getelementptr i8, ptr %[[THIS]], i32 16
+// LLVM:   call void @_ZN1CC2Ev(ptr %[[BASE_C]], ptr getelementptr inbounds nuw (i8, ptr @_ZTT1D, i64 24))
+// LLVM:   ret void
+// LLVM: }
+
+namespace other {
+  struct A {
+    A();
+    ~A();
+  };
+
+  struct B : virtual A {
+    B();
+    ~B();
+  };
+
+  extern int foo();
+  B::B() {
+    int x = foo();
+  }
+
+  B::~B() {
+    int y = foo();
+  }
+}
+
+// CIR-LABEL:   cir.func {{.*}} @_ZN5other1BD1Ev(
+// CIR-SAME:                               %[[VAL_0:.*]]: !cir.ptr<!rec_other3A3AB>
+// CIR:           %[[VAL_1:.*]] = cir.alloca !cir.ptr<!rec_other3A3AB>, !cir.ptr<!cir.ptr<!rec_other3A3AB>>, ["this", init] {alignment = 8 : i64}
+// CIR:           cir.store{{.*}} %[[VAL_0]], %[[VAL_1]] : !cir.ptr<!rec_other3A3AB>, !cir.ptr<!cir.ptr<!rec_other3A3AB>>
+// CIR:           %[[VAL_2:.*]] = cir.load{{.*}} %[[VAL_1]] : !cir.ptr<!cir.ptr<!rec_other3A3AB>>, !cir.ptr<!rec_other3A3AB>
+// CIR:           %[[VAL_3:.*]] = cir.vtt.address_point @_ZTTN5other1BE, offset = 0 -> !cir.ptr<!cir.ptr<!void>>
+// CIR:           cir.call @_ZN5other1BD2Ev(%[[VAL_2]], %[[VAL_3]]) : (!cir.ptr<!rec_other3A3AB>, !cir.ptr<!cir.ptr<!void>>) -> ()
+// CIR:           %[[VAL_4:.*]] = cir.base_class_addr %[[VAL_2]] : !cir.ptr<!rec_other3A3AB> nonnull [0] -> !cir.ptr<!rec_other3A3AA>
+// CIR:           cir.call @_ZN5other1AD2Ev(%[[VAL_4]]) : (!cir.ptr<!rec_other3A3AA>) -> ()
+// CIR:           cir.return
+// CIR:         }
diff --git a/clang/test/CIR/Incubator/CodeGen/weak.c b/clang/test/CIR/Incubator/CodeGen/weak.c
new file mode 100644
index 0000000000000..8b87b658e87b2
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/weak.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+extern void B (void);
+static __typeof(B) A __attribute__ ((__weakref__("B")));
+
+void active (void)
+{
+  A();
+}
+
+// LLVM: @y = weak_odr global
+// LLVM: @x = weak global
+
+// CIR:      cir.func extern_weak private @B()
+// CIR:      cir.func {{.*}} @active()
+// CIR-NEXT:   cir.call @B() : () -> ()
+
+// LLVM:     declare extern_weak void @B()
+// LLVM:     define dso_local void @active()
+// LLVM-NEXT:  call void @B()
+
+int __attribute__((selectany)) y;
+// CIR:      cir.global weak_odr comdat @y
+
+int __attribute__((weak)) x;
+// CIR:      cir.global weak
diff --git a/clang/test/CIR/Incubator/CodeGen/wide-string.cpp b/clang/test/CIR/Incubator/CodeGen/wide-string.cpp
new file mode 100644
index 0000000000000..0f1f745ab3ffa
--- /dev/null
+++ b/clang/test/CIR/Incubator/CodeGen/wide-string.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+const char16_t *test_utf16() {
+  return u"你好世界";
+}
+
+// CHECK: cir.global "private" constant cir_private dso_local @{{.+}} = #cir.const_array<[#cir.int<20320> : !u16i, #cir.int<22909> : !u16i, #cir.int<19990> : !u16i, #cir.int<30028> : !u16i, #cir.int<0> : !u16i]> : !cir.array<!u16i x 5>
+
+const char32_t *test_utf32() {
+  return U"你好世界";
+}
+
+// CHECK: cir.global "private" constant cir_private dso_local @{{.+}} = #cir.const_array<[#cir.int<20320> : !u32i, #cir.int<22909> : !u32i, #cir.int<19990> : !u32i, #cir.int<30028> : !u32i, #cir.int<0> : !u32i]> : !cir.array<!u32i x 5>
+
+const char16_t *test_zero16() {
+  return u"\0\0\0\0";
+}
+
+// CHECK: cir.global "private" constant cir_private dso_local @{{.+}} = #cir.zero : !cir.array<!u16i x 5>
+
+const char32_t *test_zero32() {
+  return U"\0\0\0\0";
+}
+
+// CHECK: cir.global "private" constant cir_private dso_local @{{.+}} = #cir.zero : !cir.array<!u32i x 5>
diff --git a/clang/test/CIR/Incubator/Driver/callconv.cpp b/clang/test/CIR/Incubator/Driver/callconv.cpp
new file mode 100644
index 0000000000000..3227820ad7213
--- /dev/null
+++ b/clang/test/CIR/Incubator/Driver/callconv.cpp
@@ -0,0 +1,4 @@
+// RUN: %clang %s -fno-clangir-call-conv-lowering -### -c %s 2>&1 | FileCheck --check-prefix=DISABLE %s
+// DISABLE: "-fno-clangir-call-conv-lowering"
+// RUN: %clang %s -fclangir-call-conv-lowering -### -c %s 2>&1 | FileCheck --check-prefix=ENABLE %s
+// ENABLE: "-fclangir-call-conv-lowering"
diff --git a/clang/test/CIR/Incubator/Driver/idiom-recognizer.cpp b/clang/test/CIR/Incubator/Driver/idiom-recognizer.cpp
new file mode 100644
index 0000000000000..eebc7e6b747c1
--- /dev/null
+++ b/clang/test/CIR/Incubator/Driver/idiom-recognizer.cpp
@@ -0,0 +1,2 @@
+// RUN: %clang %s -fclangir-idiom-recognizer -### -c %s 2>&1 | FileCheck --check-prefix=ENABLE %s
+// ENABLE: "-fclangir-idiom-recognizer"
diff --git a/clang/test/CIR/Incubator/Driver/lib-opt.cpp b/clang/test/CIR/Incubator/Driver/lib-opt.cpp
new file mode 100644
index 0000000000000..c3685261261dd
--- /dev/null
+++ b/clang/test/CIR/Incubator/Driver/lib-opt.cpp
@@ -0,0 +1,3 @@
+// RUN: %clang %s -fclangir-lib-opt -### -c %s 2>&1 | FileCheck --check-prefix=ENABLE %s
+// ENABLE: "-fclangir-lib-opt"
+// ENABLE: "-fclangir-idiom-recognizer"
diff --git a/clang/test/CIR/Incubator/Driver/move-opt.cpp b/clang/test/CIR/Incubator/Driver/move-opt.cpp
new file mode 100644
index 0000000000000..22cfe7b7d05a5
--- /dev/null
+++ b/clang/test/CIR/Incubator/Driver/move-opt.cpp
@@ -0,0 +1,2 @@
+// RUN: %clang %s -fclangir-move-opt -### -c %s 2>&1 | FileCheck --check-prefix=ENABLE %s
+// ENABLE: "-fclangir-move-opt"
diff --git a/clang/test/CIR/Incubator/IR/address-space.cir b/clang/test/CIR/Incubator/IR/address-space.cir
new file mode 100644
index 0000000000000..0be43e07fdcd7
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/address-space.cir
@@ -0,0 +1,40 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+  // CHECK: @test_format1(%arg0: !cir.ptr<!s32i, target_address_space(0)>)
+  cir.func @test_format1(%arg0: !cir.ptr<!s32i, target_address_space(0)>) {
+    cir.return
+  }
+
+  // CHECK: @test_format2(%arg0: !cir.ptr<!s32i>)
+  cir.func @test_format2(%arg0: !cir.ptr<!s32i>) {
+    cir.return
+  }
+
+  // CHECK: @test_format3(%arg0: !cir.ptr<!s32i, lang_address_space(offload_private)>)
+  cir.func @test_format3(%arg0: !cir.ptr<!s32i, lang_address_space(offload_private)>) {
+    cir.return
+  }
+
+  // CHECK: @test_format4(%arg0: !cir.ptr<!s32i, lang_address_space(offload_local)>)
+  cir.func @test_format4(%arg0: !cir.ptr<!s32i, lang_address_space(offload_local)>) {
+    cir.return
+  }
+
+  // CHECK: @test_format5(%arg0: !cir.ptr<!s32i, lang_address_space(offload_global)>)
+  cir.func @test_format5(%arg0: !cir.ptr<!s32i, lang_address_space(offload_global)>) {
+    cir.return
+  }
+
+  // CHECK: @test_format6(%arg0: !cir.ptr<!s32i, lang_address_space(offload_constant)>)
+  cir.func @test_format6(%arg0: !cir.ptr<!s32i, lang_address_space(offload_constant)>) {
+    cir.return
+  }
+
+  // CHECK: @test_format7(%arg0: !cir.ptr<!s32i, lang_address_space(offload_generic)>)
+  cir.func @test_format7(%arg0: !cir.ptr<!s32i, lang_address_space(offload_generic)>) {
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/IR/aliases.cir b/clang/test/CIR/Incubator/IR/aliases.cir
new file mode 100644
index 0000000000000..62707c95d8613
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/aliases.cir
@@ -0,0 +1,14 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+module {
+  // CHECK: @testAnonRecordsAlias
+  cir.func @testAnonRecordsAlias() {
+    // CHECK: cir.alloca !rec_anon_struct, !cir.ptr<!rec_anon_struct>
+    %0 = cir.alloca !cir.record<struct {!cir.int<s, 32>}>, !cir.ptr<!cir.record<struct {!cir.int<s, 32>}>>, ["A"]
+    // CHECK: cir.alloca !rec_anon_struct1, !cir.ptr<!rec_anon_struct1>
+    %1 = cir.alloca !cir.record<struct {!cir.int<u, 8>}>, !cir.ptr<!cir.record<struct {!cir.int<u, 8>}>>, ["B"]
+    // CHECK: cir.alloca !rec_anon_union, !cir.ptr<!rec_anon_union>
+    %2 = cir.alloca !cir.record<union {!cir.int<s, 32>}>, !cir.ptr<!cir.record<union {!cir.int<s, 32>}>>, ["C"]
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/IR/alloca.cir b/clang/test/CIR/Incubator/IR/alloca.cir
new file mode 100644
index 0000000000000..80b756adb5267
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/alloca.cir
@@ -0,0 +1,21 @@
+// Test the CIR operations can parse and print correctly (roundtrip)
+
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+!s32i = !cir.int<s, 32>
+!u64i = !cir.int<u, 64>
+
+module  {
+  cir.func @foo(%arg0: !s32i) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, %arg0 : !s32i, ["tmp"] {alignment = 16 : i64}
+    cir.return
+  }
+}
+
+//CHECK: module  {
+
+//CHECK-NEXT:  cir.func @foo(%arg0: !s32i) {
+//CHECK-NEXT:    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, %arg0 : !s32i, ["tmp"] {alignment = 16 : i64}
+//CHECK-NEXT:    cir.return
+//CHECK-NEXT:  }
+
+//CHECK: }
diff --git a/clang/test/CIR/Incubator/IR/annotations.cir b/clang/test/CIR/Incubator/IR/annotations.cir
new file mode 100644
index 0000000000000..1150d58d5266a
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/annotations.cir
@@ -0,0 +1,37 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+module attributes {cir.global_annotations =
+#cir<global_annotations [["a", #cir.annotation<name = "testanno", args = ["21", 12 : i32]>],
+["foo", #cir.annotation<name = "withargfunc", args = ["os", 22 : i32]>],
+["bar", #cir.annotation<name = "noargfunc", args = []>],
+["bar", #cir.annotation<name = "withargfunc", args = ["os", 23 : i32]>],
+["_Z1fv", #cir.annotation<name = "tile", args = []>]]>}
+{
+cir.global external @a = #cir.int<0> : !s32i [#cir.annotation<name = "testanno", args = ["21", 12 : i32]>]
+cir.func @foo() attributes {annotations = [#cir.annotation<name = "withargfunc", args = ["os", 22 : i32]>]} {
+    cir.return
+}
+cir.func @bar() attributes {annotations = [#cir.annotation<name = "noargfunc", args = []>, #cir.annotation<name = "withargfunc", args = ["os", 23 : i32]>]} {
+    cir.return
+}
+// Check that the pretty-printed syntax is also correctly parsed
+  cir.func @_Z1fv() [#cir.annotation<name = "tile", args = []>] {
+    cir.return
+  }
+}
+
+// CHECK: module attributes {cir.global_annotations = #cir<global_annotations [
+// CHECK-SAME: ["a", #cir.annotation<name = "testanno", args = ["21", 12 : i32]>],
+// CHECK-SAME: ["foo", #cir.annotation<name = "withargfunc", args = ["os", 22 : i32]>],
+// CHECK-SAME: ["bar", #cir.annotation<name = "noargfunc", args = []>],
+// CHECK-SAME: ["bar", #cir.annotation<name = "withargfunc", args = ["os", 23 : i32]>],
+// CHECK-SAME: ["_Z1fv", #cir.annotation<name = "tile", args = []>]]>}
+// CHECK: cir.global external @a = #cir.int<0> : !s32i
+// CHECK-SAME: [#cir.annotation<name = "testanno", args = ["21", 12 : i32]>]
+// CHECK: cir.func @foo()
+// CHECK-SAME: [#cir.annotation<name = "withargfunc", args = ["os", 22 : i32]>]
+// CHECK: cir.func @bar()
+// CHECK-SAME: [#cir.annotation<name = "noargfunc", args = []>,
+// CHECK-SAME: #cir.annotation<name = "withargfunc", args = ["os", 23 : i32]>]
+// CHECK: cir.func @_Z1fv() [#cir.annotation<name = "tile", args = []>] {
diff --git a/clang/test/CIR/Incubator/IR/array.cir b/clang/test/CIR/Incubator/IR/array.cir
new file mode 100644
index 0000000000000..3ff4957b6fb2e
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/array.cir
@@ -0,0 +1,13 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!u32i = !cir.int<u, 32>
+
+module  {
+  cir.func @arrays() {
+    %0 = cir.alloca !cir.array<!u32i x 10>, !cir.ptr<!cir.array<!u32i x 10>>, ["x", init]
+    cir.return
+  }
+}
+
+// CHECK: cir.func @arrays() {
+// CHECK-NEXT:     %0 = cir.alloca !cir.array<!u32i x 10>, !cir.ptr<!cir.array<!u32i x 10>>, ["x", init]
diff --git a/clang/test/CIR/Incubator/IR/attribute.cir b/clang/test/CIR/Incubator/IR/attribute.cir
new file mode 100644
index 0000000000000..a45151f387db1
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/attribute.cir
@@ -0,0 +1,25 @@
+// RUN: cir-opt %s -split-input-file -allow-unregistered-dialect -verify-diagnostics --verify-roundtrip | FileCheck %s
+
+cir.func @float_attrs_pass() {
+  "test.float_attrs"() {
+    // CHECK: float_attr = #cir.fp<2.000000e+00> : !cir.float
+    float_attr = #cir.fp<2.> : !cir.float
+  } : () -> ()
+  "test.float_attrs"() {
+    // CHECK: float_attr = #cir.fp<-2.000000e+00> : !cir.float
+    float_attr = #cir.fp<-2.> : !cir.float
+  } : () -> ()
+  "test.float_attrs"() {
+    // CHECK: float_attr = #cir.fp<2.000000e+00> : !cir.double
+    float_attr = #cir.fp<2.> : !cir.double
+  } : () -> ()
+  "test.float_attrs"() {
+    // CHECK: float_attr = #cir.fp<2.000000e+00> : !cir.long_double<!cir.f80>
+    float_attr = #cir.fp<2.> : !cir.long_double<!cir.f80>
+  } : () -> ()
+  "test.float_attrs"() {
+    // CHECK: float_attr = #cir.fp<2.000000e+00> : !cir.long_double<!cir.double>
+    float_attr = #cir.fp<2.> : !cir.long_double<!cir.double>
+  } : () -> ()
+  cir.return
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/IR/await.cir b/clang/test/CIR/Incubator/IR/await.cir
new file mode 100644
index 0000000000000..c1fb0d6d7c57c
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/await.cir
@@ -0,0 +1,21 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+cir.func coroutine @checkPrintParse(%arg0 : !cir.bool) {
+  cir.await(user, ready : {
+    cir.condition(%arg0)
+  }, suspend : {
+    cir.yield
+  }, resume : {
+    cir.yield
+  },)
+  cir.return
+}
+
+// CHECK:  cir.func coroutine @checkPrintParse
+// CHECK:  cir.await(user, ready : {
+// CHECK:    cir.condition(%arg0)
+// CHECK:  }, suspend : {
+// CHECK:    cir.yield
+// CHECK:  }, resume : {
+// CHECK:    cir.yield
+// CHECK:  },)
diff --git a/clang/test/CIR/Incubator/IR/being_and_nothingness.cir b/clang/test/CIR/Incubator/IR/being_and_nothingness.cir
new file mode 100644
index 0000000000000..cd8cc98e3e4c0
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/being_and_nothingness.cir
@@ -0,0 +1,19 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+// Exercise different ways to encode a function returning void
+// This test is less useful than it used to be, because a redundant `!cir.void`
+// as a function return type is no longer supported.
+!s32i = !cir.int<s, 32>
+!f = !cir.func<()>
+!f2 = !cir.func<() -> !s32i>
+!void = !cir.void
+!fnptr2 = !cir.ptr<!cir.func<(!s32i)>>
+module {
+  cir.func @ind2(%fnptr: !fnptr2, %a : !s32i) {
+    // CHECK:  cir.func @ind2(%arg0: !cir.ptr<!cir.func<(!s32i)>>, %arg1: !s32i) {
+    cir.return
+  }
+  cir.func @f2() {
+    // CHECK:  cir.func @f2() {
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/IR/bit.cir b/clang/test/CIR/Incubator/IR/bit.cir
new file mode 100644
index 0000000000000..562a60b03c734
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/bit.cir
@@ -0,0 +1,75 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!s8i = !cir.int<s, 8>
+!s16i = !cir.int<s, 16>
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+!u8i = !cir.int<u, 8>
+!u16i = !cir.int<u, 16>
+!u32i = !cir.int<u, 32>
+!u64i = !cir.int<u, 64>
+
+module {
+  cir.func @test() {
+    %s8 = cir.const #cir.int<1> : !s8i
+    %s16 = cir.const #cir.int<1> : !s16i
+    %s32 = cir.const #cir.int<1> : !s32i
+    %s64 = cir.const #cir.int<1> : !s64i
+    %u8 = cir.const #cir.int<1> : !u8i
+    %u16 = cir.const #cir.int<1> : !u16i
+    %u32 = cir.const #cir.int<1> : !u32i
+    %u64 = cir.const #cir.int<1> : !u64i
+
+    %2 = cir.clrsb %s32 : !s32i
+    %3 = cir.clrsb %s64 : !s64i
+
+    %4 = cir.clz %u16 zero_poison : !u16i
+    %5 = cir.clz %u32 : !u32i
+    %6 = cir.clz %u64 zero_poison : !u64i
+
+    %7 = cir.ctz %u16 zero_poison : !u16i
+    %8 = cir.ctz %u32 : !u32i
+    %9 = cir.ctz %u64 zero_poison : !u64i
+
+    %10 = cir.ffs %s32 : !s32i
+    %11 = cir.ffs %s64 : !s64i
+
+    %12 = cir.parity %u32 : !u32i
+    %13 = cir.parity %u64 : !u64i
+
+    %14 = cir.popcount %u16 : !u16i
+    %15 = cir.popcount %u32 : !u32i
+    %16 = cir.popcount %u64 : !u64i
+
+    cir.return
+  }
+}
+
+//      CHECK: module {
+// CHECK-NEXT:   cir.func @test() {
+// CHECK-NEXT:     %0 = cir.const #cir.int<1> : !s8i
+// CHECK-NEXT:     %1 = cir.const #cir.int<1> : !s16i
+// CHECK-NEXT:     %2 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:     %3 = cir.const #cir.int<1> : !s64i
+// CHECK-NEXT:     %4 = cir.const #cir.int<1> : !u8i
+// CHECK-NEXT:     %5 = cir.const #cir.int<1> : !u16i
+// CHECK-NEXT:     %6 = cir.const #cir.int<1> : !u32i
+// CHECK-NEXT:     %7 = cir.const #cir.int<1> : !u64i
+// CHECK-NEXT:     %8 = cir.clrsb %2 : !s32i
+// CHECK-NEXT:     %9 = cir.clrsb %3 : !s64i
+// CHECK-NEXT:     %10 = cir.clz %5 zero_poison : !u16i
+// CHECK-NEXT:     %11 = cir.clz %6 : !u32i
+// CHECK-NEXT:     %12 = cir.clz %7 zero_poison : !u64i
+// CHECK-NEXT:     %13 = cir.ctz %5 zero_poison : !u16i
+// CHECK-NEXT:     %14 = cir.ctz %6 : !u32i
+// CHECK-NEXT:     %15 = cir.ctz %7 zero_poison : !u64i
+// CHECK-NEXT:     %16 = cir.ffs %2 : !s32i
+// CHECK-NEXT:     %17 = cir.ffs %3 : !s64i
+// CHECK-NEXT:     %18 = cir.parity %6 : !u32i
+// CHECK-NEXT:     %19 = cir.parity %7 : !u64i
+// CHECK-NEXT:     %20 = cir.popcount %5 : !u16i
+// CHECK-NEXT:     %21 = cir.popcount %6 : !u32i
+// CHECK-NEXT:     %22 = cir.popcount %7 : !u64i
+// CHECK-NEXT:     cir.return
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/IR/block-address.cir b/clang/test/CIR/Incubator/IR/block-address.cir
new file mode 100644
index 0000000000000..a33ac151a3d42
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/block-address.cir
@@ -0,0 +1,34 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!void = !cir.void
+
+module {
+  cir.func @block_address(){
+    %0 = cir.blockaddress <@block_address, "label"> -> !cir.ptr<!void>
+    cir.br ^bb1
+  ^bb1:
+    cir.label "label"
+    cir.return
+  }
+// CHECK: cir.func @block_address
+// CHECK: %0 = cir.blockaddress <@block_address, "label"> -> !cir.ptr<!void>
+// CHECK:   cir.br ^bb1
+// CHECK: ^bb1:
+// CHECK:   cir.label "label"
+// CHECK:   cir.return
+
+cir.func @block_address_inside_scope() -> () {
+  cir.scope{
+    %0 = cir.blockaddress <@block_address_inside_scope, "label"> -> !cir.ptr<!void>
+  }
+  cir.br ^bb1
+^bb1:
+  cir.label "label"
+  cir.return
+}
+// CHECK: cir.func @block_address_inside_scope
+// CHECK: cir.scope
+// CHECK:  %0 = cir.blockaddress <@block_address_inside_scope, "label"> -> !cir.ptr<!void>
+// CHECK:  cir.label "label"
+// CHECK: cir.return
+}
diff --git a/clang/test/CIR/Incubator/IR/branch.cir b/clang/test/CIR/Incubator/IR/branch.cir
new file mode 100644
index 0000000000000..12bf7c093e623
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/branch.cir
@@ -0,0 +1,21 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+cir.func @test_branch_parsing(%arg0: !cir.bool) {
+  // CHECK: cir.br ^bb1
+  cir.br ^bb1
+^bb1:
+  // CHECK: cir.br ^bb2(%arg0 : !cir.bool)
+  cir.br ^bb2(%arg0 : !cir.bool)
+// CHECK: ^bb2(%0: !cir.bool):
+^bb2(%x: !cir.bool):
+  cir.return
+}
+
+cir.func @test_conditional_branch_parsing(%arg0 : !cir.bool) {
+  // CHECK: cir.brcond %arg0 ^bb1, ^bb2
+  cir.brcond %arg0 ^bb1, ^bb2
+^bb1:
+  cir.return
+^bb2:
+  cir.return
+}
diff --git a/clang/test/CIR/Incubator/IR/builtins.cir b/clang/test/CIR/Incubator/IR/builtins.cir
new file mode 100644
index 0000000000000..f7a6a652187df
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/builtins.cir
@@ -0,0 +1,16 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+!u32i = !cir.int<u, 32>
+
+module  {
+  cir.func @test1() {
+    %0 = cir.const #cir.int<1> : !u32i
+    %1 = cir.return_address(%0)
+    %2 = cir.frame_address(%0)
+    cir.return
+  }
+  // CHECK:  cir.func @test1()
+  // CHECK:  %0 = cir.const #cir.int<1> : !u32i
+  // CHECK:  %1 = cir.return_address(%0)
+  // CHECK:  %2 = cir.frame_address(%0)
+  // CHECK: cir.return
+}
diff --git a/clang/test/CIR/Incubator/IR/call-op-call-conv.cir b/clang/test/CIR/Incubator/IR/call-op-call-conv.cir
new file mode 100644
index 0000000000000..7b418226cec15
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/call-op-call-conv.cir
@@ -0,0 +1,26 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+!fnptr = !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+
+module {
+  cir.func @my_add(%a: !s32i, %b: !s32i) -> !s32i cc(spir_function) {
+    %c = cir.binop(add, %a, %b) : !s32i
+    cir.return %c : !s32i
+  }
+
+  cir.func @ind(%fnptr: !fnptr, %a : !s32i) {
+    %1 = cir.call %fnptr(%a) : (!fnptr, !s32i) -> !s32i cc(spir_kernel)
+    %2 = cir.call %fnptr(%a) : (!fnptr, !s32i) -> !s32i cc(spir_function)
+
+    %3 = cir.try_call @my_add(%1, %2) ^continue, ^landing_pad : (!s32i, !s32i) -> !s32i cc(spir_function)
+  ^continue:
+    cir.br ^landing_pad
+  ^landing_pad:
+    cir.return
+  }
+}
+
+// CHECK: %{{[0-9]+}} = cir.call %arg0(%arg1) : (!cir.ptr<!cir.func<(!s32i) -> !s32i>>, !s32i) -> !s32i cc(spir_kernel)
+// CHECK: %{{[0-9]+}} = cir.call %arg0(%arg1) : (!cir.ptr<!cir.func<(!s32i) -> !s32i>>, !s32i) -> !s32i cc(spir_function)
+// CHECK: %{{[0-9]+}} = cir.try_call @my_add(%{{[0-9]+}}, %{{[0-9]+}}) ^{{.+}}, ^{{.+}} : (!s32i, !s32i) -> !s32i cc(spir_function)
diff --git a/clang/test/CIR/Incubator/IR/call.cir b/clang/test/CIR/Incubator/IR/call.cir
new file mode 100644
index 0000000000000..4685578693161
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/call.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+!fnptr = !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+
+#fn_attr = #cir<extra({nothrow = #cir.nothrow})>
+
+module {
+  // Excerpt of std::array<int, 8192ul>::operator[](unsigned long)
+  cir.func no_inline optnone linkonce_odr @_ZNSt5arrayIiLm8192EEixEm(%arg0: !s32i) -> !s32i {
+    cir.return %arg0 : !s32i
+  }
+
+  cir.func optnone private @my_add(%a: !s32i, %b: !s32i) -> !s32i cc(spir_function)
+
+  cir.func @ind(%fnptr: !fnptr, %a : !s32i) {
+    %r = cir.call %fnptr(%a) : (!fnptr, !s32i) -> !s32i
+// CHECK: %0 = cir.call %arg0(%arg1) : (!cir.ptr<!cir.func<(!s32i) -> !s32i>>, !s32i) -> !s32i
+    // Check parse->pretty-print round-trip on extra() attribute
+    %7 = cir.call @_ZNSt5arrayIiLm8192EEixEm(%a) : (!s32i) -> !s32i extra(#fn_attr)
+// CHECK: %1 = cir.call @_ZNSt5arrayIiLm8192EEixEm(%arg1) : (!s32i) -> !s32i extra(#fn_attr)
+    // Frankenstein's example from clang/test/CIR/Lowering/call-op-call-conv.cir
+    %3 = cir.try_call @my_add(%r, %7) ^continue, ^landing_pad : (!s32i, !s32i) -> !s32i cc(spir_function) extra(#fn_attr)
+// CHECK: %2 = cir.try_call @my_add(%0, %1) ^bb1, ^bb2 : (!s32i, !s32i) -> !s32i cc(spir_function) extra(#fn_attr)
+  ^continue:
+    cir.br ^landing_pad
+  ^landing_pad:
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/IR/cast.cir b/clang/test/CIR/Incubator/IR/cast.cir
new file mode 100644
index 0000000000000..f9adde6d969a1
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/cast.cir
@@ -0,0 +1,33 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+!s32i = !cir.int<s, 32>
+
+module  {
+  cir.func @yolo(%arg0 : !s32i) {
+    %0 = cir.alloca !cir.array<!s32i x 10>, !cir.ptr<!cir.array<!s32i x 10>>, ["x", init]
+    %a = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
+
+    %3 = cir.cast array_to_ptrdecay %0 : !cir.ptr<!cir.array<!s32i x 10>> -> !cir.ptr<!s32i>
+    %4 = cir.const #cir.int<0> : !s32i
+    cir.return
+  }
+
+  cir.func @bitcast(%p: !cir.ptr<!s32i>) {
+    %2 = cir.cast bitcast %p : !cir.ptr<!s32i> -> !cir.ptr<f32>
+    cir.return
+  }
+
+  cir.func @addrspace_cast(%arg0: !cir.ptr<!s32i>) {
+    %0 = cir.cast address_space %arg0 : !cir.ptr<!s32i> -> !cir.ptr<!s32i, target_address_space(2)>
+    cir.return
+  }
+}
+
+// CHECK: cir.func @yolo(%arg0: !s32i)
+// CHECK: %1 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
+// CHECK: %2 = cir.cast array_to_ptrdecay %0 : !cir.ptr<!cir.array<!s32i x 10>> -> !cir.ptr<!s32i>
+
+// CHECK: cir.func @bitcast
+// CHECK: %0 = cir.cast bitcast %arg0 : !cir.ptr<!s32i> -> !cir.ptr<f32>
+
+// CHECK: cir.func @addrspace_cast
+// CHECK: %0 = cir.cast address_space %arg0 : !cir.ptr<!s32i> -> !cir.ptr<!s32i, target_address_space(2)>
diff --git a/clang/test/CIR/Incubator/IR/cir-ops.cir b/clang/test/CIR/Incubator/IR/cir-ops.cir
new file mode 100644
index 0000000000000..e85cff4e877cf
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/cir-ops.cir
@@ -0,0 +1,121 @@
+// Test the CIR operations can parse and print correctly (roundtrip)
+
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+!s32i = !cir.int<s, 32>
+!s8i = !cir.int<s, 8>
+!u64i = !cir.int<u, 64>
+
+module  {
+  cir.func @foo(%arg0: !s32i) -> !s32i {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init]
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    cir.store align(1) atomic(seq_cst) %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    %1 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    %2 = cir.load align(1) atomic(seq_cst) %0 : !cir.ptr<!s32i>, !s32i
+    cir.return %1 : !s32i
+  }
+
+  cir.func @f3() -> !s32i {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init]
+    %1 = cir.const #cir.int<3> : !s32i
+    cir.store %1, %0 : !s32i, !cir.ptr<!s32i>
+    %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    cir.return %2 : !s32i
+  }
+
+  cir.func @if0(%arg0: !s32i) -> !s32i {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+    cir.store %arg0, %1 : !s32i, !cir.ptr<!s32i>
+    %2 = cir.const #cir.int<0> : !s32i
+    cir.store %2, %0 : !s32i, !cir.ptr<!s32i>
+    %3 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %4 = cir.cast int_to_bool %3 : !s32i -> !cir.bool
+    cir.if %4 {
+      %6 = cir.const #cir.int<3> : !s32i
+      cir.store %6, %0 : !s32i, !cir.ptr<!s32i>
+    } else {
+      %6 = cir.const #cir.int<4> : !s32i
+      cir.store %6, %0 : !s32i, !cir.ptr<!s32i>
+    }
+    %5 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    cir.return %5 : !s32i
+  }
+
+  cir.func @s0() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x"] {alignment = 4 : i64}
+    cir.scope {
+      %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y"] {alignment = 4 : i64}
+    }
+    cir.return
+  }
+
+  cir.func @os() {
+    %0 = cir.alloca !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>, ["m", init] {alignment = 8 : i64}
+    %3 = cir.load %0 : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
+    %4 = cir.objsize max %3 : !cir.ptr<!s8i> -> !u64i
+    %5 = cir.objsize min %3 : !cir.ptr<!s8i> -> !u64i
+    cir.return
+  }
+
+  cir.func @shiftvec() {
+    %0 = cir.alloca !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>, ["a", init] {alignment = 8 : i64}
+    %1 = cir.load %0 : !cir.ptr<!cir.vector<!s32i x 2>>, !cir.vector<!s32i x 2>
+    %2 = cir.const #cir.const_vector<[#cir.int<12> : !s32i, #cir.int<12> : !s32i]> : !cir.vector<!s32i x 2>
+    %3 = cir.shift(left, %1 : !cir.vector<!s32i x 2>, %2 : !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+    cir.return
+  }
+}
+
+// CHECK: module  {
+
+// CHECK-NEXT:   cir.func @foo(%arg0: !s32i) -> !s32i {
+// CHECK-NEXT:     %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init]
+// CHECK-NEXT:     cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:     cir.store align(1) atomic(seq_cst) %arg0, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:     %1 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:     %2 = cir.load align(1) atomic(seq_cst) %0 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:     cir.return %1 : !s32i
+// CHECK-NEXT:   }
+
+// CHECK-NEXT:   cir.func @f3() -> !s32i {
+// CHECK-NEXT:     %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init]
+// CHECK-NEXT:     %1 = cir.const #cir.int<3> : !s32i
+// CHECK-NEXT:     cir.store %1, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:     %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:     cir.return %2 : !s32i
+// CHECK-NEXT:   }
+
+// CHECK:   @if0(%arg0: !s32i) -> !s32i {
+// CHECK:   %4 = cir.cast int_to_bool %3 : !s32i -> !cir.bool
+// CHECK-NEXT:   cir.if %4 {
+// CHECK-NEXT:     %6 = cir.const #cir.int<3> : !s32i
+// CHECK-NEXT:     cir.store %6, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:   } else {
+// CHECK-NEXT:     %6 = cir.const #cir.int<4> : !s32i
+// CHECK-NEXT:     cir.store %6, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:   }
+
+// CHECK: cir.func @s0() {
+// CHECK-NEXT:     %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x"] {alignment = 4 : i64}
+// CHECK-NEXT:     cir.scope {
+// CHECK-NEXT:       %1 = cir.alloca !s32i,  !cir.ptr<!s32i>, ["y"] {alignment = 4 : i64}
+// CHECK-NEXT:     }
+
+// CHECK: cir.func @os() {
+// CHECK-NEXT:   %0 = cir.alloca !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>, ["m", init] {alignment = 8 : i64}
+// CHECK-NEXT:   %1 = cir.load %0 : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
+// CHECK-NEXT:   %2 = cir.objsize max %1 : !cir.ptr<!s8i> -> !u64i
+// CHECK-NEXT:   %3 = cir.objsize min %1 : !cir.ptr<!s8i> -> !u64i
+// CHECK-NEXT:   cir.return
+// CHECK-NEXT: }
+
+// CHECK:  cir.func @shiftvec() {
+// CHECK-NEXT:    %0 = cir.alloca !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>
+// CHECK-NEXT:    %1 = cir.load %0 : !cir.ptr<!cir.vector<!s32i x 2>>, !cir.vector<!s32i x 2>
+// CHECK-NEXT:    %2 = cir.const #cir.const_vector<[#cir.int<12> : !s32i, #cir.int<12> : !s32i]> : !cir.vector<!s32i x 2>
+// CHECK-NEXT:    %3 = cir.shift(left, %1 : !cir.vector<!s32i x 2>, %2 : !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+// CHECK-NEXT:    cir.return
+// CHECK-NEXT:  }
+
+// CHECK: }
diff --git a/clang/test/CIR/Incubator/IR/cold.cir b/clang/test/CIR/Incubator/IR/cold.cir
new file mode 100644
index 0000000000000..9973c9c011799
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/cold.cir
@@ -0,0 +1,20 @@
+// RUN: cir-opt %s | cir-opt | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+  // CHECK: cir.func cold @cold_func
+  cir.func cold @cold_func() {
+    cir.return
+  }
+
+  // CHECK: cir.func optnone cold @both_attrs
+  cir.func optnone cold @both_attrs() {
+    cir.return
+  }
+
+  // CHECK: cir.func no_inline optnone cold @multiple_attrs
+  cir.func no_inline optnone cold @multiple_attrs() {
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/IR/constptrattr.cir b/clang/test/CIR/Incubator/IR/constptrattr.cir
new file mode 100644
index 0000000000000..e6ec6df43a3fb
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/constptrattr.cir
@@ -0,0 +1,10 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+
+cir.global external @const_ptr = #cir.ptr<4660 : i64> : !cir.ptr<!s32i>
+// CHECK: cir.global external @const_ptr = #cir.ptr<4660 : i64> : !cir.ptr<!s32i>
+cir.global external @signed_ptr = #cir.ptr<-1 : i64> : !cir.ptr<!s32i>
+// CHECK: cir.global external @signed_ptr = #cir.ptr<-1 : i64> : !cir.ptr<!s32i>
+cir.global external @null_ptr = #cir.ptr<null> : !cir.ptr<!s32i>
+// CHECK: cir.global external @null_ptr = #cir.ptr<null> : !cir.ptr<!s32i>
diff --git a/clang/test/CIR/Incubator/IR/copy.cir b/clang/test/CIR/Incubator/IR/copy.cir
new file mode 100644
index 0000000000000..b8e371e8d9e65
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/copy.cir
@@ -0,0 +1,10 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+module {
+  cir.func @shouldParseCopyOp(%arg0 : !cir.ptr<!s32i>, %arg1 : !cir.ptr<!s32i>) {
+    // CHECK: cir.copy
+    cir.copy %arg0 to %arg1 : !cir.ptr<!s32i>
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/IR/cxx-special-member.cir b/clang/test/CIR/Incubator/IR/cxx-special-member.cir
new file mode 100644
index 0000000000000..26ca418a9980f
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/cxx-special-member.cir
@@ -0,0 +1,21 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+!rec_S = !cir.record<struct "S" {!s32i}>
+module {
+  cir.func private @_ZN1SC1ERKS_(!cir.ptr<!rec_S>, !cir.ptr<!rec_S>) special_member<#cir.cxx_ctor<!rec_S, copy>>
+  cir.func private @_ZN1SC1EOS_(!cir.ptr<!rec_S>, !cir.ptr<!rec_S>) special_member<#cir.cxx_ctor<!rec_S, move>>
+  cir.func private @_ZN1SC2Ei(!cir.ptr<!rec_S>, !cir.ptr<!rec_S>)
+  cir.func private @_ZN1SC2Ev(!cir.ptr<!rec_S>) special_member<#cir.cxx_ctor<!rec_S, default>>
+  cir.func private @_ZN1SD2Ev(!cir.ptr<!rec_S>) special_member<#cir.cxx_dtor<!rec_S>>
+}
+
+// CHECK: !s32i = !cir.int<s, 32>
+// CHECK: !rec_S = !cir.record<struct "S" {!s32i}>
+// CHECK: module {
+// CHECK:   cir.func private @_ZN1SC1ERKS_(!cir.ptr<!rec_S>, !cir.ptr<!rec_S>) special_member<#cir.cxx_ctor<!rec_S, copy>>
+// CHECK:   cir.func private @_ZN1SC1EOS_(!cir.ptr<!rec_S>, !cir.ptr<!rec_S>) special_member<#cir.cxx_ctor<!rec_S, move>>
+// CHECK:   cir.func private @_ZN1SC2Ei(!cir.ptr<!rec_S>, !cir.ptr<!rec_S>)
+// CHECK:   cir.func private @_ZN1SC2Ev(!cir.ptr<!rec_S>) special_member<#cir.cxx_ctor<!rec_S, default>>
+// CHECK:   cir.func private @_ZN1SD2Ev(!cir.ptr<!rec_S>) special_member<#cir.cxx_dtor<!rec_S>>
+// CHECK: }
diff --git a/clang/test/CIR/Incubator/IR/data-member-ptr.cir b/clang/test/CIR/Incubator/IR/data-member-ptr.cir
new file mode 100644
index 0000000000000..63b345de582d1
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/data-member-ptr.cir
@@ -0,0 +1,32 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+!rec_Foo = !cir.record<struct "Foo" {!s32i}>
+
+module {
+  cir.func @null_member() {
+    %0 = cir.const #cir.data_member<null> : !cir.data_member<!s32i in !rec_Foo>
+    cir.return
+  }
+
+  cir.func @get_runtime_member(%arg0: !cir.ptr<!rec_Foo>) {
+    %0 = cir.const #cir.data_member<0> : !cir.data_member<!s32i in !rec_Foo>
+    %1 = cir.get_runtime_member %arg0[%0 : !cir.data_member<!s32i in !rec_Foo>] : !cir.ptr<!rec_Foo> -> !cir.ptr<!s32i>
+    cir.return
+  }
+}
+
+//      CHECK: module {
+
+// CHECK-NEXT:   cir.func @null_member() {
+// CHECK-NEXT:     %0 = cir.const #cir.data_member<null> : !cir.data_member<!s32i in !rec_Foo>
+// CHECK-NEXT:     cir.return
+// CHECK-NEXT:   }
+
+// CHECK-NEXT:   cir.func @get_runtime_member(%arg0: !cir.ptr<!rec_Foo>) {
+// CHECK-NEXT:      %0 = cir.const #cir.data_member<0> : !cir.data_member<!s32i in !rec_Foo>
+// CHECK-NEXT:      %1 = cir.get_runtime_member %arg0[%0 : !cir.data_member<!s32i in !rec_Foo>] : !cir.ptr<!rec_Foo> -> !cir.ptr<!s32i>
+// CHECK-NEXT:      cir.return
+// CHECK-NEXT:   }
+
+//      CHECK: }
diff --git a/clang/test/CIR/Incubator/IR/do-while.cir b/clang/test/CIR/Incubator/IR/do-while.cir
new file mode 100644
index 0000000000000..195cf411607df
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/do-while.cir
@@ -0,0 +1,17 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+cir.func @testPrintingAndParsing (%arg0 : !cir.bool) -> !cir.void {
+  cir.do {
+    cir.yield
+  } while {
+    cir.condition(%arg0)
+  }
+  cir.return
+}
+
+// CHECK: testPrintingAndParsing
+// CHECK: cir.do {
+// CHECK:   cir.yield
+// CHECK: } while {
+// CHECK:   cir.condition(%arg0)
+// CHECK: }
diff --git a/clang/test/CIR/Incubator/IR/dynamic-cast.cir b/clang/test/CIR/Incubator/IR/dynamic-cast.cir
new file mode 100644
index 0000000000000..aa542f26ba3bc
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/dynamic-cast.cir
@@ -0,0 +1,59 @@
+// RUN: cir-opt --verify-roundtrip %s | FileCheck %s
+
+!s64i = !cir.int<s, 64>
+!u8i = !cir.int<u, 8>
+!void = !cir.void
+
+!rec_Base = !cir.record<struct "Base" {!cir.vptr}>
+!rec_Derived = !cir.record<struct "Derived" {!rec_Base}>
+
+#dyn_cast_info__ZTI4Base__ZTI7Derived = #cir.dyn_cast_info<src_rtti = #cir.global_view<@_ZTI4Base> : !cir.ptr<!u8i>, dest_rtti = #cir.global_view<@_ZTI7Derived> : !cir.ptr<!u8i>, runtime_func = @__dynamic_cast, bad_cast_func = @__cxa_bad_cast, offset_hint = #cir.int<0> : !s64i>
+
+// CHECK: #dyn_cast_info__ZTI4Base__ZTI7Derived = #cir.dyn_cast_info<src_rtti = #cir.global_view<@_ZTI4Base> : !cir.ptr<!u8i>, dest_rtti = #cir.global_view<@_ZTI7Derived> : !cir.ptr<!u8i>, runtime_func = @__dynamic_cast, bad_cast_func = @__cxa_bad_cast, offset_hint = #cir.int<0> : !s64i>
+
+module {
+  cir.global "private" constant external @_ZTI4Base : !cir.ptr<!u8i>
+  cir.global "private" constant external @_ZTI7Derived : !cir.ptr<!u8i>
+  cir.func private @__dynamic_cast(!cir.ptr<!void>, !cir.ptr<!u8i>, !cir.ptr<!u8i>, !s64i) -> !cir.ptr<!void>
+  cir.func private @__cxa_bad_cast()
+
+  cir.func @test_ptr_cast(%arg0: !cir.ptr<!rec_Base>) -> !cir.ptr<!rec_Derived> {
+    %0 = cir.dyn_cast ptr %arg0 : !cir.ptr<!rec_Base> -> !cir.ptr<!rec_Derived> #dyn_cast_info__ZTI4Base__ZTI7Derived
+    cir.return %0 : !cir.ptr<!rec_Derived>
+  }
+
+  // CHECK:   cir.func @test_ptr_cast(%arg0: !cir.ptr<!rec_Base>) -> !cir.ptr<!rec_Derived> {
+  // CHECK:     %0 = cir.dyn_cast ptr %arg0 : !cir.ptr<!rec_Base> -> !cir.ptr<!rec_Derived> #dyn_cast_info__ZTI4Base__ZTI7Derived
+  // CHECK:     cir.return %0 : !cir.ptr<!rec_Derived>
+  // CHECK:   }
+
+  cir.func @test_ref_cast(%arg0: !cir.ptr<!rec_Base>) -> !cir.ptr<!rec_Derived> {
+    %0 = cir.dyn_cast ref %arg0 : !cir.ptr<!rec_Base> -> !cir.ptr<!rec_Derived> #dyn_cast_info__ZTI4Base__ZTI7Derived
+    cir.return %0 : !cir.ptr<!rec_Derived>
+  }
+
+  // CHECK:   cir.func @test_ref_cast(%arg0: !cir.ptr<!rec_Base>) -> !cir.ptr<!rec_Derived> {
+  // CHECK:     %0 = cir.dyn_cast ref %arg0 : !cir.ptr<!rec_Base> -> !cir.ptr<!rec_Derived> #dyn_cast_info__ZTI4Base__ZTI7Derived
+  // CHECK:     cir.return %0 : !cir.ptr<!rec_Derived>
+  // CHECK:   }
+
+  cir.func dso_local @test_cast_to_void(%arg0: !cir.ptr<!rec_Base>) -> !cir.ptr<!void> {
+   %0 = cir.dyn_cast ptr %arg0 : !cir.ptr<!rec_Base> -> !cir.ptr<!void>
+   cir.return %0 : !cir.ptr<!void>
+  }
+
+  // CHECK: cir.func {{.*}} @test_cast_to_void(%arg0: !cir.ptr<!rec_Base>) -> !cir.ptr<!void> {
+  // CHECK:     %0 = cir.dyn_cast ptr %arg0 : !cir.ptr<!rec_Base> -> !cir.ptr<!void>
+  // CHECK:     cir.return %0 : !cir.ptr<!void>
+  // CHECK:   }
+
+  cir.func dso_local @test_relative_layout_cast(%arg0: !cir.ptr<!rec_Base>) -> !cir.ptr<!void> {
+   %0 = cir.dyn_cast ptr relative_layout %arg0 : !cir.ptr<!rec_Base> -> !cir.ptr<!void>
+   cir.return %0 : !cir.ptr<!void>
+  }
+
+  // CHECK: cir.func {{.*}} @test_relative_layout_cast(%arg0: !cir.ptr<!rec_Base>) -> !cir.ptr<!void> {
+  // CHECK:     %0 = cir.dyn_cast ptr relative_layout %arg0 : !cir.ptr<!rec_Base> -> !cir.ptr<!void>
+  // CHECK:     cir.return %0 : !cir.ptr<!void>
+  // CHECK:   }
+}
diff --git a/clang/test/CIR/Incubator/IR/exceptions.cir b/clang/test/CIR/Incubator/IR/exceptions.cir
new file mode 100644
index 0000000000000..acde8a64b2c44
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/exceptions.cir
@@ -0,0 +1,69 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+// XFAIL: *
+
+!s32i = !cir.int<s, 32>
+!s8i = !cir.int<s, 8>
+!u64i = !cir.int<u, 64>
+!u8i = !cir.int<u, 8>
+!void = !cir.void
+
+module {
+    // Generated from clang/test/CIR/CodeGen/try-catch.cpp
+    cir.func @_Z2tcv() -> !u64i {
+    %0 = cir.alloca !u64i, !cir.ptr<!u64i>, ["__retval"] {alignment = 8 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+    %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {alignment = 4 : i64}
+    %3 = cir.alloca !u64i, !cir.ptr<!u64i>, ["z"] {alignment = 8 : i64}
+    %4 = cir.const #cir.int<50> : !s32i
+    cir.store %4, %1 : !s32i, !cir.ptr<!s32i>
+    %5 = cir.const #cir.int<3> : !s32i
+    cir.store %5, %2 : !s32i, !cir.ptr<!s32i>
+    cir.scope {
+      %8 = cir.alloca !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>, ["msg"] {alignment = 8 : i64}
+      %9 = cir.alloca !s32i, !cir.ptr<!s32i>, ["idx"] {alignment = 4 : i64}
+      cir.try {
+        %10 = cir.alloca !cir.ptr<!cir.eh.info>, !cir.ptr<!cir.ptr<!cir.eh.info>>, ["__exception_ptr"] {alignment = 1 : i64}
+        %11 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+        %12 = cir.const #cir.int<4> : !s32i
+        cir.store %12, %11 : !s32i, !cir.ptr<!s32i>
+        %13 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+        %14 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+        %15 = cir.try_call exception(%10) @_Z8divisionii(%13, %14) : (!s32i, !s32i) -> !cir.double
+        %16 = cir.cast float_to_int %15 : !cir.double -> !u64i
+        cir.store %16, %3 : !u64i, !cir.ptr<!u64i>
+        %17 = cir.load %11 : !cir.ptr<!s32i>, !s32i
+        %18 = cir.unary(inc, %17) : !s32i, !s32i
+        cir.store %18, %11 : !s32i, !cir.ptr<!s32i>
+        %19 = cir.load %10 : !cir.ptr<!cir.ptr<!cir.eh.info>>, !cir.ptr<!cir.eh.info>
+        cir.yield
+      } catch [type #cir.global_view<@_ZTIi> : !cir.ptr<!u8i> {
+        %10 = cir.catch_param -> !cir.ptr<!s32i>
+        %11 = cir.load %10 : !cir.ptr<!s32i>, !s32i
+        cir.store %11, %9 : !s32i, !cir.ptr<!s32i>
+        %12 = cir.const #cir.int<98> : !s32i
+        %13 = cir.cast integral %12 : !s32i -> !u64i
+        cir.store %13, %3 : !u64i, !cir.ptr<!u64i>
+        %14 = cir.load %9 : !cir.ptr<!s32i>, !s32i
+        %15 = cir.unary(inc, %14) : !s32i, !s32i
+        cir.store %15, %9 : !s32i, !cir.ptr<!s32i>
+        cir.yield
+      }, type #cir.global_view<@_ZTIPKc> : !cir.ptr<!u8i> {
+        %10 = cir.catch_param -> !cir.ptr<!s8i>
+        cir.store %10, %8 : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+        %11 = cir.const #cir.int<99> : !s32i
+        %12 = cir.cast integral %11 : !s32i -> !u64i
+        cir.store %12, %3 : !u64i, !cir.ptr<!u64i>
+        %13 = cir.load %8 : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
+        %14 = cir.const #cir.int<0> : !s32i
+        %15 = cir.ptr_stride %13, %14 : (!cir.ptr<!s8i>, !s32i) -> !cir.ptr<!s8i>
+        cir.yield
+      }, #cir.unwind {
+        cir.resume
+      }]
+    }
+    %6 = cir.load %3 : !cir.ptr<!u64i>, !u64i
+    cir.store %6, %0 : !u64i, !cir.ptr<!u64i>
+    %7 = cir.load %0 : !cir.ptr<!u64i>, !u64i
+    cir.return %7 : !u64i
+  }
+}
diff --git a/clang/test/CIR/Incubator/IR/float.cir b/clang/test/CIR/Incubator/IR/float.cir
new file mode 100644
index 0000000000000..76a5075a8aef4
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/float.cir
@@ -0,0 +1,90 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+// Adapted from mlir/test/IR/parser.mlir
+
+// CHECK-LABEL: @f32_special_values
+cir.func @f32_special_values() {
+  // F32 signaling NaNs.
+  // CHECK: cir.const #cir.fp<0x7F800001> : !cir.float
+  %0 = cir.const #cir.fp<0x7F800001> : !cir.float
+  // CHECK: cir.const #cir.fp<0x7FBFFFFF> : !cir.float
+  %1 = cir.const #cir.fp<0x7FBFFFFF> : !cir.float
+
+  // F32 quiet NaNs.
+  // CHECK: cir.const #cir.fp<0x7FC00000> : !cir.float
+  %2 = cir.const #cir.fp<0x7FC00000> : !cir.float
+  // CHECK: cir.const #cir.fp<0xFFFFFFFF> : !cir.float
+  %3 = cir.const #cir.fp<0xFFFFFFFF> : !cir.float
+
+  // F32 positive infinity.
+  // CHECK: cir.const #cir.fp<0x7F800000> : !cir.float
+  %4 = cir.const #cir.fp<0x7F800000> : !cir.float
+  // F32 negative infinity.
+  // CHECK: cir.const #cir.fp<0xFF800000> : !cir.float
+  %5 = cir.const #cir.fp<0xFF800000> : !cir.float
+
+  cir.return
+}
+
+// CHECK-LABEL: @f64_special_values
+cir.func @f64_special_values() {
+  // F64 signaling NaNs.
+  // CHECK: cir.const #cir.fp<0x7FF0000000000001> : !cir.double
+  %0 = cir.const #cir.fp<0x7FF0000000000001> : !cir.double
+  // CHECK: cir.const #cir.fp<0x7FF8000000000000> : !cir.double
+  %1 = cir.const #cir.fp<0x7FF8000000000000> : !cir.double
+
+  // F64 quiet NaNs.
+  // CHECK: cir.const #cir.fp<0x7FF0000001000000> : !cir.double
+  %2 = cir.const #cir.fp<0x7FF0000001000000> : !cir.double
+  // CHECK: cir.const #cir.fp<0xFFF0000001000000> : !cir.double
+  %3 = cir.const #cir.fp<0xFFF0000001000000> : !cir.double
+
+  // F64 positive infinity.
+  // CHECK: cir.const #cir.fp<0x7FF0000000000000> : !cir.double
+  %4 = cir.const #cir.fp<0x7FF0000000000000> : !cir.double
+  // F64 negative infinity.
+  // CHECK: cir.const #cir.fp<0xFFF0000000000000> : !cir.double
+  %5 = cir.const #cir.fp<0xFFF0000000000000> : !cir.double
+
+  // Check that values that can't be represented with the default format, use
+  // hex instead.
+  // CHECK: cir.const #cir.fp<0xC1CDC00000000000> : !cir.double
+  %6 = cir.const #cir.fp<0xC1CDC00000000000> : !cir.double
+
+  cir.return
+}
+
+// CHECK-LABEL: @f80_special_values
+cir.func @f80_special_values() {
+  // F80 signaling NaNs.
+  // CHECK: cir.const #cir.fp<0x7FFFE000000000000001> : !cir.long_double<!cir.f80>
+  %0 = cir.const #cir.fp<0x7FFFE000000000000001> : !cir.long_double<!cir.f80>
+  // CHECK: cir.const #cir.fp<0x7FFFB000000000000011> : !cir.long_double<!cir.f80>
+  %1 = cir.const #cir.fp<0x7FFFB000000000000011> : !cir.long_double<!cir.f80>
+
+  // F80 quiet NaNs.
+  // CHECK: cir.const #cir.fp<0x7FFFC000000000100000> : !cir.long_double<!cir.f80>
+  %2 = cir.const #cir.fp<0x7FFFC000000000100000> : !cir.long_double<!cir.f80>
+  // CHECK: cir.const #cir.fp<0x7FFFE000000001000000> : !cir.long_double<!cir.f80>
+  %3 = cir.const #cir.fp<0x7FFFE000000001000000> : !cir.long_double<!cir.f80>
+
+  // F80 positive infinity.
+  // CHECK: cir.const #cir.fp<0x7FFF8000000000000000> : !cir.long_double<!cir.f80>
+  %4 = cir.const #cir.fp<0x7FFF8000000000000000> : !cir.long_double<!cir.f80>
+  // F80 negative infinity.
+  // CHECK: cir.const #cir.fp<0xFFFF8000000000000000> : !cir.long_double<!cir.f80>
+  %5 = cir.const #cir.fp<0xFFFF8000000000000000> : !cir.long_double<!cir.f80>
+
+  cir.return
+}
+
+// We want to print floats in exponential notation with 6 significant digits,
+// but it may lead to precision loss when parsing back, in which case we print
+// the decimal form instead.
+// CHECK-LABEL: @f32_potential_precision_loss()
+cir.func @f32_potential_precision_loss() {
+  // CHECK: cir.const #cir.fp<1.23697901> : !cir.float
+  %0 = cir.const #cir.fp<1.23697901> : !cir.float
+  cir.return
+}
diff --git a/clang/test/CIR/Incubator/IR/for.cir b/clang/test/CIR/Incubator/IR/for.cir
new file mode 100644
index 0000000000000..d7b8d27a4b006
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/for.cir
@@ -0,0 +1,21 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+cir.func @testPrintingParsing(%arg0 : !cir.bool) {
+  cir.for : cond {
+    cir.condition(%arg0)
+  } body {
+    cir.yield
+  } step {
+    cir.yield
+  }
+  cir.return
+}
+
+// CHECK: @testPrintingParsing
+// CHECK: cir.for : cond {
+// CHECK:   cir.condition(%arg0)
+// CHECK: } body {
+// CHECK:   cir.yield
+// CHECK: } step {
+// CHECK:   cir.yield
+// CHECK: }
diff --git a/clang/test/CIR/Incubator/IR/func-call-conv.cir b/clang/test/CIR/Incubator/IR/func-call-conv.cir
new file mode 100644
index 0000000000000..9933d59278ec9
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/func-call-conv.cir
@@ -0,0 +1,24 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+    // CHECK: cir.func @foo() {
+    cir.func @foo() cc(c) {
+        cir.return
+    }
+
+    // CHECK: cir.func @bar() cc(spir_kernel)
+    cir.func @bar() cc(spir_kernel) {
+        cir.return
+    }
+
+    // CHECK: cir.func @bar_alias() alias(@bar) cc(spir_kernel)
+    cir.func @bar_alias() alias(@bar) cc(spir_kernel)
+
+    // CHECK: cir.func no_inline @baz() cc(spir_function)
+    cir.func no_inline @baz() cc(spir_function) {
+        cir.return
+    }
+}
+
diff --git a/clang/test/CIR/Incubator/IR/func-dsolocal-parser.cir b/clang/test/CIR/Incubator/IR/func-dsolocal-parser.cir
new file mode 100644
index 0000000000000..e982414e70a2d
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/func-dsolocal-parser.cir
@@ -0,0 +1,13 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+#fn_attr = #cir<extra({nothrow = #cir.nothrow})>
+module {
+    cir.func no_inline optnone dso_local @foo(%arg0: !s32i ) extra(#fn_attr) {
+        %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+        cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+        cir.return
+    }
+}
+
+// CHECK: cir.func {{.*}} @foo(%arg0: !s32i) extra(#fn_attr)
diff --git a/clang/test/CIR/Incubator/IR/func.cir b/clang/test/CIR/Incubator/IR/func.cir
new file mode 100644
index 0000000000000..ef27a64120326
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/func.cir
@@ -0,0 +1,57 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+!u8i = !cir.int<u, 8>
+module {
+  cir.func @l0() {
+    cir.return
+  }
+
+  // Should print/parse function aliases.
+  // CHECK: cir.func @l1() alias(@l0)
+  cir.func @l1() alias(@l0)
+
+  // Should print/parse variadic function types.
+  // CHECK: cir.func private @variadic(!s32i, ...) -> !s32i
+  cir.func private @variadic(!s32i, ...) -> !s32i
+
+  // Should accept call with only the required parameters.
+  cir.func @variadic_call_1(%0: !s32i) -> !s32i {
+    %9 = cir.call @variadic(%0) : (!s32i) -> !s32i
+    cir.return %9 : !s32i
+  }
+
+  // Should accept calls with variadic parameters.
+  cir.func @variadic_call_2(%0: !s32i, %1: !s32i,  %2: !u8i) -> !s32i {
+    %9 = cir.call @variadic(%0, %1, %2) : (!s32i, !s32i, !u8i) -> !s32i
+    cir.return %9 : !s32i
+  }
+
+  // Should parse custom assembly format.
+  cir.func @parse_func_type() -> () {
+    %1 = cir.alloca !cir.ptr<!cir.func<(!s32i, ...) -> !s32i>>, !cir.ptr<!cir.ptr<!cir.func<(!s32i, ...) -> !s32i>>>, ["fn", init] {alignment = 8 : i64}
+    cir.return
+  }
+
+  // Should drop void return types.
+  // CHECK: cir.func @parse_explicit_void_func() {
+  cir.func @parse_explicit_void_func() -> !cir.void {
+    cir.return
+  }
+
+  // Should print/parse omitted void return type.
+  // CHECK: cir.func @parse_func_type_with_omitted_void() {
+  cir.func @parse_func_type_with_omitted_void() {
+    cir.return
+  }
+
+  // Should print/parse variadic no-proto functions.
+  // CHECK: cir.func no_proto private @no_proto(...) -> !s32i
+  cir.func no_proto private @no_proto(...) -> !s32i
+
+  // Should print/parse argument and result attributes.
+  // CHECK: cir.func @parse_arg_res_attrs(%arg0: !u8i {cir.zeroext}) -> (!u8i {cir.zeroext}) {
+  cir.func @parse_arg_res_attrs(%0: !u8i {cir.zeroext}) -> (!u8i {cir.zeroext}) {
+    cir.return %0 : !u8i
+  }
+}
diff --git a/clang/test/CIR/Incubator/IR/getmember.cir b/clang/test/CIR/Incubator/IR/getmember.cir
new file mode 100644
index 0000000000000..abec53ec169f4
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/getmember.cir
@@ -0,0 +1,14 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!u16i = !cir.int<u, 16>
+!u32i = !cir.int<u, 32>
+
+!rec_Struct = !cir.record<struct "Struct" {!u16i, !u32i}>
+
+module  {
+  cir.func @shouldGetStructMember(%arg0 : !cir.ptr<!rec_Struct>) {
+    // CHECK: cir.get_member %arg0[1] {name = "test"} : !cir.ptr<!rec_Struct> -> !cir.ptr<!u32i>
+    %0 = cir.get_member %arg0[1] {name = "test"} : !cir.ptr<!rec_Struct> -> !cir.ptr<!u32i>
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/IR/global.cir b/clang/test/CIR/Incubator/IR/global.cir
new file mode 100644
index 0000000000000..50fdcddf1610c
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/global.cir
@@ -0,0 +1,113 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+!s8i = !cir.int<s, 8>
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+!rec_Init = !cir.record<class "Init" {!s8i} #cir.record.decl.ast>
+module {
+  cir.global external @a = #cir.int<3> : !s32i
+  cir.global external @rgb = #cir.const_array<[#cir.int<0> : !s8i, #cir.int<-23> : !s8i, #cir.int<33> : !s8i] : !cir.array<!s8i x 3>>
+  cir.global external @b = #cir.const_array<"example\00" : !cir.array<!s8i x 8>>
+  cir.global external @rgb2 = #cir.const_record<{#cir.int<0> : !s8i, #cir.int<5> : !s64i, #cir.ptr<null> : !cir.ptr<!s8i>}> : !cir.record<struct {!s8i, !s64i, !cir.ptr<!s8i>}>
+  cir.global "private" constant internal @".str" : !cir.array<!s8i x 8> {alignment = 1 : i64}
+  cir.global "private" internal @c : !s32i
+  cir.global "private" constant internal @".str.2" = #cir.const_array<"example\00" : !cir.array<!s8i x 8>> : !cir.array<!s8i x 8> {alignment = 1 : i64}
+  cir.global external @s = #cir.global_view<@".str.2"> : !cir.ptr<!s8i>
+  cir.func @use_global() {
+    %0 = cir.get_global @a : !cir.ptr<!s32i>
+    cir.return
+  }
+  cir.global external @table = #cir.global_view<@s> : !cir.ptr<!s8i>
+  cir.global external @elt_ptr = #cir.global_view<@rgb, [1]> : !cir.ptr<!s8i>
+  cir.global external @table_of_ptrs = #cir.const_array<[#cir.global_view<@rgb, [1]> : !cir.ptr<!s8i>] : !cir.array<!cir.ptr<!s8i> x 1>>
+
+  // Note: MLIR requires the "private" visibility keyword on global
+  // declarations; we should consider dropping that requirement for clarity.
+  cir.global "private" external @_ZTVN10__cxxabiv120__si_class_type_infoE : !cir.ptr<!s32i>
+  cir.global "private" constant external @type_info_A : !cir.ptr<!s8i>
+  cir.global constant external @type_info_name_B = #cir.const_array<"1B\00" : !cir.array<!s8i x 3>>
+
+  cir.global external @type_info_B = #cir.typeinfo<{
+    #cir.global_view<@_ZTVN10__cxxabiv120__si_class_type_infoE, [2]> : !cir.ptr<!s8i>,
+    #cir.global_view<@type_info_name_B> : !cir.ptr<!s8i>,
+    #cir.global_view<@type_info_A> : !cir.ptr<!s8i>}>
+  : !cir.record<struct {!cir.ptr<!s8i>, !cir.ptr<!s8i>, !cir.ptr<!s8i>}>
+  cir.func private @_ZN4InitC1Eb(!cir.ptr<!rec_Init>, !s8i)
+  cir.func private @_ZN4InitD1Ev(!cir.ptr<!rec_Init>)
+  cir.global "private" internal @_ZL8__ioinit = ctor : !rec_Init {
+    %0 = cir.get_global @_ZL8__ioinit : !cir.ptr<!rec_Init>
+    %1 = cir.const #cir.int<3> : !s8i
+    cir.call @_ZN4InitC1Eb(%0, %1) : (!cir.ptr<!rec_Init>, !s8i) -> ()
+  } dtor {
+    %0 = cir.get_global @_ZL8__ioinit : !cir.ptr<!rec_Init>
+    cir.call @_ZN4InitD1Ev(%0) : (!cir.ptr<!rec_Init>) -> ()
+  }
+
+  cir.func @f31() global_ctor {
+    cir.return
+  }
+
+  cir.func @f32() global_ctor(777) {
+    cir.return
+  }
+
+  cir.func @f33() global_dtor {
+    cir.return
+  }
+
+  cir.func @f34() global_dtor(777) {
+    cir.return
+  }
+
+  cir.global external tls_dyn @model0 = #cir.int<0> : !s32i
+  cir.global external tls_local_dyn @model1 = #cir.int<0> : !s32i
+  cir.global external tls_init_exec @model2 = #cir.int<0> : !s32i
+  cir.global external tls_local_exec @model3 = #cir.int<0> : !s32i
+
+  cir.global "private" external tls_dyn @batata : !s32i
+  cir.func @f35() {
+    %0 = cir.get_global thread_local @batata : !cir.ptr<!s32i>
+    cir.return
+  }
+
+  cir.global external lang_address_space(offload_global) @addrspace1 = #cir.int<1> : !s32i
+  cir.global "private" internal lang_address_space(offload_local) @addrspace2 : !s32i
+  cir.global external target_address_space(1) @addrspace3 = #cir.int<3> : !s32i
+}
+
+// CHECK: cir.global external @a = #cir.int<3> : !s32i
+// CHECK: cir.global external @rgb = #cir.const_array<[#cir.int<0> : !s8i, #cir.int<-23> : !s8i, #cir.int<33> : !s8i]> : !cir.array<!s8i x 3>
+// CHECK: cir.global external @b = #cir.const_array<"example\00" : !cir.array<!s8i x 8>>
+// CHECK: cir.global "private" constant internal @".str" : !cir.array<!s8i x 8> {alignment = 1 : i64}
+// CHECK: cir.global "private" internal @c : !s32i
+// CHECK: cir.global "private" constant internal @".str.2" = #cir.const_array<"example\00" : !cir.array<!s8i x 8>> : !cir.array<!s8i x 8> {alignment = 1 : i64}
+// CHECK: cir.global external @s = #cir.global_view<@".str.2"> : !cir.ptr<!s8i>
+
+
+// CHECK: cir.func @use_global()
+// CHECK-NEXT: %0 = cir.get_global @a : !cir.ptr<!s32i>
+
+// CHECK:      cir.global "private" internal @_ZL8__ioinit = ctor : !rec_Init {
+// CHECK-NEXT:  %0 = cir.get_global @_ZL8__ioinit : !cir.ptr<!rec_Init>
+// CHECK-NEXT:  %1 = cir.const #cir.int<3> : !s8i
+// CHECK-NEXT:  cir.call @_ZN4InitC1Eb(%0, %1) : (!cir.ptr<!rec_Init>, !s8i) -> ()
+// CHECK-NEXT: }
+
+// CHECK: cir.func @f31() global_ctor
+// CHECK: cir.func @f32() global_ctor(777)
+// CHECK: cir.func @f33() global_dtor
+// CHECK: cir.func @f34() global_dtor(777)
+
+// CHECK: cir.global external tls_dyn @model0 = #cir.int<0> : !s32i
+// CHECK: cir.global external tls_local_dyn @model1 = #cir.int<0> : !s32i
+// CHECK: cir.global external tls_init_exec @model2 = #cir.int<0> : !s32i
+// CHECK: cir.global external tls_local_exec @model3 = #cir.int<0> : !s32i
+
+// CHECK: cir.global "private" external tls_dyn @batata : !s32i
+// CHECK: cir.func @f35() {
+// CHECK:   %0 = cir.get_global thread_local @batata : !cir.ptr<!s32i>
+// CHECK:   cir.return
+// CHECK: }
+
+// CHECK: cir.global external lang_address_space(offload_global) @addrspace1 = #cir.int<1> : !s32i
+// CHECK: cir.global "private" internal lang_address_space(offload_local) @addrspace2 : !s32i
+// CHECK: cir.global external target_address_space(1) @addrspace3 = #cir.int<3> : !s32i
diff --git a/clang/test/CIR/Incubator/IR/indirect-br.cir b/clang/test/CIR/Incubator/IR/indirect-br.cir
new file mode 100644
index 0000000000000..84eac684be102
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/indirect-br.cir
@@ -0,0 +1,46 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!void = !cir.void
+
+cir.func @E() {
+  %0 = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["ptr", init] {alignment = 8 : i64}
+  %1 = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["ptr2", init] {alignment = 8 : i64}
+  %2 = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["ptr3", init] {alignment = 8 : i64}
+  %3 = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["ptr4", init] {alignment = 8 : i64}
+  %4 = cir.blockaddress <@E, "D"> -> !cir.ptr<!void>
+  cir.store align(8) %4, %0 : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+  %5 = cir.blockaddress <@E, "C"> -> !cir.ptr<!void>
+  cir.store align(8) %5, %1 : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+  %6 = cir.load align(8) %0 : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+  cir.br ^bb1(%6 : !cir.ptr<!void>)
+^bb1(%7: !cir.ptr<!void>):  // pred: ^bb0
+  cir.indirectbr %7 : <!void>, [
+  ^bb5,
+  ^bb4,
+  ^bb3,
+  ^bb2
+  ]
+^bb2:  // pred: ^bb1
+  cir.label "A"
+  cir.br ^bb3
+^bb3:  // 2 preds: ^bb1, ^bb2
+  cir.label "B"
+  %8 = cir.blockaddress <@E, "B"> -> !cir.ptr<!void>
+  cir.store align(8) %8, %2 : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+  %9 = cir.blockaddress <@E, "A"> -> !cir.ptr<!void>
+  cir.store align(8) %9, %3 : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+  cir.br ^bb4
+^bb4:  // 2 preds: ^bb1, ^bb3
+  cir.label "C"
+  cir.br ^bb5
+^bb5:  // 2 preds: ^bb1, ^bb4
+  cir.label "D"
+  cir.return
+}
+
+// CHECK: cir.indirectbr %7 : <!void>, [
+// CHECK:  ^bb5,
+// CHECK:  ^bb4,
+// CHECK:  ^bb3,
+// CHECK:  ^bb2
+// CHECK: ]
diff --git a/clang/test/CIR/Incubator/IR/inlineAttr.cir b/clang/test/CIR/Incubator/IR/inlineAttr.cir
new file mode 100644
index 0000000000000..2f6445a5c442d
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/inlineAttr.cir
@@ -0,0 +1,12 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s -check-prefix=CIR
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+
+module {
+  cir.func no_inline @l0() {
+    cir.return
+  }
+}
+
+// CIR: cir.func no_inline @l0()
+
+// MLIR: llvm.func @l0() attributes {cir.extra_attrs = #fn_attr, global_visibility = #cir<visibility default>, no_inline
diff --git a/clang/test/CIR/Incubator/IR/int.cir b/clang/test/CIR/Incubator/IR/int.cir
new file mode 100644
index 0000000000000..47fb0f4931c65
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/int.cir
@@ -0,0 +1,39 @@
+// module {
+//     cir.global external @a = #cir.int<255> : !cir.int<u, 8>
+// }
+
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+!s8i = !cir.int<s, 8>
+!s16i = !cir.int<s, 16>
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+
+!u8i = !cir.int<u, 8>
+!u16i = !cir.int<u, 16>
+!u32i = !cir.int<u, 32>
+!u64i = !cir.int<u, 64>
+
+cir.func @validIntTypesAndAttributes() -> () {
+
+    %1 = cir.const #cir.int<-128> : !cir.int<s, 8>
+    %2 = cir.const #cir.int<127> : !cir.int<s, 8>
+    %3 = cir.const #cir.int<255> : !cir.int<u, 8>
+
+    %4 = cir.const #cir.int<-32768> : !cir.int<s, 16>
+    %5 = cir.const #cir.int<32767> : !cir.int<s, 16>
+    %6 = cir.const #cir.int<65535> : !cir.int<u, 16>
+
+    %7 = cir.const #cir.int<-2147483648> : !cir.int<s, 32>
+    %8 = cir.const #cir.int<2147483647> : !cir.int<s, 32>
+    %9 = cir.const #cir.int<4294967295> : !cir.int<u, 32>
+
+    // FIXME: MLIR emits a "too large" error for this one, likely because the magnitude 9223372036854775808 is parsed before the sign is applied and overflows !s64i — confirm in the attribute parser.
+    // %10 = cir.const #cir.int<-9223372036854775808> : !cir.int<s, 64>
+    %11 = cir.const #cir.int<9223372036854775807> : !cir.int<s, 64>
+    %12 = cir.const #cir.int<18446744073709551615> : !cir.int<u, 64>
+
+    cir.return
+}
+
+// No need to check stuff. If it parses, it's fine.
+// CHECK: cir.func @validIntTypesAndAttributes()
diff --git a/clang/test/CIR/Incubator/IR/invalid-annotations.cir b/clang/test/CIR/Incubator/IR/invalid-annotations.cir
new file mode 100644
index 0000000000000..0c3b56f758390
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/invalid-annotations.cir
@@ -0,0 +1,32 @@
+// Test attempt to construct ill-formed global annotations
+// RUN: cir-opt %s -verify-diagnostics -split-input-file
+
+
+// expected-error @below {{invalid kind of attribute specified}}
+// expected-error @below {{failed to parse CIR_AnnotationAttr parameter 'name' which is to be a `mlir::StringAttr`}}
+cir.global external @a = #cir.ptr<null> : !cir.ptr<!cir.double> [#cir.annotation<name = 18, args = ["21", 12 : i32]>]
+
+// -----
+
+// expected-error @below {{GlobalAnnotationValuesAttr should at least have one annotation}}
+module attributes {cir.global_annotations = #cir<global_annotations []>} {}
+
+// -----
+
+// expected-error @below {{Element of GlobalAnnotationValuesAttr annotations array must be an array}}
+module attributes {cir.global_annotations = #cir<global_annotations ["2"]>} {}
+
+// -----
+
+// expected-error @below {{Element of GlobalAnnotationValuesAttr annotations array must be a 2-element array}}
+module attributes {cir.global_annotations = #cir<global_annotations [["2",1,2]]>} {}
+
+// -----
+
+// expected-error @below {{Element of GlobalAnnotationValuesAttr annotationsarray must start with a string}}
+module attributes {cir.global_annotations = #cir<global_annotations [[2,[1,2]]]>} {}
+
+// -----
+
+// expected-error @below {{The second element of GlobalAnnotationValuesAttrannotations array element must be of type AnnotationValueAttr}}
+module attributes {cir.global_annotations = #cir<global_annotations [["2",[1,2]]]>} {}
diff --git a/clang/test/CIR/Incubator/IR/invalid-block-address.cir b/clang/test/CIR/Incubator/IR/invalid-block-address.cir
new file mode 100644
index 0000000000000..c3545406bfcff
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/invalid-block-address.cir
@@ -0,0 +1,21 @@
+// RUN: cir-opt %s -verify-diagnostics -split-input-file
+
+!void = !cir.void
+
+// expected-error at +1 {{expects an existing label target in the referenced function}}
+cir.func @bad_block_address() -> () {
+    %0 = cir.blockaddress <@bad_block_address, "label"> -> !cir.ptr<!void>
+    cir.br ^bb1
+  ^bb1:
+    cir.label "wrong_label"
+    cir.return
+}
+
+// expected-error at +1 {{blockaddress references a different function}}
+cir.func @bad_block_func() -> () {
+    %0 = cir.blockaddress <@mismatch_func, "label"> -> !cir.ptr<!void>
+    cir.br ^bb1
+  ^bb1:
+    cir.label "label"
+    cir.return
+}
diff --git a/clang/test/CIR/Incubator/IR/invalid-complex.cir b/clang/test/CIR/Incubator/IR/invalid-complex.cir
new file mode 100644
index 0000000000000..0716eeb0f0975
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/invalid-complex.cir
@@ -0,0 +1,23 @@
+// RUN: cir-opt %s -verify-diagnostics -split-input-file
+
+module {
+  cir.func @complex_real_invalid_result_type() -> !cir.double {
+    %0 = cir.alloca !cir.complex<!cir.double>, !cir.ptr<!cir.complex<!cir.double>>, ["c"]
+    %2 = cir.load align(8) %0 : !cir.ptr<!cir.complex<!cir.double>>, !cir.complex<!cir.double>
+    // expected-error @below {{op : result type does not match operand type}}
+    %3 = cir.complex.real %2 : !cir.complex<!cir.double> -> !cir.float
+    cir.return
+  }
+}
+
+// -----
+
+module {
+  cir.func @complex_imag_invalid_result_type() -> !cir.double {
+    %0 = cir.alloca !cir.complex<!cir.double>, !cir.ptr<!cir.complex<!cir.double>>, ["c"]
+    %2 = cir.load align(8) %0 : !cir.ptr<!cir.complex<!cir.double>>, !cir.complex<!cir.double>
+    // expected-error @below {{op : result type does not match operand type}}
+    %3 = cir.complex.imag %2 : !cir.complex<!cir.double> -> !cir.float
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/IR/invalid-opencl-vec-type-hint.cir b/clang/test/CIR/Incubator/IR/invalid-opencl-vec-type-hint.cir
new file mode 100644
index 0000000000000..9e57ad793bf89
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/invalid-opencl-vec-type-hint.cir
@@ -0,0 +1,7 @@
+// RUN: cir-opt %s -verify-diagnostics -allow-unregistered-dialect
+
+// expected-error at +1 {{vec_type_hint must be a type from the CIR or LLVM dialect}}
+#fn_attr = #cir.cl.kernel_metadata<
+  vec_type_hint = !tensor<7xi8>,
+  vec_type_hint_signedness = 0
+>
diff --git a/clang/test/CIR/Incubator/IR/invalid-type-info.cir b/clang/test/CIR/Incubator/IR/invalid-type-info.cir
new file mode 100644
index 0000000000000..4d4726bdac002
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/invalid-type-info.cir
@@ -0,0 +1,17 @@
+// RUN: cir-opt %s -verify-diagnostics -split-input-file
+
+!u8i = !cir.int<u, 8>
+
+!rec_anon_struct = !cir.record<struct  {!cir.ptr<!u8i>, !cir.ptr<!u8i>, !cir.ptr<!u8i>}>
+
+// expected-error @below {{expected !cir.record type}}
+cir.global constant external @type_info = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv120__si_class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS1B> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI1A> : !cir.ptr<!u8i>}> : !u8i
+
+// -----
+
+!u8i = !cir.int<u, 8>
+
+!rec_anon_struct = !cir.record<struct  {!u8i, !u8i, !u8i}>
+
+// expected-error @below {{integer or global view array attribute}}
+cir.global constant external @type_info = #cir.typeinfo<{ #cir.undef : !u8i, #cir.int<1> : !u8i, #cir.int<1> : !u8i}> : !rec_anon_struct
diff --git a/clang/test/CIR/Incubator/IR/invalid-vector-shuffle-wrong-index.cir b/clang/test/CIR/Incubator/IR/invalid-vector-shuffle-wrong-index.cir
new file mode 100644
index 0000000000000..9e37a3a593e82
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/invalid-vector-shuffle-wrong-index.cir
@@ -0,0 +1,16 @@
+// RUN: cir-opt %s -verify-diagnostics -split-input-file
+
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+
+module  {
+  cir.func @fold_shuffle_vector_op_test() -> !cir.vector<!s32i x 4> {
+    %vec_1 = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<7> : !s32i]> : !cir.vector<!s32i x 4>
+    %vec_2 = cir.const #cir.const_vector<[#cir.int<2> : !s32i, #cir.int<4> : !s32i, #cir.int<6> : !s32i, #cir.int<8> : !s32i]> : !cir.vector<!s32i x 4>
+
+    // expected-error @below {{index for __builtin_shufflevector must be less than the total number of vector elements}}
+    %new_vec = cir.vec.shuffle(%vec_1, %vec_2 : !cir.vector<!s32i x 4>) [#cir.int<9> : !s64i, #cir.int<4> : !s64i,
+      #cir.int<1> : !s64i, #cir.int<5> : !s64i] : !cir.vector<!s32i x 4>
+    cir.return %new_vec : !cir.vector<!s32i x 4>
+  }
+}
diff --git a/clang/test/CIR/Incubator/IR/invalid-vector-zero-size.cir b/clang/test/CIR/Incubator/IR/invalid-vector-zero-size.cir
new file mode 100644
index 0000000000000..d7472f28fad8a
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/invalid-vector-zero-size.cir
@@ -0,0 +1,10 @@
+// RUN: cir-opt %s -verify-diagnostics -split-input-file
+
+!s32i = !cir.int<s, 32>
+
+module  {
+
+// expected-error @below {{the number of vector elements must be non-zero}}
+cir.global external @vec_a = #cir.zero : !cir.vector<!s32i x 0>
+
+}
diff --git a/clang/test/CIR/Incubator/IR/invalid-vector.cir b/clang/test/CIR/Incubator/IR/invalid-vector.cir
new file mode 100644
index 0000000000000..43a1d1cb5dd86
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/invalid-vector.cir
@@ -0,0 +1,10 @@
+// RUN: cir-opt %s -verify-diagnostics -split-input-file
+
+!s32i = !cir.int<s, 32>
+
+module  {
+
+// expected-error @below {{failed to verify 'elementType'}}
+cir.global external @vec_b = #cir.zero : !cir.vector<!cir.array<!s32i x 10> x 4>
+
+}
diff --git a/clang/test/CIR/Incubator/IR/invalid.cir b/clang/test/CIR/Incubator/IR/invalid.cir
new file mode 100644
index 0000000000000..e63e74cc17176
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/invalid.cir
@@ -0,0 +1,1664 @@
+// Test attempts to build bogus CIR
+// RUN: cir-opt %s -verify-diagnostics -split-input-file
+
+!u32i = !cir.int<u, 32>
+
+cir.func @p0() {
+  // expected-error @below {{invalid kind of type specified}}
+  %1 = cir.const #cir.ptr<null> : !u32i
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+// expected-error at +2 {{invalid kind of type specified}}
+cir.func @b0() {
+  %1 = cir.const #cir.bool<true> : !u32i
+  cir.return
+}
+
+// -----
+
+#false = #cir.bool<false> : !cir.bool
+#true = #cir.bool<true> : !cir.bool
+!u32i = !cir.int<u, 32>
+cir.func @if0() {
+  %0 = cir.const #true
+  // expected-error at +1 {{'cir.if' op region control flow edge from Operation cir.yield to parent results: source has 1 operands, but target successor <to parent> needs 0}}
+  cir.if %0 {
+    %6 = cir.const #cir.int<3> : !u32i
+    cir.yield %6 : !u32i
+  }
+  cir.return
+}
+
+// -----
+
+#false = #cir.bool<false> : !cir.bool
+#true = #cir.bool<true> : !cir.bool
+cir.func @yield0() {
+  %0 = cir.const #true
+  cir.if %0 { // expected-error {{custom op 'cir.if' multi-block region must not omit terminator}}
+    cir.br ^a
+  ^a:
+  }
+  cir.return
+}
+
+// -----
+
+#false = #cir.bool<false> : !cir.bool
+#true = #cir.bool<true> : !cir.bool
+cir.func @yieldbreak() {
+  %0 = cir.const #true
+  cir.if %0 {
+    cir.break // expected-error {{op must be within a breakable operation}}
+  }
+  cir.return
+}
+
+// -----
+
+#false = #cir.bool<false> : !cir.bool
+#true = #cir.bool<true> : !cir.bool
+cir.func @yieldcontinue() {
+  %0 = cir.const #true
+  cir.if %0 {
+    cir.continue // expected-error {{op must be within a loop}}
+  }
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+cir.func @s0() {
+  %1 = cir.const #cir.int<2> : !s32i
+  cir.switch (%1 : !s32i) {
+    cir.case (equal, [#cir.int<5> : !s32i]) {
+      %2 = cir.const #cir.int<3> : !s32i // expected-error {{block with no terminator}}
+    }
+  }
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+cir.func @s1() {
+  %1 = cir.const #cir.int<2> : !s32i
+  cir.switch (%1 : !s32i) {
+    cir.case (equal, [#cir.int<5> : !s32i]) { // expected-error {{block with no terminator}}
+    }
+  }
+  cir.return
+}
+
+// -----
+
+cir.func @badstride(%x: !cir.ptr<!cir.int<s, 32>>) {
+  %idx = cir.const #cir.int<2> : !cir.int<s, 32>
+  %4 = cir.ptr_stride %x, %idx : (!cir.ptr<!cir.int<s, 32>>, !cir.int<s, 32>) -> !cir.ptr<!cir.float> // expected-error {{op failed to verify that all of {base, result} have same type}}
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+cir.func @cast0(%arg0: !u32i) {
+  %1 = cir.cast int_to_bool %arg0 : !u32i -> !u32i // expected-error {{requires !cir.bool type for result}}
+  cir.return
+}
+
+// -----
+
+cir.func @cast1(%arg1: !cir.float) {
+  %1 = cir.cast int_to_bool %arg1 : !cir.float -> !cir.bool // expected-error {{requires !cir.int type for source}}
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+cir.func @cast2(%p: !cir.ptr<!u32i>) {
+  %2 = cir.cast array_to_ptrdecay %p : !cir.ptr<!u32i> -> !cir.ptr<!u32i> // expected-error {{requires !cir.array pointee}}
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+cir.func @cast3(%p: !cir.ptr<!u32i>) {
+  %0 = cir.alloca !cir.array<!u32i x 10>, !cir.ptr<!cir.array<!u32i x 10>>, ["x", init]
+  %2 = cir.cast array_to_ptrdecay %0 : !cir.ptr<!cir.array<!u32i x 10>> -> !cir.ptr<!cir.float> // expected-error {{requires same type for array element and pointee result}}
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+cir.func @cast4(%p: !cir.ptr<!u32i>) {
+  %2 = cir.cast bitcast %p : !cir.ptr<!u32i> -> !u32i // expected-error {{requires !cir.ptr or !cir.vector type for source and result}}
+  cir.return
+}
+
+// -----
+
+cir.func @cast5(%p: !cir.float) {
+  %2 = cir.cast bool_to_float %p : !cir.float -> !cir.float // expected-error {{requires !cir.bool type for source}}
+  cir.return
+}
+
+// -----
+
+cir.func @cast6(%p: !cir.bool) {
+  %2 = cir.cast bool_to_float %p : !cir.bool -> !cir.int<u, 32> // expected-error {{requires !cir.float type for result}}
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+cir.func @cast7(%p: !cir.ptr<!u32i>) {
+  %2 = cir.cast ptr_to_bool %p : !cir.ptr<!u32i> -> !u32i // expected-error {{requires !cir.bool type for result}}
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+cir.func @cast8(%p: !u32i) {
+  %2 = cir.cast ptr_to_bool %p : !u32i -> !cir.bool // expected-error {{requires !cir.ptr type for source}}
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+cir.func @cast9(%p : !u32i) {
+  %2 = cir.cast integral %p : !u32i -> !cir.float // expected-error {{requires !cir.int type for result}}
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+cir.func @cast10(%p : !cir.float) {
+  %2 = cir.cast integral %p : !cir.float -> !u32i // expected-error {{requires !cir.int type for source}}
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+cir.func @cast11(%p : !cir.float) {
+  %2 = cir.cast floating %p : !cir.float -> !u32i // expected-error {{requires !cir.float type for source and result}}
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+cir.func @cast12(%p : !u32i) {
+  %2 = cir.cast floating %p : !u32i -> !cir.float // expected-error {{requires !cir.float type for source and result}}
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+cir.func @cast13(%p : !u32i) {
+  %2 = cir.cast float_to_int %p : !u32i -> !u32i // expected-error {{requires !cir.float type for source}}
+  cir.return
+}
+
+// -----
+
+cir.func @cast14(%p : !cir.float) {
+  %2 = cir.cast float_to_int %p : !cir.float -> !cir.float // expected-error {{requires !cir.int type for result}}
+  cir.return
+}
+
+// -----
+
+!u64i = !cir.int<u, 64>
+cir.func @cast15(%p : !cir.ptr<!u64i>) {
+  %2 = cir.cast int_to_ptr %p : !cir.ptr<!u64i> -> !cir.ptr<!u64i> // expected-error {{requires !cir.int type for source}}
+  cir.return
+}
+
+// -----
+
+!u64i = !cir.int<u, 64>
+cir.func @cast16(%p : !u64i) {
+  %2 = cir.cast int_to_ptr %p : !u64i -> !u64i // expected-error {{requires !cir.ptr type for result}}
+  cir.return
+}
+
+// -----
+
+!u64i = !cir.int<u, 64>
+cir.func @cast17(%p : !u64i) {
+  %2 = cir.cast ptr_to_int %p : !u64i -> !u64i // expected-error {{requires !cir.ptr type for source}}
+  cir.return
+}
+
+// -----
+
+!u64i = !cir.int<u, 64>
+cir.func @cast18(%p : !cir.ptr<!u64i>) {
+  %2 = cir.cast ptr_to_int %p : !cir.ptr<!u64i> -> !cir.ptr<!u64i> // expected-error {{requires !cir.int type for result}}
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+cir.func @cast19(%p : !u32i) {
+  %2 = cir.cast float_to_bool %p : !u32i -> !cir.bool // expected-error {{requires !cir.float type for source}}
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+cir.func @cast20(%p : !cir.float) {
+  %2 = cir.cast float_to_bool %p : !cir.float -> !u32i // expected-error {{requires !cir.bool type for result}}
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+cir.func @cast21(%p : !u32i) {
+  %2 = cir.cast bool_to_int %p : !u32i -> !u32i // expected-error {{requires !cir.bool type for source}}
+  cir.return
+}
+
+// -----
+
+cir.func @cast22(%p : !cir.bool) {
+  %2 = cir.cast bool_to_int %p : !cir.bool -> !cir.float // expected-error {{requires !cir.int type for result}}
+  cir.return
+}
+
+// -----
+
+cir.func @cast23(%p : !cir.bool) {
+  %2 = cir.cast int_to_float %p : !cir.bool -> !cir.float // expected-error {{requires !cir.int type for source}}
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+cir.func @cast24(%p : !u32i) {
+  %2 = cir.cast int_to_float %p : !u32i -> !cir.bool // expected-error {{requires !cir.float type for result}}
+  cir.return
+}
+
+// -----
+
+!u64i = !cir.int<u, 64>
+cir.func @cast26(%p : !cir.ptr<!u64i, target_address_space(1)>) {
+  %0 = cir.cast address_space %p : !cir.ptr<!u64i, target_address_space(1)> -> !u64i // expected-error {{requires !cir.ptr type for source and result}}
+  cir.return
+}
+
+// -----
+
+!u64i = !cir.int<u, 64>
+cir.func @cast27(%p : !u64i) {
+  %0 = cir.cast address_space %p : !u64i -> !cir.ptr<!u64i, target_address_space(1)> // expected-error {{requires !cir.ptr type for source and result}}
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+!u8i = !cir.int<u, 8>
+module {
+  // FIXME: The check for equality of the array element type is currently disabled due to https://github.com/llvm/clangir/pull/1007
+  // expected error {{constant array element should match array element type}}
+  cir.global external @a = #cir.const_array<[#cir.int<0> : !u8i, #cir.int<23> : !u8i, #cir.int<33> : !u8i] : !cir.array<!u32i x 3>>
+}
+
+// -----
+
+!u8i = !cir.int<u, 8>
+module {
+  // expected-error at +1 {{constant array size should match type size}}
+  cir.global external @a = #cir.const_array<[#cir.int<0> : !u8i, #cir.int<23> : !u8i, #cir.int<33> : !u8i] : !cir.array<!u8i x 4>>
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+module {
+  // expected-error at +1 {{constant array element for string literals expects !cir.int<u, 8> element type}}
+  cir.global external @b = #cir.const_array<"example\00" : !cir.array<!u32i x 8>>
+}
+
+// -----
+
+module {
+  // expected-error at +1 {{expected type declaration for string literal}}
+  cir.global "private" constant external @".str.2" = #cir.const_array<"example\00"> {alignment = 1 : i64}
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+module {
+  // expected-error at +1 {{expected string or keyword containing one of the following enum values for attribute 'linkage' [external, available_externally, linkonce, linkonce_odr, weak, weak_odr, appending, internal, cir_private, extern_weak, common]}}
+  cir.global @a = #cir.const_array<[0 : !u8i, -23 : !u8i, 33 : !u8i] : !cir.array<!u32i x 3>>
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+module {
+  cir.global "private" external @v = #cir.int<3> : !u32i // expected-error {{private visibility not allowed with 'external' linkage}}
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+module {
+  cir.global "public" internal @v = #cir.int<3> : !u32i // expected-error {{public visibility not allowed with 'internal' linkage}}
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+module {
+  cir.global external @v = #cir.zero : !u32i // expected-error {{zero expects record or array type}}
+}
+
+// -----
+
+module {
+  cir.global external @v = #cir.undef : !cir.void // expected-error {{undef expects non-void type}}
+}
+
+// -----
+
+module {
+  cir.global external @v = #cir.poison : !cir.void // expected-error {{poison expects non-void type}}
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+cir.func @vec_op_size() {
+  %0 = cir.const #cir.int<1> : !s32i
+  %1 = cir.vec.create(%0 : !s32i) : !cir.vector<!s32i x 2> // expected-error {{'cir.vec.create' op operand count of 1 doesn't match vector type '!cir.vector<!cir.int<s, 32> x 2>' element count of 2}}
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+!u32i = !cir.int<u, 32>
+cir.func @vec_op_type() {
+  %0 = cir.const #cir.int<1> : !s32i
+  %1 = cir.const #cir.int<2> : !u32i
+  %2 = cir.vec.create(%0, %1 : !s32i, !u32i) : !cir.vector<!s32i x 2> // expected-error {{'cir.vec.create' op operand type '!cir.int<u, 32>' doesn't match vector element type '!cir.int<s, 32>'}}
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+cir.func @vec_extract_non_int_idx() {
+  %0 = cir.const 1.5e+00 : f64
+  %1 = cir.const #cir.int<0> : !s32i
+  %2 = cir.vec.create(%1, %1 : !s32i, !s32i) : !cir.vector<!s32i x 2>
+  %3 = cir.vec.extract %2[%0 : f64] : !cir.vector<!s32i x 2> // expected-error {{expected '<'}}
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+!u32i = !cir.int<u, 32>
+cir.func @vec_extract_bad_type() {
+  %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["x", init] {alignment = 4 : i64}
+  %1 = cir.const #cir.int<0> : !s32i
+  %2 = cir.vec.create(%1, %1 : !s32i, !s32i) : !cir.vector<!s32i x 2>
+  %3 = cir.vec.extract %2[%1 : !s32i] : !cir.vector<!s32i x 2> // expected-note {{prior use here}}
+  cir.store %3, %0 : !u32i, !cir.ptr<!u32i> // expected-error {{use of value '%3' expects different type than prior uses: '!cir.int<u, 32>' vs '!cir.int<s, 32>'}}
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+cir.func @vec_extract_non_vector() {
+  %0 = cir.const #cir.int<0> : !s32i
+  %1 = cir.vec.extract %0[%0 : !s32i] : !s32i // expected-error {{custom op 'cir.vec.extract' 'vec' must be CIR vector type, but got '!cir.int<s, 32>'}}
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+!u32i = !cir.int<u, 32>
+cir.func @vec_insert_bad_type() {
+  %0 = cir.const #cir.int<0> : !s32i
+  %1 = cir.vec.create(%0, %0 : !s32i, !s32i) : !cir.vector<!s32i x 2>
+  %2 = cir.const #cir.int<0> : !u32i // expected-note {{prior use here}}
+  %3 = cir.vec.insert %2, %1[%0 : !s32i] : !cir.vector<!s32i x 2> // expected-error {{use of value '%2' expects different type than prior uses: '!cir.int<s, 32>' vs '!cir.int<u, 32>'}}
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+cir.func @vec_insert_non_vector() {
+  %0 = cir.const #cir.int<0> : !s32i
+  %1 = cir.vec.insert %0, %0[%0 : !s32i] : !s32i // expected-error {{custom op 'cir.vec.insert' 'vec' must be CIR vector type, but got '!cir.int<s, 32>'}}
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+cir.func @vec_ternary_non_vector1() {
+  %0 = cir.const #cir.int<0> : !s32i
+  %1 = cir.vec.create(%0, %0 : !s32i, !s32i) : !cir.vector<!s32i x 2>
+  %2 = cir.vec.ternary(%0, %1, %1) : !s32i, !cir.vector<!s32i x 2> // expected-error {{'cir.vec.ternary' op operand #0 must be vector of integer type, but got '!cir.int<s, 32>'}}
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+cir.func @vec_ternary_non_vector2() {
+  %0 = cir.const #cir.int<0> : !s32i
+  %1 = cir.vec.create(%0, %0 : !s32i, !s32i) : !cir.vector<!s32i x 2>
+  %2 = cir.vec.ternary(%1, %0, %0) : !cir.vector<!s32i x 2>, !s32i // expected-error {{'cir.vec.ternary' op operand #1 must be CIR vector type, but got '!cir.int<s, 32>'}}
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+cir.func @vec_ternary_different_size() {
+  %0 = cir.const #cir.int<0> : !s32i
+  %1 = cir.vec.create(%0, %0 : !s32i, !s32i) : !cir.vector<!s32i x 2>
+  %2 = cir.vec.create(%0, %0, %0, %0 : !s32i, !s32i, !s32i, !s32i) : !cir.vector<!s32i x 4>
+  %3 = cir.vec.ternary(%1, %2, %2) : !cir.vector<!s32i x 2>, !cir.vector<!s32i x 4> // expected-error {{'cir.vec.ternary' op : the number of elements in '!cir.vector<!cir.int<s, 32> x 2>' and '!cir.vector<!cir.int<s, 32> x 4>' don't match}}
+  cir.return
+}
+
+// -----
+
+cir.func @vec_ternary_not_int(%p : !cir.float) {
+  %0 = cir.vec.create(%p, %p : !cir.float, !cir.float) : !cir.vector<!cir.float x 2>
+  %1 = cir.vec.ternary(%0, %0, %0) : !cir.vector<!cir.float x 2>, !cir.vector<!cir.float x 2> // expected-error {{'cir.vec.ternary' op operand #0 must be vector of integer type, but got '!cir.vector<!cir.float x 2>'}}
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+cir.func @vec_shuffle_mismatch_args(%f : !cir.float, %n : !s32i) {
+  %0 = cir.vec.create(%f, %f : !cir.float, !cir.float) : !cir.vector<!cir.float x 2>
+  %1 = cir.vec.create(%n, %n : !s32i, !s32i) : !cir.vector<!s32i x 2> // expected-note {{prior use here}}
+  %2 = cir.vec.shuffle(%0, %1 : !cir.vector<!cir.float x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!cir.float x 2> // expected-error {{use of value '%1' expects different type than prior uses: '!cir.vector<!cir.float x 2>' vs '!cir.vector<!cir.int<s, 32> x 2>'}}
+  cir.return
+}
+
+// -----
+
+cir.func @vec_shuffle_non_ints(%f : !cir.float) {
+  %0 = cir.vec.create(%f, %f : !cir.float, !cir.float) : !cir.vector<!cir.float x 2>
+  %1 = cir.vec.shuffle(%0, %0 : !cir.vector<!cir.float x 2>) [#cir.fp<1.000000e+00> : !cir.float, #cir.fp<1.000000e+00> : !cir.float] : !cir.vector<!cir.float x 2> // expected-error {{'cir.vec.shuffle' op all index values must be integers}}
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+cir.func @vec_shuffle_result_size(%f : !cir.float) {
+  %0 = cir.vec.create(%f, %f : !cir.float, !cir.float) : !cir.vector<!cir.float x 2>
+  %1 = cir.vec.shuffle(%0, %0 : !cir.vector<!cir.float x 2>) [#cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!cir.float x 4> // expected-error {{'cir.vec.shuffle' op : the number of elements in [#cir.int<1> : !cir.int<s, 32>, #cir.int<1> : !cir.int<s, 32>] and '!cir.vector<!cir.float x 4>' don't match}}
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+cir.func @vec_shuffle_result_element(%f : !cir.float) {
+  %0 = cir.vec.create(%f, %f : !cir.float, !cir.float) : !cir.vector<!cir.float x 2>
+  %1 = cir.vec.shuffle(%0, %0 : !cir.vector<!cir.float x 2>) [#cir.int<1> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!s32i x 2> // expected-error {{'cir.vec.shuffle' op : element types of '!cir.vector<!cir.float x 2>' and '!cir.vector<!cir.int<s, 32> x 2>' don't match}}
+  cir.return
+}
+
+// -----
+
+cir.func coroutine @bad_task() { // expected-error {{coroutine body must use at least one cir.await op}}
+  cir.return
+}
+
+// -----
+
+cir.func coroutine @missing_condition() {
+  cir.scope {
+    cir.await(user, ready : { // expected-error {{ready region must end with cir.condition}}
+      cir.yield
+    }, suspend : {
+      cir.yield
+    }, resume : {
+      cir.yield
+    },)
+  }
+  cir.return
+}
+
+// -----
+
+!u8i = !cir.int<u, 8>
+!u32i = !cir.int<u, 32>
+module {
+  // Note MLIR requires "private" for global declarations, should get
+  // rid of this somehow in favor of clarity?
+  cir.global "private" external @_ZTVN10__cxxabiv120__si_class_type_infoE : !cir.ptr<!u32i>
+
+  // expected-error at +1 {{element at index 0 has type '!cir.ptr<!cir.int<u, 8>>' but return type for this element is '!cir.ptr<!cir.int<u, 32>>'}}
+  cir.global external @type_info_B = #cir.typeinfo<{
+    #cir.global_view<@_ZTVN10__cxxabiv120__si_class_type_infoE, [2]> : !cir.ptr<!u8i>}>
+    : !cir.record<struct {!cir.ptr<!u32i>}>
+}
+
+// -----
+
+module {
+  cir.func @l0() {
+    cir.return
+  }
+
+  cir.func @l1() alias(@l0) { // expected-error {{function alias shall not have a body}}
+    cir.return
+  }
+}
+
+// -----
+
+module {
+  // expected-error at below {{expected 's' or 'u'}}
+  cir.func @l0(%arg0: !cir.int<x, 32>) -> () {
+    cir.return
+  }
+}
+
+// -----
+
+module {
+  // expected-error at below {{expected integer width to be from 1 up to 128}}
+  cir.func @l0(%arg0: !cir.int<s, 256>) -> () {
+    cir.return
+  }
+}
+
+// -----
+
+module {
+  // expected-error at below {{integer value too large for the given type}}
+  cir.global external @a = #cir.int<256> : !cir.int<u, 8>
+}
+
+// -----
+
+module {
+  // expected-error at below {{integer value too large for the given type}}
+  cir.global external @b = #cir.int<-129> : !cir.int<s, 8>
+}
+
+// -----
+
+module {
+  // expected-error at +1 {{custom op 'cir.func' variadic arguments must be in the end of the argument list}}
+  cir.func @variadic(..., !cir.int<s, 32>) -> !cir.int<s, 32>
+}
+
+// -----
+
+module {
+  // expected-error at +1 {{functions only supports zero or one results}}
+  cir.func @variadic() -> (!cir.int<s, 32>, !cir.int<s, 32>)
+}
+
+// -----
+
+module {
+  cir.func private @variadic(!cir.int<s, 32>, !cir.int<s, 32>, ...) -> !cir.int<s, 32>
+  cir.func @call_variadic(%0: !cir.int<s, 32>) -> !cir.int<s, 32> {
+    // expected-error at +1 {{'cir.call' op too few operands for callee}}
+    %1 = cir.call @variadic(%0) : (!cir.int<s, 32>) -> !cir.int<s, 32>
+    cir.return %1 : !cir.int<s, 32>
+  }
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+cir.func @test_br() -> !s32i {
+    %0 = cir.const #cir.int<0>: !s32i
+    // expected-error at below {{branch has 1 operands for successor #0, but target block has 0}}
+    cir.br ^bb1(%0 : !s32i)
+  ^bb1:
+    cir.return %0 : !s32i
+}
+
+// -----
+
+module {
+  cir.func private @test() -> !cir.void
+  cir.func @invalid_call() {
+    // expected-error at +1 {{'cir.call' op callee returns void but call has results}}
+    %1 = cir.call @test() : () -> (!cir.int<s, 32>)
+    cir.return
+  }
+}
+
+// -----
+
+module {
+  cir.func private @test() -> !cir.int<u, 8>
+  cir.func @invalid_call() {
+    // expected-error at +1 {{'cir.call' op result type mismatch: expected '!cir.int<u, 8>', but provided '!cir.int<s, 32>'}}
+    %1 = cir.call @test() : () -> (!cir.int<s, 32>)
+    cir.return
+  }
+}
+
+// -----
+
+module {
+  cir.func @invalid_return_type(%0 : !cir.int<u, 64>) -> !cir.int<s, 32> {
+    // expected-error at +1 {{'cir.return' op returns '!cir.int<u, 64>' but enclosing function returns '!cir.int<s, 32>'}}
+    cir.return %0 : !cir.int<u, 64>
+  }
+}
+
+// -----
+
+// expected-error at below {{expected ::cir::SourceLanguage to be one of: c, cxx, opencl_c}}
+// expected-error at below {{failed to parse CIR_SourceLanguageAttr parameter 'value'}}
+module attributes {cir.lang = #cir.lang<dummy>} { }
+
+// -----
+
+module {
+  // Should not copy types with no data layout (unknown byte size).
+  cir.func @invalid_copy(%arg0 : !cir.ptr<!cir.void>, %arg1 : !cir.ptr<!cir.void>) {
+    // expected-error at +1 {{missing data layout for pointee type}}
+    cir.copy %arg0 to %arg1 : !cir.ptr<!cir.void>
+    cir.return
+  }
+}
+
+// -----
+
+module {
+  // Should not copy to same address.
+  cir.func @invalid_copy(%arg0 : !cir.ptr<!cir.int<s, 8>>) {
+    // expected-error at +1 {{source and destination are the same}}
+    cir.copy %arg0 to %arg0 : !cir.ptr<!cir.int<s, 8>>
+    cir.return
+  }
+}
+
+// -----
+
+!s8i = !cir.int<s, 8>
+module {
+  // Should not memcpy with invalid length type.
+  cir.func @invalid_memcpy_len(%arg0 : !cir.ptr<!cir.void>, %arg1 : !s8i) {
+    // expected-error at +1 {{'cir.libc.memcpy' op operand #2 must be fundamental unsigned integer type, but got '!cir.int<s, 8>'}}
+    cir.libc.memcpy %arg1 bytes from %arg0 to %arg0 : !s8i, !cir.ptr<!cir.void> -> !cir.ptr<!cir.void>
+    cir.return
+  }
+}
+
+// -----
+
+!s8i = !cir.int<s, 8>
+!u32i = !cir.int<u, 32>
+module {
+  // Should not memcpy non-void pointers.
+  cir.func @invalid_memcpy_pointer_0(%arg0 : !cir.ptr<!s8i>, %arg1 : !u32i) {
+    // expected-error at +1 {{'cir.libc.memcpy' op operand #0 must be pointer to void type, but got '!cir.ptr<!cir.int<s, 8>>'}}
+    cir.libc.memcpy %arg1 bytes from %arg0 to %arg0 : !u32i, !cir.ptr<!s8i> -> !cir.ptr<!s8i>
+    cir.return
+  }
+}
+
+// -----
+
+!s8i = !cir.int<s, 8>
+!u32i = !cir.int<u, 32>
+module {
+  // Should not memcpy non-void pointers.
+  cir.func @invalid_memcpy_pointer_1(%arg0 : !cir.ptr<!cir.void>, %arg1 : !cir.ptr<!s8i>, %arg2 : !u32i) {
+    // expected-error at +1 {{'cir.libc.memcpy' op operand #1 must be pointer to void type, but got '!cir.ptr<!cir.int<s, 8>>'}}
+    cir.libc.memcpy %arg2 bytes from %arg1 to %arg0 : !u32i, !cir.ptr<!s8i> -> !cir.ptr<!cir.void>
+    cir.return
+  }
+}
+
+// -----
+!s8i = !cir.int<s, 8>
+!rec_Init = !cir.record<class "Init" {!s8i} #cir.record.decl.ast>
+module {
+  cir.global "private" internal @_ZL8__ioinit = ctor : !rec_Init {
+  }
+  // expected-error at +1 {{custom op 'cir.global' ctor region must have exactly one block}}
+}
+
+// -----
+!s8i = !cir.int<s, 8>
+#true = #cir.bool<true> : !cir.bool
+!rec_Init = !cir.record<class "Init" {!s8i} #cir.record.decl.ast>
+module {
+  cir.func private @_ZN4InitC1Eb(!cir.ptr<!rec_Init>)
+  cir.global "private" internal @_ZL8__ioinit = ctor : !rec_Init {
+    %0 = cir.get_global @_ZL8__ioinit : !cir.ptr<!rec_Init>
+    cir.call @_ZN4InitC1Eb(%0) : (!cir.ptr<!rec_Init>) -> ()
+  } dtor {}
+  // expected-error at +1 {{custom op 'cir.global' dtor region must have exactly one block}}
+}
+
+// -----
+!s32i = !cir.int<s, 32>
+!u8i = !cir.int<u, 8>
+module {
+  cir.global "private" constant internal @".str" = #cir.const_array<"Division by zero condition!\00" : !cir.array<!u8i x 28>> : !cir.array<!u8i x 28> {alignment = 1 : i64}
+  cir.global "private" constant external @_ZTIPKc : !cir.ptr<!u8i>
+  cir.func @_Z8divisionii() {
+    %11 = cir.alloc.exception 8 -> !cir.ptr<!cir.ptr<!u8i>>
+    %12 = cir.get_global @".str" : !cir.ptr<!cir.array<!u8i x 28>>
+    %13 = cir.cast array_to_ptrdecay %12 : !cir.ptr<!cir.array<!u8i x 28>> -> !cir.ptr<!u8i>
+    cir.store %13, %11 : !cir.ptr<!u8i>, !cir.ptr<!cir.ptr<!u8i>>
+    cir.throw %11 : !cir.ptr<!cir.ptr<!u8i>> // expected-error {{'type_info' symbol attribute missing}}
+  }
+}
+
+// -----
+
+!u16i = !cir.int<u, 16>
+!u32i = !cir.int<u, 32>
+!struct = !cir.record<struct "Struct" {!u16i, !u32i}>
+module {
+  cir.func @memeber_index_out_of_bounds(%arg0 : !cir.ptr<!struct>) {
+    // expected-error at +1 {{member index out of bounds}}
+    %0 = cir.get_member %arg0[2] {name = "test"} : !cir.ptr<!struct> -> !cir.ptr<!u32i>
+    cir.return
+  }
+}
+
+// -----
+
+!u16i = !cir.int<u, 16>
+!u32i = !cir.int<u, 32>
+!struct = !cir.record<struct "Struct" {!u16i, !u32i}>
+module {
+  cir.func @memeber_type_mismatch(%arg0 : !cir.ptr<!struct>) {
+    // expected-error at +1 {{member type mismatch}}
+    %0 = cir.get_member %arg0[0] {name = "test"} : !cir.ptr<!struct> -> !cir.ptr<!u32i>
+    cir.return
+  }
+}
+
+// -----
+
+!u16i = !cir.int<u, 16>
+// expected-error at +1 {{anonymous records must be complete}}
+!struct = !cir.record<struct incomplete>
+
+// -----
+
+!u16i = !cir.int<u, 16>
+// expected-error at +1 {{identified records cannot have an empty name}}
+!struct = !cir.record<struct "" incomplete>
+
+// -----
+
+// expected-error at +1 {{invalid self-reference within record}}
+!struct = !cir.record<struct {!cir.record<struct "SelfReference">}>
+
+// -----
+
+// expected-error at +1 {{record already defined}}
+!struct = !cir.record<struct "SelfReference" {!cir.record<struct "SelfReference" {}>}>
+
+// -----
+!s32i = !cir.int<s, 32>
+module {
+  cir.func @tmp(%arg0: !cir.float) {
+    // expected-error at +1 {{operand #0 must be fundamental integer type}}
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, %arg0 : !cir.float, ["tmp"]
+    cir.return
+  }
+}
+
+// -----
+
+!u8i = !cir.int<u, 8>
+module {
+  cir.func @stack_save_type_mismatch() {
+    // expected-error at +1 {{must be CIR pointer type}}
+    %1 = cir.stack_save : !u8i
+    cir.return
+  }
+}
+// -----
+
+!u8i = !cir.int<u, 8>
+module {
+  cir.func @stack_restore_type_mismatch(%arg0 : !u8i) {
+    // expected-error at +1 {{must be CIR pointer type}}
+    cir.stack_restore %arg0 : !u8i
+    cir.return
+  }
+}
+
+// -----
+
+!u16i = !cir.int<u, 16>
+
+// expected-error at +1 {{invalid kind of type specified}}
+#invalid_type = #cir.data_member<0> : !u16i
+
+// -----
+
+!u16i = !cir.int<u, 16>
+!u32i = !cir.int<u, 32>
+!struct1 = !cir.record<struct "Struct1" {!u16i, !u32i}>
+
+// expected-error at +1 {{member type of a #cir.data_member attribute must match the attribute type}}
+#invalid_member_ty = #cir.data_member<0> : !cir.data_member<!u32i in !struct1>
+
+// -----
+
+!u16i = !cir.int<u, 16>
+!u32i = !cir.int<u, 32>
+!struct1 = !cir.record<struct "Struct1" {!u16i, !u32i}>
+
+module {
+  cir.func @invalid_base_type(%arg0 : !cir.data_member<!u32i in !struct1>) {
+    %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["tmp"] {alignment = 4 : i64}
+    // expected-error at +1 {{'cir.get_runtime_member' op operand #0 must be pointer to record type}}
+    %1 = cir.get_runtime_member %0[%arg0 : !cir.data_member<!u32i in !struct1>] : !cir.ptr<!u32i> -> !cir.ptr<!u32i>
+    cir.return
+  }
+}
+
+// -----
+
+!u16i = !cir.int<u, 16>
+!u32i = !cir.int<u, 32>
+!struct1 = !cir.record<struct "Struct1" {!u16i, !u32i}>
+!struct2 = !cir.record<struct "Struct2" {!u16i, !u32i}>
+
+module {
+  cir.func @invalid_base_type(%arg0 : !cir.data_member<!u32i in !struct1>) {
+    %0 = cir.alloca !struct2, !cir.ptr<!struct2>, ["tmp"] {alignment = 4 : i64}
+    // expected-error at +1 {{record type does not match the member pointer type}}
+    %1 = cir.get_runtime_member %0[%arg0 : !cir.data_member<!u32i in !struct1>] : !cir.ptr<!struct2> -> !cir.ptr<!u32i>
+    cir.return
+  }
+}
+
+// -----
+
+!u16i = !cir.int<u, 16>
+!u32i = !cir.int<u, 32>
+!struct1 = !cir.record<struct "Struct1" {!u16i, !u32i}>
+
+module {
+  cir.func @invalid_base_type(%arg0 : !cir.data_member<!u32i in !struct1>) {
+    %0 = cir.alloca !struct1, !cir.ptr<!struct1>, ["tmp"] {alignment = 4 : i64}
+    // expected-error at +1 {{result type does not match the member pointer type}}
+    %1 = cir.get_runtime_member %0[%arg0 : !cir.data_member<!u32i in !struct1>] : !cir.ptr<!struct1> -> !cir.ptr<!u16i>
+    cir.return
+  }
+}
+
+// -----
+
+!u16i = !cir.int<u, 16>
+!incomplete_struct = !cir.record<struct "Incomplete" incomplete>
+
+// expected-error at +1 {{incomplete 'cir.record' cannot be used to build a non-null data member pointer}}
+#incomplete_cls_member = #cir.data_member<0> : !cir.data_member<!u16i in !incomplete_struct>
+
+
+// -----
+
+!s32i = !cir.int<s, 32>
+!u32i = !cir.int<u, 32>
+
+cir.func @clrsb_invalid_input_ty(%arg0 : !u32i) -> () {
+  // expected-error at +1 {{'cir.clrsb' op operand #0 must be signed integer type of widths 32/64, but got '!cir.int<u, 32>'}}
+  %0 = cir.clrsb %arg0 : !u32i
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+!u32i = !cir.int<u, 32>
+
+cir.func @clrsb_invalid_result_ty(%arg0 : !s32i) -> () {  // expected-note {{prior use here}}
+  // expected-error at +1 {{use of value '%arg0' expects different type than prior uses: '!cir.int<u, 32>' vs '!cir.int<s, 32>'}}
+  %0 = cir.clrsb %arg0 : !u32i
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+cir.func @clz_invalid_input_ty(%arg0 : !s32i) -> () {
+  // expected-error at +1 {{'cir.clz' op operand #0 must be unsigned integer type of widths 16/32/64, but got '!cir.int<s, 32>'}}
+  %0 = cir.clz %arg0 zero_poison : !s32i
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+!s32i = !cir.int<s, 32>
+
+cir.func @clz_invalid_result_ty(%arg0 : !u32i) -> () {  // expected-note {{prior use here}}
+  // expected-error at +1 {{use of value '%arg0' expects different type than prior uses: '!cir.int<s, 32>' vs '!cir.int<u, 32>'}}
+  %0 = cir.clz %arg0 : !s32i
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+cir.func @ctz_invalid_input_ty(%arg0 : !s32i) -> () {
+  // expected-error at +1 {{'cir.ctz' op operand #0 must be unsigned integer type of widths 16/32/64, but got '!cir.int<s, 32>'}}
+  %0 = cir.ctz %arg0 zero_poison : !s32i
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+!s32i = !cir.int<s, 32>
+
+cir.func @ctz_invalid_result_ty(%arg0 : !u32i) -> () {  // expected-note {{prior use here}}
+  // expected-error at +1 {{use of value '%arg0' expects different type than prior uses: '!cir.int<s, 32>' vs '!cir.int<u, 32>'}}
+  %0 = cir.ctz %arg0 : !s32i
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+!u32i = !cir.int<u, 32>
+
+cir.func @ffs_invalid_input_ty(%arg0 : !u32i) -> () {  // expected-note {{prior use here}}
+  // expected-error at +1 {{use of value '%arg0' expects different type than prior uses: '!cir.int<s, 32>' vs '!cir.int<u, 32>'}}
+  %0 = cir.ffs %arg0 : !s32i
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+!u32i = !cir.int<u, 32>
+
+cir.func @ffs_invalid_result_ty(%arg0 : !s32i) -> () {  // expected-note {{prior use here}}
+  // expected-error@+1 {{use of value '%arg0' expects different type than prior uses: '!cir.int<u, 32>' vs '!cir.int<s, 32>'}}
+  %0 = cir.ffs %arg0 : !u32i
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+cir.func @parity_invalid_input_ty(%arg0 : !s32i) -> () {
+  // expected-error@+1 {{'cir.parity' op operand #0 must be unsigned integer type of widths 32/64, but got '!cir.int<s, 32>'}}
+  %0 = cir.parity %arg0 : !s32i
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+!s32i = !cir.int<s, 32>
+
+cir.func @parity_invalid_result_ty(%arg0 : !u32i) -> () {  // expected-note {{prior use here}}
+  // expected-error@+1 {{use of value '%arg0' expects different type than prior uses: '!cir.int<s, 32>' vs '!cir.int<u, 32>'}}
+  %0 = cir.parity %arg0 : !s32i
+  cir.return
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+cir.func @popcount_invalid_input_ty(%arg0 : !s32i) -> () {
+  // expected-error@+1 {{'cir.popcount' op operand #0 must be unsigned integer type of widths 16/32/64, but got '!cir.int<s, 32>'}}
+  %0 = cir.popcount %arg0 : !s32i
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+!u64i = !cir.int<u, 64>
+
+cir.func @popcount_invalid_result_ty(%arg0 : !u32i) -> () {  // expected-note {{prior use here}}
+  // expected-error@+1 {{use of value '%arg0' expects different type than prior uses: '!cir.int<u, 64>' vs '!cir.int<u, 32>'}}
+  %0 = cir.popcount %arg0 : !u64i
+  cir.return
+}
+
+// -----
+
+cir.func @bad_fetch(%x: !cir.ptr<!cir.float>, %y: !cir.float) -> () {
+  // expected-error@+1 {{only operates on integer values}}
+  %12 = cir.atomic.fetch(xor, %x : !cir.ptr<!cir.float>, %y : !cir.float, seq_cst) : !cir.float
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+!u64i = !cir.int<u, 64>
+cir.func @bad_xchg(%x: !cir.ptr<!u32i>, %y: !u64i) -> () {
+  // expected-error@+1 {{ptr type and val type must match}}
+  %13 = cir.atomic.xchg(%x: !cir.ptr<!u32i>, %y: !u64i, seq_cst) : !u64i
+  cir.return
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+!u64i = !cir.int<u, 64>
+cir.func @bad_cmp_xchg(%x: !cir.ptr<!u32i>, %y: !u64i, %z: !u64i) -> () {
+  // expected-error@+1 {{ptr, expected and desired types must match}}
+  %14, %15 = cir.atomic.cmp_xchg(%x : !cir.ptr<!u32i>, %y : !u64i, %z : !u64i, success = seq_cst, failure = seq_cst) align(8) weak : (!u64i, !cir.bool)
+  cir.return
+}
+
+// -----
+
+cir.func @bad_operands_for_nowrap(%x: !cir.float, %y: !cir.float) {
+  // expected-error@+1 {{only operations on integer values may have nsw/nuw flags}}
+  %0 = cir.binop(add, %x, %y) nsw : !cir.float
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+
+cir.func @bad_binop_for_nowrap(%x: !u32i, %y: !u32i) {
+  // expected-error@+1 {{The nsw/nuw flags are applicable to opcodes: 'add', 'sub' and 'mul'}}
+  %0 = cir.binop(div, %x, %y) nsw : !u32i
+}
+
+// -----
+
+!u32i = !cir.int<u, 32>
+
+cir.func @bad_binop_for_saturated(%x: !u32i, %y: !u32i) {
+  // expected-error@+1 {{The saturated flag is applicable to opcodes: 'add' and 'sub'}}
+  %0 = cir.binop(div, %x, %y) sat : !u32i
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+cir.func @no_nsw_for_saturated(%x: !s32i, %y: !s32i) {
+  // expected-error@+1 {{The nsw/nuw flags and the saturated flag are mutually exclusive}}
+  %0 = cir.binop(add, %x, %y) nsw sat : !s32i
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+cir.func @no_nuw_for_saturated(%x: !s32i, %y: !s32i) {
+  // expected-error@+1 {{The nsw/nuw flags and the saturated flag are mutually exclusive}}
+  %0 = cir.binop(add, %x, %y) nuw sat : !s32i
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.global "private" external @batata : !s32i
+  cir.func @f35() {
+    // expected-error@+1 {{access to global not marked thread local}}
+    %0 = cir.get_global thread_local @batata : !cir.ptr<!s32i>
+    cir.return
+  }
+}
+
+// -----
+
+!s8i = !cir.int<s, 8>
+cir.func @no_reference_global() {
+  // expected-error @below {{'cir.get_global' op 'str' does not reference a valid cir.global or cir.func}}
+  %0 = cir.get_global @str : !cir.ptr<!s8i>
+  cir.return
+}
+
+// -----
+
+// expected-error at +1 {{failed to verify 'underlying': expects !cir.double, !cir.fp80 or !cir.fp128}}
+cir.func @bad_long_double(%arg0 : !cir.long_double<!cir.float>) -> () {
+  cir.return
+}
+
+// -----
+
+!s64i = !cir.int<s, 64>
+!s8i = !cir.int<s, 8>
+!u32i = !cir.int<u, 32>
+!u8i = !cir.int<u, 8>
+!void = !cir.void
+
+!Base = !cir.record<struct "Base" {!cir.ptr<!cir.ptr<!cir.func<() -> !cir.int<u, 32>>>>}>
+!Derived = !cir.record<struct "Derived" {!cir.record<struct "Base" {!cir.ptr<!cir.ptr<!cir.func<() -> !cir.int<u, 32>>>>}>}>
+
+module {
+  cir.global "private" constant external @_ZTI4Base : !cir.ptr<!u32i>
+  cir.global "private" constant external @_ZTI7Derived : !cir.ptr<!u8i>
+  cir.func private @__dynamic_cast(!cir.ptr<!void>, !cir.ptr<!u8i>, !cir.ptr<!u8i>, !s64i) -> !cir.ptr<!void>
+  cir.func private @__cxa_bad_cast()
+  cir.func @test(%arg0 : !cir.ptr<!Base>) {
+    // expected-error@+1 {{srcRtti must be an RTTI pointer}}
+    %0 = cir.dyn_cast ptr %arg0 : !cir.ptr<!Base> -> !cir.ptr<!Derived> #cir.dyn_cast_info<src_rtti = #cir.global_view<@_ZTI4Base> : !cir.ptr<!u32i>, dest_rtti = #cir.global_view<@_ZTI7Derived> : !cir.ptr<!u8i>, runtime_func = @__dynamic_cast, bad_cast_func = @__cxa_bad_cast, offset_hint = #cir.int<0> : !s64i>
+  }
+}
+
+// -----
+
+!s64i = !cir.int<s, 64>
+!s8i = !cir.int<s, 8>
+!u32i = !cir.int<u, 32>
+!u8i = !cir.int<u, 8>
+!void = !cir.void
+
+!Base = !cir.record<struct "Base" {!cir.ptr<!cir.ptr<!cir.func<() -> !cir.int<u, 32>>>>}>
+!Derived = !cir.record<struct "Derived" {!cir.record<struct "Base" {!cir.ptr<!cir.ptr<!cir.func<() -> !cir.int<u, 32>>>>}>}>
+
+module {
+  cir.global "private" constant external @_ZTI4Base : !cir.ptr<!u8i>
+  cir.global "private" constant external @_ZTI7Derived : !cir.ptr<!u32i>
+  cir.func private @__dynamic_cast(!cir.ptr<!void>, !cir.ptr<!u8i>, !cir.ptr<!u8i>, !s64i) -> !cir.ptr<!void>
+  cir.func private @__cxa_bad_cast()
+  cir.func @test(%arg0 : !cir.ptr<!Base>) {
+    // expected-error@+1 {{destRtti must be an RTTI pointer}}
+    %0 = cir.dyn_cast ptr %arg0 : !cir.ptr<!Base> -> !cir.ptr<!Derived> #cir.dyn_cast_info<src_rtti = #cir.global_view<@_ZTI4Base> : !cir.ptr<!u8i>, dest_rtti = #cir.global_view<@_ZTI7Derived> : !cir.ptr<!u32i>, runtime_func = @__dynamic_cast, bad_cast_func = @__cxa_bad_cast, offset_hint = #cir.int<0> : !s64i>
+  }
+}
+
+
+// -----
+
+// expected-error@+1 {{goto/label mismatch}}
+cir.func @bad_goto() -> () {
+  cir.goto "somewhere"
+^bb1:
+  cir.label "label"
+  cir.return
+}
+
+// -----
+
+!u64i = !cir.int<u, 64>
+// expected-error@below {{expected language address space keyword}}
+// expected-error@below {{expected keyword for language address space kind}}
+cir.func @address_space1(%p : !cir.ptr<!u64i, lang_address_space()>) {
+  cir.return
+}
+
+// -----
+
+!u64i = !cir.int<u, 64>
+// expected-error@below {{expected target address space value}}
+// expected-error@below {{expected integer value}}
+cir.func @address_space2(%p : !cir.ptr<!u64i, target_address_space()>) {
+  cir.return
+}
+
+// -----
+
+
+!u64i = !cir.int<u, 64>
+// expected-error@below {{expected one of [default, offload_private, offload_local, offload_global, offload_constant, offload_generic] for language address space kind, got: foobar}}
+// expected-error@below {{expected language address space keyword}}
+cir.func @address_space4(%p : !cir.ptr<!u64i, lang_address_space(foobar)>) {
+  cir.return
+}
+
+// -----
+
+// expected-error@+1 {{metadata attribute without any field present is invalid}}
+#fn_attr = #cir.cl.kernel_metadata<>
+
+// -----
+
+// expected-error@+1 {{work_group_size_hint must have exactly 3 integer elements}}
+#fn_attr = #cir.cl.kernel_metadata<
+  work_group_size_hint = [2 : i32]
+>
+
+// -----
+
+// expected-error@+1 {{reqd_work_group_size must have exactly 3 integer elements}}
+#fn_attr = #cir.cl.kernel_metadata<
+  reqd_work_group_size = [3.0 : f32, 1.7 : f32]
+>
+
+// -----
+
+// expected-error@+1 {{vec_type_hint_signedness should be present if and only if vec_type_hint is set}}
+#fn_attr = #cir.cl.kernel_metadata<
+  vec_type_hint_signedness = 1
+>
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+// expected-error@+1 {{vec_type_hint_signedness should be present if and only if vec_type_hint is set}}
+#fn_attr = #cir.cl.kernel_metadata<
+  vec_type_hint = !s32i
+>
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+// expected-error@+1 {{vec_type_hint_signedness must match the signedness of the vec_type_hint type}}
+#fn_attr = #cir.cl.kernel_metadata<
+  vec_type_hint = !s32i,
+  vec_type_hint_signedness = 0
+>
+
+// -----
+
+// expected-error@+1 {{addr_space must be integer arrays}}
+#fn_attr = #cir.cl.kernel_arg_metadata<
+  addr_space = ["none"],
+  access_qual = ["none"],
+  type = ["uint*"],
+  base_type = ["uint*"],
+  type_qual = [""]
+>
+
+// -----
+
+// expected-error@+1 {{access_qual, type, base_type, type_qual must be string arrays}}
+#fn_attr = #cir.cl.kernel_arg_metadata<
+  addr_space = [0 : i32],
+  access_qual = [42 : i32],
+  type = ["uint*"],
+  base_type = ["uint*"],
+  type_qual = [""]
+>
+
+// -----
+
+// expected-error@+1 {{name must be a string array}}
+#fn_attr = #cir.cl.kernel_arg_metadata<
+  addr_space = [0 : i32],
+  access_qual = ["none"],
+  type = ["uint*"],
+  base_type = ["uint*"],
+  type_qual = [""],
+  name = [33 : i32]
+>
+
+// -----
+
+// expected-error@+1 {{all arrays must have the same number of elements}}
+#fn_attr = #cir.cl.kernel_arg_metadata<
+  addr_space = [0 : i32],
+  access_qual = ["none"],
+  type = ["uint*", "myunsignedint*"],
+  base_type = ["uint*", "uint*"],
+  type_qual = [""],
+  name = ["foo"]
+>
+
+// -----
+
+module {
+    // expected-error@+1 {{unknown calling convention}}
+    cir.func @foo() cc(foobar) {
+        cir.return
+    }
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.global external lang_address_space(offload_global) @gv = #cir.int<0> : !s32i
+
+  cir.func @test_get_global() {
+    // expected-error@+1 {{'cir.get_global' op result type address space does not match the address space of the global @gv}}
+    %addr = cir.get_global @gv : !cir.ptr<!s32i>
+    cir.return
+  }
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.func @array_to_ptrdecay_addrspace() {
+    %0 = cir.alloca !cir.array<!s32i x 32>, !cir.ptr<!cir.array<!s32i x 32>, lang_address_space(offload_private)>, ["x", init]
+    // expected-error@+1 {{requires same address space for source and result}}
+    %1 = cir.cast array_to_ptrdecay %0 : !cir.ptr<!cir.array<!s32i x 32>, lang_address_space(offload_private)> -> !cir.ptr<!s32i>
+    cir.return
+  }
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.func @subroutine() cc(spir_function) {
+    cir.return
+  }
+
+  cir.func @call_conv_match() {
+    // expected-error@+1 {{'cir.call' op calling convention mismatch: expected spir_function, but provided spir_kernel}}
+    cir.call @subroutine(): () -> !cir.void cc(spir_kernel)
+    cir.return
+  }
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.func @test_bitcast_addrspace() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["tmp"] {alignment = 4 : i64}
+    // expected-error@+1 {{'cir.cast' op result type address space does not match the address space of the operand}}
+    %1 = cir.cast bitcast %0 : !cir.ptr<!s32i> -> !cir.ptr<!s32i, lang_address_space(offload_local)>
+  }
+}
+
+// -----
+
+!s16i = !cir.int<s, 16>
+!s64i = !cir.int<s, 64>
+
+module {
+  cir.func @test_bitcast_vec2scalar_diff_size() {
+    %0 = cir.const #cir.int<1> : !s16i
+    %1 = cir.vec.create(%0, %0 : !s16i, !s16i) : !cir.vector<!s16i x 2>
+    // expected-error@+1 {{'cir.cast' op requires !cir.ptr or !cir.vector type for source and result}}
+    %2 = cir.cast bitcast %1 : !cir.vector<!s16i x 2> -> !s64i
+    cir.return
+  }
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+
+module {
+  cir.func @test_bitcast_scalar2vec_diff_size() {
+    %0 = cir.const #cir.int<1> : !s64i
+    // expected-error@+1 {{'cir.cast' op requires !cir.ptr or !cir.vector type for source and result}}
+    %1 = cir.cast bitcast %0 : !s64i -> !cir.vector<!s32i x 4>
+    cir.return
+  }
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+!s16i = !cir.int<s, 16>
+module {
+  cir.func @test_shift_vec() {
+    %0 = cir.alloca !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>, ["a", init] {alignment = 8 : i64}
+    %1 = cir.load %0 : !cir.ptr<!cir.vector<!s32i x 2>>, !cir.vector<!s32i x 2>
+    %2 = cir.const #cir.int<12> : !s32i
+    %4 = cir.const #cir.const_vector<[#cir.int<12> : !s16i, #cir.int<12> : !s16i]> : !cir.vector<!s16i x 2>
+    // expected-error@+1 {{'cir.shift' op input types cannot be one vector and one scalar}}
+    %3 = cir.shift(left, %1 : !cir.vector<!s32i x 2>, %2 : !s32i) -> !cir.vector<!s32i x 2>
+    %5 = cir.shift(left, %1 : !cir.vector<!s32i x 2>, %4 : !cir.vector<!s16i x 2>) -> !cir.vector<!s32i x 2>
+    cir.return
+  }
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+!s16i = !cir.int<s, 16>
+module {
+   cir.func @test_shift_vec2() {
+    %0 = cir.alloca !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>, ["a", init] {alignment = 8 : i64}
+    %1 = cir.load %0 : !cir.ptr<!cir.vector<!s32i x 2>>, !cir.vector<!s32i x 2>
+    %4 = cir.const #cir.const_vector<[#cir.int<12> : !s16i, #cir.int<12> : !s16i]> : !cir.vector<!s16i x 2>
+    // expected-error@+1 {{'cir.shift' op vector operands do not have the same elements sizes}}
+    %5 = cir.shift(left, %1 : !cir.vector<!s32i x 2>, %4 : !cir.vector<!s16i x 2>) -> !cir.vector<!s32i x 2>
+    cir.return
+  }
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+!s16i = !cir.int<s, 16>
+module {
+   cir.func @test_shift_vec2() {
+    %0 = cir.alloca !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>, ["a", init] {alignment = 8 : i64}
+    %1 = cir.load %0 : !cir.ptr<!cir.vector<!s32i x 2>>, !cir.vector<!s32i x 2>
+    %4 = cir.const #cir.const_vector<[#cir.int<12> : !s16i, #cir.int<12> : !s16i]> : !cir.vector<!s16i x 2>
+    // expected-error@+1 {{'cir.shift' op vector operands do not have the same elements sizes}}
+    %5 = cir.shift(left, %4 : !cir.vector<!s16i x 2>, %1 : !cir.vector<!s32i x 2>) -> !cir.vector<!s32i x 2>
+    cir.return
+  }
+}
+
+// -----
+
+// Type of the attribute must be a CIR floating point type
+
+// expected-error @below {{invalid kind of type specified}}
+cir.global external @f = #cir.fp<0.5> : !cir.int<s, 32>
+
+// -----
+
+// Value must be a floating point literal or integer literal
+
+// expected-error @below {{expected floating point literal}}
+cir.global external @f = #cir.fp<"blabla"> : !cir.float
+
+// -----
+
+// Integer value must be in the width of the floating point type
+
+// expected-error @below {{hexadecimal float constant out of range for type}}
+cir.global external @f = #cir.fp<0x7FC000000> : !cir.float
+
+// -----
+
+// Integer value must be in the width of the floating point type
+
+// expected-error @below {{hexadecimal float constant out of range for type}}
+cir.global external @f = #cir.fp<0x7FC000007FC0000000> : !cir.double
+
+// -----
+
+// Integer value must be in the width of the floating point type
+
+// expected-error @below {{hexadecimal float constant out of range for type}}
+cir.global external @f = #cir.fp<0x7FC0000007FC0000007FC000000> : !cir.long_double<!cir.f80>
+
+// -----
+
+// Long double with `double` semantics should have a value that fits in a double.
+
+// CHECK: cir.global external @f = #cir.fp<0x7FC000007FC000000000> : !cir.long_double<!cir.f80>
+cir.global external @f = #cir.fp<0x7FC000007FC000000000> : !cir.long_double<!cir.f80>
+
+// expected-error @below {{hexadecimal float constant out of range for type}}
+cir.global external @f = #cir.fp<0x7FC000007FC000000000> : !cir.long_double<!cir.double>
+
+// -----
+
+// Verify no need for type inside the attribute
+
+// expected-error @below {{expected '>'}}
+cir.global external @f = #cir.fp<0x7FC00000 : !cir.float> : !cir.float
+
+// -----
+
+// Verify literal must be hex or float
+
+// expected-error @below {{unexpected decimal integer literal for a floating point value}}
+// expected-note @below {{add a trailing dot to make the literal a float}}
+cir.global external @f = #cir.fp<42> : !cir.float
+
+// -----
+
+// Verify
+!s32i = !cir.int<s, 32>
+cir.func @cast0(%arg0: !s32i, %arg1: !s32i) {
+  // expected-error @below {{custom op 'cir.cmp' invalid kind of type specified}}
+  %1 = cir.cmp(eq, %arg0, %arg1): !s32i, !s32i
+  cir.return
+}
+
+// -----
+
+// Verify fone predicate requires floating point operands
+!s32i = !cir.int<s, 32>
+cir.func @cmp_fone_integer(%arg0: !s32i, %arg1: !s32i) {
+  // expected-error @below {{floating point comparison predicate 'fone'/'funo' requires floating point operands}}
+  %1 = cir.cmp(fone, %arg0, %arg1): !s32i, !cir.bool
+  cir.return
+}
+
+// -----
+
+// Verify funo predicate requires floating point operands
+!s32i = !cir.int<s, 32>
+cir.func @cmp_funo_integer(%arg0: !s32i, %arg1: !s32i) {
+  // expected-error @below {{floating point comparison predicate 'fone'/'funo' requires floating point operands}}
+  %1 = cir.cmp(funo, %arg0, %arg1): !s32i, !cir.bool
+  cir.return
+}
+
+// -----
+
+// Verify that void-returning functions have no return type listed in
+// MLIR assembly.
+
+!s32i = !cir.int<s, 32>
+// expected-error @below {{!cir.func cannot have an explicit 'void' return type}}
+// expected-error @below {{failed to parse CIR_PointerType parameter}}
+cir.global external dso_local @vfp = #cir.ptr<null> : !cir.ptr<!cir.func<(!s32i) -> !cir.void>>
+
+// -----
+
+// Verify that variadic functions do not allow an ellipsis anywhere except at
+// the end of the parameter list.
+
+// expected-error @below {{variadic `...` must be the last parameter}}
+!fty = !cir.func<(..., !s32i)>
+
+// -----
+
+// Verify that complex type does not accept arbitrary type
+
+// expected-error @below {{integer or floating point type}}
+!complex = !cir.complex<!cir.ptr<!cir.void>>
+
+// -----
+
+#false = #cir.bool<false> : !cir.bool
+#true = #cir.bool<true> : !cir.bool
+cir.func @b0() {
+  cir.scope {
+    cir.while {  // expected-error {{expected condition region to terminate with 'cir.condition'}}
+      cir.yield
+    } do {
+      cir.br ^bb1
+    ^bb1:
+      cir.return
+    }
+  }
+  cir.return
+}
+
+// -----
+
+cir.func @invalid_cond_region_terminator(%arg0 : !cir.bool) -> !cir.void {
+  cir.do { // expected-error {{op expected condition region to terminate with 'cir.condition'}}
+    cir.yield
+  } while {
+    cir.yield
+  }
+  cir.return
+}
+
+// -----
+
+cir.func @invalidConditionTerminator (%arg0 : !cir.bool) -> !cir.void {
+  cir.for : cond { // expected-error {{op expected condition region to terminate with 'cir.condition'}}
+    cir.yield
+  } body {
+    cir.yield
+  } step {
+    cir.yield
+  }
+  cir.return
+}
+
+// -----
+
+!s16i = !cir.int<s, 16>
+!s32i = !cir.int<s, 32>
+#fn_attr = #cir<extra({nothrow = #cir.nothrow, uwtable = #cir.uwtable<async>})>
+!rec_S = !cir.record<struct "S" {!s16i} #cir.record.decl.ast>
+#tbaa_scalar = #cir.tbaa_scalar<id = "short", type = !s16i>
+// expected-error @below {{invalid kind of attribute specified}}
+// expected-error @below {{failed to parse CIR_TBAAStructAttr parameter 'members'}}
+#tbaa_struct = #cir.tbaa_struct<id = "S", members = {#cir.uwtable<async>}>
+#tbaa_tag = #cir.tbaa_tag<base = #tbaa_struct, access = #tbaa_scalar, offset = 0>
+cir.global external dso_local @glob = #cir.zero : !rec_S {alignment = 2 : i64}
+cir.func dso_local @main() -> !s32i extra(#fn_attr) {
+  %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+  %1 = cir.const #cir.int<0> : !s32i
+  %2 = cir.cast integral %1 : !s32i -> !s16i
+  %3 = cir.get_global @glob : !cir.ptr<!rec_S>
+  %4 = cir.get_member %3[0] {name = "i"} : !cir.ptr<!rec_S> -> !cir.ptr<!s16i>
+  cir.store align(2) %2, %4 : !s16i, !cir.ptr<!s16i> tbaa(#tbaa_tag)
+  %5 = cir.get_global @glob : !cir.ptr<!rec_S>
+  %6 = cir.get_member %5[0] {name = "i"} : !cir.ptr<!rec_S> -> !cir.ptr<!s16i>
+  %7 = cir.load align(2) %6 : !cir.ptr<!s16i>, !s16i tbaa(#tbaa_tag)
+  %8 = cir.cast integral %7 : !s16i -> !s32i
+  cir.store %8, %0 : !s32i, !cir.ptr<!s32i>
+  %9 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+  cir.return %9 : !s32i
+}
diff --git a/clang/test/CIR/Incubator/IR/libc-fabs.cir b/clang/test/CIR/Incubator/IR/libc-fabs.cir
new file mode 100644
index 0000000000000..2a644fb2a4b85
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/libc-fabs.cir
@@ -0,0 +1,10 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!u32i = !cir.int<u, 32>
+module {
+  cir.func @foo(%arg0: !cir.double) -> !cir.double {
+    // CHECK: cir.fabs
+    %0 = cir.fabs %arg0 : !cir.double
+    cir.return %0 : !cir.double
+  }
+}
diff --git a/clang/test/CIR/Incubator/IR/libc-memchr.cir b/clang/test/CIR/Incubator/IR/libc-memchr.cir
new file mode 100644
index 0000000000000..bc746bb9db212
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/libc-memchr.cir
@@ -0,0 +1,12 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!voidptr = !cir.ptr<!cir.void>
+!s32i = !cir.int<s, 32>
+!u64i = !cir.int<u, 64>
+module {
+  cir.func @f(%src : !voidptr, %pattern : !s32i, %len : !u64i) -> !voidptr {
+    // CHECK: cir.libc.memchr 
+    %ptr = cir.libc.memchr(%src, %pattern, %len)
+    cir.return %ptr : !voidptr
+  }
+}
diff --git a/clang/test/CIR/Incubator/IR/libc-memcpy.cir b/clang/test/CIR/Incubator/IR/libc-memcpy.cir
new file mode 100644
index 0000000000000..6769092f3beb1
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/libc-memcpy.cir
@@ -0,0 +1,10 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!u32i = !cir.int<u, 32>
+module {
+  cir.func @shouldParseLibcMemcpyOp(%arg0 : !cir.ptr<!cir.void>, %arg1 : !u32i) {
+    // CHECK: cir.libc.memcpy 
+    cir.libc.memcpy %arg1 bytes from %arg0 to %arg0 : !u32i, !cir.ptr<!cir.void> -> !cir.ptr<!cir.void>
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/IR/llvm-intrinsic.cir b/clang/test/CIR/Incubator/IR/llvm-intrinsic.cir
new file mode 100644
index 0000000000000..f69721bb66b41
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/llvm-intrinsic.cir
@@ -0,0 +1,11 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+cir.func @foo()  {
+    %a = cir.alloca !s32i, !cir.ptr<!s32i>, ["a"] {alignment = 4 : i64}
+    %i = cir.llvm.intrinsic "llvm.aarch64.ldxr" %a : (!cir.ptr<!s32i>) -> !s64i
+    cir.return
+}
+
+// CHECK: %1 = cir.llvm.intrinsic "llvm.aarch64.ldxr" %0 : (!cir.ptr<!s32i>) -> !s64i
diff --git a/clang/test/CIR/Incubator/IR/module.cir b/clang/test/CIR/Incubator/IR/module.cir
new file mode 100644
index 0000000000000..8c782fdb2dbc6
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/module.cir
@@ -0,0 +1,11 @@
+// RUN: cir-opt %s -split-input-file --verify-roundtrip | FileCheck %s
+
+// Should parse and print C source language attribute.
+module attributes {cir.lang = #cir.lang<c>} { }
+// CHECK: module attributes {cir.lang = #cir.lang<c>}
+
+// -----
+
+// Should parse and print C++ source language attribute.
+module attributes {cir.lang = #cir.lang<cxx>} { }
+// CHECK: module attributes {cir.lang = #cir.lang<cxx>}
diff --git a/clang/test/CIR/Incubator/IR/ptr_stride.cir b/clang/test/CIR/Incubator/IR/ptr_stride.cir
new file mode 100644
index 0000000000000..a2fd9823deede
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/ptr_stride.cir
@@ -0,0 +1,39 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+!s32i = !cir.int<s, 32>
+
+module  {
+  cir.func @arraysubscript(%arg0: !s32i) {
+    %0 = cir.alloca !cir.array<!s32i x 10>, !cir.ptr<!cir.array<!s32i x 10>>, ["x", init]
+    %1 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
+    %2 = cir.cast array_to_ptrdecay %0 : !cir.ptr<!cir.array<!s32i x 10>> -> !cir.ptr<!s32i>
+    %3 = cir.const #cir.int<0> : !s32i
+    %4 = cir.ptr_stride %2, %3 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+    cir.return
+  }
+
+  cir.func @gepflags(%arg0: !cir.ptr<!s32i>, %arg1: !s32i) {
+    %0 = cir.ptr_stride %arg0, %arg1 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+    %1 = cir.ptr_stride nuw %arg0, %arg1 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+    %2 = cir.ptr_stride inbounds|nuw %arg0, %arg1 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+    %3 = cir.ptr_stride %arg0, %arg1 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+    cir.return
+  }
+}
+
+// CHECK: cir.func @arraysubscript(%arg0: !s32i) {
+// CHECK-NEXT:    %0 = cir.alloca !cir.array<!s32i x 10>, !cir.ptr<!cir.array<!s32i x 10>>, ["x", init]
+// CHECK-NEXT:    %1 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
+// CHECK-NEXT:    %2 = cir.cast array_to_ptrdecay %0 : !cir.ptr<!cir.array<!s32i x 10>> -> !cir.ptr<!s32i>
+// CHECK-NEXT:    %3 = cir.const #cir.int<0> : !s32i
+// CHECK-NEXT:    %4 = cir.ptr_stride %2, %3 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+// CHECK-NEXT:    cir.return
+// CHECK-NEXT:  }
+
+
+// CHECK: cir.func @gepflags(%arg0: !cir.ptr<!s32i>, %arg1: !s32i) {
+// CHECK-NEXT:   %0 = cir.ptr_stride %arg0, %arg1 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+// CHECK-NEXT:   %1 = cir.ptr_stride nuw %arg0, %arg1 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+// CHECK-NEXT:   %2 = cir.ptr_stride inbounds|nuw %arg0, %arg1 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+// CHECK-NEXT:   %3 = cir.ptr_stride %arg0, %arg1 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+// CHECK-NEXT:   cir.return
+// CHECK-NEXT:  }
diff --git a/clang/test/CIR/Incubator/IR/resume-location-parsing.cir b/clang/test/CIR/Incubator/IR/resume-location-parsing.cir
new file mode 100644
index 0000000000000..40d230fda130b
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/resume-location-parsing.cir
@@ -0,0 +1,62 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+// Test ClangIR exception handling parsing fix
+// This demonstrates the syntax that was failing before the fix:
+// 1. #cir.unwind attributes in catch blocks
+// 2. cir.resume operations with location information
+
+!void = !cir.void
+
+#loc1 = loc("simple.cpp":10:5)
+#loc2 = loc("simple.cpp":15:8)
+
+module {
+  // This represents C++ code like:
+  //   void function() {
+  //     RAII_Object obj;  // needs cleanup on exception
+  //   }
+
+  // CHECK-LABEL: @simple_cleanup_example
+  cir.func @simple_cleanup_example() -> !void {
+    cir.try {
+      // Normal execution path
+      cir.return
+    } catch [#cir.unwind {
+      // Cleanup/unwind region - not a real exception handler
+      // Before the fix: "undefined symbol alias id 'loc1'"
+      // CHECK: cir.resume
+      cir.resume loc(#loc1)
+    }]
+    cir.return
+  }
+
+  // This represents C++ code like:
+  //   void function() {
+  //     try { /* some code */ }
+  //     catch (...) { throw; }  // rethrow
+  //   }
+
+  // CHECK-LABEL: @rethrow_example
+  cir.func @rethrow_example() -> !void {
+    cir.try {
+      cir.return
+    } catch [#cir.unwind {
+      // Rethrow - continue unwinding to find real handler
+      // CHECK: cir.resume rethrow
+      cir.resume rethrow loc(#loc2)
+    }]
+    cir.return
+  }
+
+  // CHECK-LABEL: @test_unwind_catch_parsing
+  cir.func @test_unwind_catch_parsing() -> !void {
+    cir.try {
+      cir.return
+    } catch [#cir.unwind {
+      // CHECK: cir.resume
+      cir.resume
+    }]
+    // CHECK: cir.return
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/IR/scope.cir b/clang/test/CIR/Incubator/IR/scope.cir
new file mode 100644
index 0000000000000..a04c9024a8684
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/scope.cir
@@ -0,0 +1,56 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+!u32i = !cir.int<u, 32>
+
+module {
+  // Should properly print/parse scope with implicit empty yield.
+  // CHECK-LABEL: implicit_yield
+  cir.func @implicit_yield() {
+    cir.scope {
+    }
+    // CHECK: cir.scope {
+    // CHECK-NEXT: }
+    // CHECK-NEXT: cir.return
+    cir.return
+  }
+
+  // Should properly print/parse scope with explicit yield.
+  // CHECK-LABEL: explicit_yield
+  cir.func @explicit_yield() {
+    %0 = cir.scope {
+      %1 = cir.alloca !u32i, !cir.ptr<!u32i>, ["a", init] {alignment = 4 : i64}
+      cir.yield %1 : !cir.ptr<!u32i>
+    } : !cir.ptr<!u32i>
+    // CHECK: %0 = cir.scope {
+    //          [...]
+    // CHECK:   cir.yield %1 : !cir.ptr<!u32i>
+    // CHECK: } : !cir.ptr<!u32i>
+    cir.return
+  }
+
+  // Handle optional cleanup presence
+  // CHECK-LABEL: empty_cleanup
+  cir.func @empty_cleanup() {
+    cir.scope {
+    } cleanup {
+    }
+    // CHECK:      cir.scope {
+    // CHECK-NEXT: } cleanup {
+    // CHECK-NEXT: }
+    // CHECK-NEXT: cir.return
+    cir.return
+  }
+
+  // Handle optional cleanup presence
+  // CHECK-LABEL: some_cleanup
+  cir.func @some_cleanup() {
+    cir.scope {
+    } cleanup {
+      %1 = cir.alloca !u32i, !cir.ptr<!u32i>, ["a", init] {alignment = 4 : i64}
+    }
+    // CHECK: cir.scope {
+    // CHECK: } cleanup {
+    // CHECK:   cir.alloca
+    // CHECK: }
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/IR/side-effect.cir b/clang/test/CIR/Incubator/IR/side-effect.cir
new file mode 100644
index 0000000000000..1c3366ebf9af0
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/side-effect.cir
@@ -0,0 +1,20 @@
+// RUN: cir-opt %s -split-input-file --verify-roundtrip | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.func private @add(%arg0: !s32i, %arg1: !s32i) -> !s32i
+  cir.func @call_with_side_effect() {
+    %0 = cir.const #cir.int<0> : !s32i
+    %1 = cir.const #cir.int<1> : !s32i
+    %2 = cir.call @add(%0, %1) : (!s32i, !s32i) -> !s32i side_effect(all)
+    %3 = cir.call @add(%0, %1) : (!s32i, !s32i) -> !s32i side_effect(pure)
+    %4 = cir.call @add(%0, %1) : (!s32i, !s32i) -> !s32i side_effect(const)
+    cir.return
+  }
+  // CHECK-LABEL: @call_with_side_effect()
+  //      CHECK:    %{{.+}} = cir.call @add(%{{.+}}, %{{.+}}) : (!s32i, !s32i) -> !s32i
+  // CHECK-NEXT:    %{{.+}} = cir.call @add(%{{.+}}, %{{.+}}) : (!s32i, !s32i) -> !s32i side_effect(pure)
+  // CHECK-NEXT:    %{{.+}} = cir.call @add(%{{.+}}, %{{.+}}) : (!s32i, !s32i) -> !s32i side_effect(const)
+  //      CHECK:  }
+}
diff --git a/clang/test/CIR/Incubator/IR/stack-save-restore.cir b/clang/test/CIR/Incubator/IR/stack-save-restore.cir
new file mode 100644
index 0000000000000..bc199d5856c33
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/stack-save-restore.cir
@@ -0,0 +1,23 @@
+// Test the CIR operations can parse and print correctly (roundtrip)
+
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!u8i = !cir.int<u, 8>
+
+module  {
+  cir.func @stack_save_restore() {
+    %0 = cir.stack_save : !cir.ptr<!u8i>
+    cir.stack_restore %0 : !cir.ptr<!u8i>
+    cir.return
+  }
+}
+
+//CHECK: module  {
+
+//CHECK-NEXT: cir.func @stack_save_restore() {
+//CHECK-NEXT:   %0 = cir.stack_save : !cir.ptr<!u8i>
+//CHECK-NEXT:   cir.stack_restore %0 : !cir.ptr<!u8i>
+//CHECK-NEXT:   cir.return
+//CHECK-NEXT: }
+
+//CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/IR/struct.cir b/clang/test/CIR/Incubator/IR/struct.cir
new file mode 100644
index 0000000000000..e874edc3844fb
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/struct.cir
@@ -0,0 +1,40 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!u8i = !cir.int<u, 8>
+!u16i = !cir.int<u, 16>
+!s32i = !cir.int<s, 32>
+!u32i = !cir.int<u, 32>
+
+!rec_2222 = !cir.record<struct {!cir.array<!cir.ptr<!u8i> x 5>}>
+!rec_22221 = !cir.record<struct {!cir.ptr<!u8i>, !cir.ptr<!u8i>, !cir.ptr<!u8i>}>
+!rec_A = !cir.record<class "A" incomplete #cir.record.decl.ast>
+!rec_i = !cir.record<union "i" incomplete>
+!rec_S = !cir.record<struct "S" {!u8i, !u16i, !u32i}>
+!rec_S1 = !cir.record<struct "S1" {!s32i, !s32i}>
+
+// Test recursive struct parsing/printing.
+!rec_Node = !cir.record<struct "Node" {!cir.ptr<!cir.record<struct "Node">>} #cir.record.decl.ast>
+// CHECK-DAG: !cir.record<struct "Node" {!cir.ptr<!cir.record<struct "Node">>} #cir.record.decl.ast>
+
+module  {
+  // Dummy function to use types and force them to be printed.
+  cir.func @useTypes(%arg0: !rec_Node) {
+    cir.return
+  }
+
+  cir.func @structs() {
+    %0 = cir.alloca !cir.ptr<!cir.record<struct "S" {!u8i, !u16i, !u32i}>>, !cir.ptr<!cir.ptr<!cir.record<struct "S" {!u8i, !u16i, !u32i}>>>, ["s", init]
+    %1 = cir.alloca !cir.ptr<!cir.record<union "i" incomplete>>, !cir.ptr<!cir.ptr<!cir.record<union "i" incomplete>>>, ["i", init]
+    cir.return
+  }
+
+// CHECK: cir.func @structs() {
+// CHECK:     %0 = cir.alloca !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>, ["s", init]
+// CHECK:     %1 = cir.alloca !cir.ptr<!rec_i>, !cir.ptr<!cir.ptr<!rec_i>>, ["i", init]
+
+  cir.func @shouldSuccessfullyParseConstStructAttrs() {
+    %0 = cir.const #cir.const_record<{#cir.int<1> : !s32i, #cir.int<2> : !s32i}> : !rec_S1
+    // CHECK: cir.const #cir.const_record<{#cir.int<1> : !s32i, #cir.int<2> : !s32i}> : !rec_S1
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/IR/switch.cir b/clang/test/CIR/Incubator/IR/switch.cir
new file mode 100644
index 0000000000000..87d45bf1f5219
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/switch.cir
@@ -0,0 +1,38 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+!s32i = !cir.int<s, 32>
+
+cir.func @s0() {
+  %1 = cir.const #cir.int<2> : !s32i
+  cir.switch (%1 : !s32i) {
+    cir.case (default, []) {
+      cir.return
+    }
+    cir.case (equal, [#cir.int<3> : !s32i]) {
+      cir.yield
+    }
+    cir.case (anyof, [#cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i]) {
+      cir.break
+    }
+    cir.case (equal, [#cir.int<5> : !s32i]) {
+      cir.yield
+    }
+    cir.yield
+  }
+  cir.return
+}
+
+// CHECK: cir.switch (%0 : !s32i) {
+// CHECK-NEXT: cir.case(default, [])  {
+// CHECK-NEXT:   cir.return
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.case(equal, [#cir.int<3> : !s32i])  {
+// CHECK-NEXT:   cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.case(anyof, [#cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i]) {
+// CHECK-NEXT:   cir.break
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.case(equal, [#cir.int<5> : !s32i])  {
+// CHECK-NEXT:   cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/IR/tbaa-parse.cir b/clang/test/CIR/Incubator/IR/tbaa-parse.cir
new file mode 100644
index 0000000000000..723c31537704c
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/tbaa-parse.cir
@@ -0,0 +1,28 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!s16i = !cir.int<s, 16>
+!s32i = !cir.int<s, 32>
+#fn_attr = #cir<extra({nothrow = #cir.nothrow, uwtable = #cir.uwtable<async>})>
+!rec_S = !cir.record<struct "S" {!s16i} #cir.record.decl.ast>
+// CHECK: #tbaa_scalar = #cir.tbaa_scalar<id = "short", type = !s16i>
+// CHECK: #tbaa_struct = #cir.tbaa_struct<id = "S", members = {<#tbaa_scalar, 0>}>
+// CHECK: #tbaa_tag = #cir.tbaa_tag<base = #tbaa_struct, access = #tbaa_scalar, offset = 0>
+#tbaa_scalar = #cir.tbaa_scalar<id = "short", type = !s16i>
+#tbaa_struct = #cir.tbaa_struct<id = "S", members = {<#tbaa_scalar, 0>}>
+#tbaa_tag = #cir.tbaa_tag<base = #tbaa_struct, access = #tbaa_scalar, offset = 0>
+cir.global external dso_local @glob = #cir.zero : !rec_S {alignment = 2 : i64}
+cir.func dso_local @main() -> !s32i extra(#fn_attr) {
+  %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+  %1 = cir.const #cir.int<0> : !s32i
+  %2 = cir.cast integral %1 : !s32i -> !s16i
+  %3 = cir.get_global @glob : !cir.ptr<!rec_S>
+  %4 = cir.get_member %3[0] {name = "i"} : !cir.ptr<!rec_S> -> !cir.ptr<!s16i>
+  cir.store align(2) %2, %4 : !s16i, !cir.ptr<!s16i> tbaa(#tbaa_tag)
+  %5 = cir.get_global @glob : !cir.ptr<!rec_S>
+  %6 = cir.get_member %5[0] {name = "i"} : !cir.ptr<!rec_S> -> !cir.ptr<!s16i>
+  %7 = cir.load align(2) %6 : !cir.ptr<!s16i>, !s16i tbaa(#tbaa_tag)
+  %8 = cir.cast integral %7 : !s16i -> !s32i
+  cir.store %8, %0 : !s32i, !cir.ptr<!s32i>
+  %9 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+  cir.return %9 : !s32i
+}
diff --git a/clang/test/CIR/Incubator/IR/ternary.cir b/clang/test/CIR/Incubator/IR/ternary.cir
new file mode 100644
index 0000000000000..9b7a1a8dd48e4
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/ternary.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+!u32i = !cir.int<u, 32>
+
+module  {
+  cir.func @blue(%arg0: !cir.bool) -> !u32i {
+    %0 = cir.ternary(%arg0, true {
+      %a = cir.const #cir.int<0> : !u32i
+      cir.yield %a : !u32i
+    }, false {
+      %b = cir.const #cir.int<1> : !u32i
+      cir.yield %b : !u32i
+    }) : (!cir.bool) -> !u32i
+    cir.return %0 : !u32i
+  }
+}
+
+// CHECK: module  {
+
+// CHECK: cir.func @blue(%arg0: !cir.bool) -> !u32i {
+// CHECK:   %0 = cir.ternary(%arg0, true {
+// CHECK:     %1 = cir.const #cir.int<0> : !u32i
+// CHECK:     cir.yield %1 : !u32i
+// CHECK:   }, false {
+// CHECK:     %1 = cir.const #cir.int<1> : !u32i
+// CHECK:     cir.yield %1 : !u32i
+// CHECK:   }) : (!cir.bool) -> !u32i
+// CHECK:   cir.return %0 : !u32i
+// CHECK: }
+
+// CHECK: }
diff --git a/clang/test/CIR/Incubator/IR/try.cir b/clang/test/CIR/Incubator/IR/try.cir
new file mode 100644
index 0000000000000..3e53edb292925
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/try.cir
@@ -0,0 +1,22 @@
+// RUN: cir-opt %s -verify-roundtrip | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+
+// CHECK: cir.func @div
+cir.func @div(%x : !s32i, %y : !s32i) -> !s32i {
+  %3 = cir.const #cir.int<0> : !s32i
+  cir.return %3 : !s32i
+}
+
+// CHECK: cir.func @foo
+cir.func @foo(%x : !s32i, %y : !s32i) -> () {
+  cir.scope {
+    cir.scope {
+      // CHECK: cir.call exception 
+      %d = cir.call exception @div(%x, %y) : (!s32i, !s32i) -> !s32i
+      cir.yield
+    }
+    cir.yield
+  }
+  cir.return
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/IR/types.cir b/clang/test/CIR/Incubator/IR/types.cir
new file mode 100644
index 0000000000000..3ff4957b6fb2e
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/types.cir
@@ -0,0 +1,13 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!u32i = !cir.int<u, 32>
+
+module  {
+  cir.func @arrays() {
+    %0 = cir.alloca !cir.array<!u32i x 10>, !cir.ptr<!cir.array<!u32i x 10>>, ["x", init]
+    cir.return
+  }
+}
+
+// CHECK: cir.func @arrays() {
+// CHECK-NEXT:     %0 = cir.alloca !cir.array<!u32i x 10>, !cir.ptr<!cir.array<!u32i x 10>>, ["x", init]
diff --git a/clang/test/CIR/Incubator/IR/unreachable.cir b/clang/test/CIR/Incubator/IR/unreachable.cir
new file mode 100644
index 0000000000000..d132e434c451d
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/unreachable.cir
@@ -0,0 +1,8 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+cir.func @test() {
+  cir.unreachable
+}
+
+//      CHECK: cir.func @test
+// CHECK-NEXT:   cir.unreachable
diff --git a/clang/test/CIR/Incubator/IR/vtableAttr.cir b/clang/test/CIR/Incubator/IR/vtableAttr.cir
new file mode 100644
index 0000000000000..ebc3a96983e1c
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/vtableAttr.cir
@@ -0,0 +1,7 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+!u8i = !cir.int<u, 8>
+
+// Should parse VTable attribute.
+cir.global external @testVTable = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 1>}> : !cir.record<struct {!cir.array<!cir.ptr<!u8i> x 1>}>
+// CHECK: cir.global external @testVTable = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 1>}> : !rec_anon_struct
diff --git a/clang/test/CIR/Incubator/IR/while.cir b/clang/test/CIR/Incubator/IR/while.cir
new file mode 100644
index 0000000000000..fa93f1cc52d2e
--- /dev/null
+++ b/clang/test/CIR/Incubator/IR/while.cir
@@ -0,0 +1,17 @@
+// RUN: cir-opt %s --verify-roundtrip | FileCheck %s
+
+cir.func @testPrintingParsing(%arg0 : !cir.bool) {
+  cir.while {
+    cir.condition(%arg0)
+  } do {
+    cir.yield
+  }
+  cir.return
+}
+
+// CHECK: @testPrintingParsing
+// CHECK: cir.while {
+// CHECK:   cir.condition(%arg0)
+// CHECK: } do {
+// CHECK:   cir.yield
+// CHECK: }
diff --git a/clang/test/CIR/Incubator/Inputs/skip-this-header.h b/clang/test/CIR/Incubator/Inputs/skip-this-header.h
new file mode 100644
index 0000000000000..bf94a9cfeb947
--- /dev/null
+++ b/clang/test/CIR/Incubator/Inputs/skip-this-header.h
@@ -0,0 +1,12 @@
+#pragma clang system_header
+
+class String {
+  char *storage{nullptr};
+  long size;
+  long capacity;
+
+public:
+  String() : size{0} {}
+  String(int size) : size{size} {}
+  String(const char *s) {}
+};
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Inputs/std-cxx.h b/clang/test/CIR/Incubator/Inputs/std-cxx.h
new file mode 100644
index 0000000000000..1697e311bcb37
--- /dev/null
+++ b/clang/test/CIR/Incubator/Inputs/std-cxx.h
@@ -0,0 +1,1321 @@
+// This header provides reduced versions of common standard library containers
+// and whatnots. It's a copy from
+// clang/test/Analysis/Inputs/system-header-simulator-cxx.h with some additions
+// for ClangIR use cases found along the way.
+
+// Like the compiler, the static analyzer treats some functions differently if
+// they come from a system header -- for example, it is assumed that system
+// functions do not arbitrarily free() their parameters, and that some bugs
+// found in system headers cannot be fixed by the user and should be
+// suppressed.
+#pragma clang system_header
+
+typedef unsigned char uint8_t;
+
+typedef __typeof__(sizeof(int)) size_t;
+typedef __typeof__((char*)0-(char*)0) ptrdiff_t;
+void *memmove(void *s1, const void *s2, size_t n);
+
+namespace std {
+  typedef size_t size_type;
+#if __cplusplus >= 201103L
+  using nullptr_t = decltype(nullptr);
+#endif
+}
+
+namespace std {
+  struct input_iterator_tag { };
+  struct output_iterator_tag { };
+  struct forward_iterator_tag : public input_iterator_tag { };
+  struct bidirectional_iterator_tag : public forward_iterator_tag { };
+  struct random_access_iterator_tag : public bidirectional_iterator_tag { };
+
+  template <typename Iterator> struct iterator_traits {
+    typedef typename Iterator::difference_type difference_type;
+    typedef typename Iterator::value_type value_type;
+    typedef typename Iterator::pointer pointer;
+    typedef typename Iterator::reference reference;
+    typedef typename Iterator::iterator_category iterator_category;
+  };
+}
+
+template <typename T, typename Ptr, typename Ref> struct __vector_iterator {
+  typedef __vector_iterator<T, T *, T &> iterator;
+  typedef __vector_iterator<T, const T *, const T &> const_iterator;
+
+  typedef ptrdiff_t difference_type;
+  typedef T value_type;
+  typedef Ptr pointer;
+  typedef Ref reference;
+  typedef std::random_access_iterator_tag iterator_category;
+
+  __vector_iterator(const Ptr p = 0) : ptr(p) {}
+  __vector_iterator(const iterator &rhs): ptr(rhs.base()) {}
+  __vector_iterator<T, Ptr, Ref>& operator++() { ++ ptr; return *this; }
+  __vector_iterator<T, Ptr, Ref> operator++(int) {
+    auto tmp = *this;
+    ++ ptr;
+    return tmp;
+  }
+  __vector_iterator<T, Ptr, Ref> operator--() { -- ptr; return *this; }
+  __vector_iterator<T, Ptr, Ref> operator--(int) {
+    auto tmp = *this; -- ptr;
+    return tmp;
+  }
+  __vector_iterator<T, Ptr, Ref> operator+(difference_type n) {
+    return ptr + n;
+  }
+  friend __vector_iterator<T, Ptr, Ref> operator+(
+      difference_type n,
+      const __vector_iterator<T, Ptr, Ref> &iter) {
+    return n + iter.ptr;
+  }
+  __vector_iterator<T, Ptr, Ref> operator-(difference_type n) {
+    return ptr - n;
+  }
+  __vector_iterator<T, Ptr, Ref> operator+=(difference_type n) {
+    return ptr += n;
+  }
+  __vector_iterator<T, Ptr, Ref> operator-=(difference_type n) {
+    return ptr -= n;
+  }
+
+  template<typename U, typename Ptr2, typename Ref2>
+  difference_type operator-(const __vector_iterator<U, Ptr2, Ref2> &rhs);
+
+  Ref operator*() const { return *ptr; }
+  Ptr operator->() const { return ptr; }
+
+  Ref operator[](difference_type n) {
+    return *(ptr+n);
+  }
+
+  bool operator==(const iterator &rhs) const { return ptr == rhs.ptr; }
+  bool operator==(const const_iterator &rhs) const { return ptr == rhs.ptr; }
+
+  bool operator!=(const iterator &rhs) const { return ptr != rhs.ptr; }
+  bool operator!=(const const_iterator &rhs) const { return ptr != rhs.ptr; }
+
+  const Ptr& base() const { return ptr; }
+
+private:
+  Ptr ptr;
+};
+
+template <typename T, typename Ptr, typename Ref> struct __deque_iterator {
+  typedef __deque_iterator<T, T *, T &> iterator;
+  typedef __deque_iterator<T, const T *, const T &> const_iterator;
+
+  typedef ptrdiff_t difference_type;
+  typedef T value_type;
+  typedef Ptr pointer;
+  typedef Ref reference;
+  typedef std::random_access_iterator_tag iterator_category;
+
+  __deque_iterator(const Ptr p = 0) : ptr(p) {}
+  __deque_iterator(const iterator &rhs): ptr(rhs.base()) {}
+  __deque_iterator<T, Ptr, Ref>& operator++() { ++ ptr; return *this; }
+  __deque_iterator<T, Ptr, Ref> operator++(int) {
+    auto tmp = *this;
+    ++ ptr;
+    return tmp;
+  }
+  __deque_iterator<T, Ptr, Ref> operator--() { -- ptr; return *this; }
+  __deque_iterator<T, Ptr, Ref> operator--(int) {
+    auto tmp = *this; -- ptr;
+    return tmp;
+  }
+  __deque_iterator<T, Ptr, Ref> operator+(difference_type n) {
+    return ptr + n;
+  }
+  friend __deque_iterator<T, Ptr, Ref> operator+(
+      difference_type n,
+      const __deque_iterator<T, Ptr, Ref> &iter) {
+    return n + iter.ptr;
+  }
+  __deque_iterator<T, Ptr, Ref> operator-(difference_type n) {
+    return ptr - n;
+  }
+  __deque_iterator<T, Ptr, Ref> operator+=(difference_type n) {
+    return ptr += n;
+  }
+  __deque_iterator<T, Ptr, Ref> operator-=(difference_type n) {
+    return ptr -= n;
+  }
+
+  Ref operator*() const { return *ptr; }
+  Ptr operator->() const { return ptr; }
+
+  Ref operator[](difference_type n) {
+    return *(ptr+n);
+  }
+
+  bool operator==(const iterator &rhs) const { return ptr == rhs.ptr; }
+  bool operator==(const const_iterator &rhs) const { return ptr == rhs.ptr; }
+
+  bool operator!=(const iterator &rhs) const { return ptr != rhs.ptr; }
+  bool operator!=(const const_iterator &rhs) const { return ptr != rhs.ptr; }
+
+  const Ptr& base() const { return ptr; }
+
+private:
+  Ptr ptr;
+};
+
+template <typename T, typename Ptr, typename Ref> struct __list_iterator {
+  typedef __list_iterator<T, __typeof__(T::data) *, __typeof__(T::data) &> iterator;
+  typedef __list_iterator<T, const __typeof__(T::data) *, const __typeof__(T::data) &> const_iterator;
+
+  typedef ptrdiff_t difference_type;
+  typedef T value_type;
+  typedef Ptr pointer;
+  typedef Ref reference;
+  typedef std::bidirectional_iterator_tag iterator_category;
+
+  __list_iterator(T* it = 0) : item(it) {}
+  __list_iterator(const iterator &rhs): item(rhs.item) {}
+  __list_iterator<T, Ptr, Ref>& operator++() { item = item->next; return *this; }
+  __list_iterator<T, Ptr, Ref> operator++(int) {
+    auto tmp = *this;
+    item = item->next;
+    return tmp;
+  }
+  __list_iterator<T, Ptr, Ref> operator--() { item = item->prev; return *this; }
+  __list_iterator<T, Ptr, Ref> operator--(int) {
+    auto tmp = *this;
+    item = item->prev;
+    return tmp;
+  }
+
+  Ref operator*() const { return item->data; }
+  Ptr operator->() const { return &item->data; }
+
+  bool operator==(const iterator &rhs) const { return item == rhs->item; }
+  bool operator==(const const_iterator &rhs) const { return item == rhs->item; }
+
+  bool operator!=(const iterator &rhs) const { return item != rhs->item; }
+  bool operator!=(const const_iterator &rhs) const { return item != rhs->item; }
+
+  const T* &base() const { return item; }
+
+  template <typename UT, typename UPtr, typename URef>
+  friend struct __list_iterator;
+
+private:
+  T* item;
+};
+
+template <typename T, typename Ptr, typename Ref> struct __fwdl_iterator {
+  typedef __fwdl_iterator<T, __typeof__(T::data) *, __typeof__(T::data) &> iterator;
+  typedef __fwdl_iterator<T, const __typeof__(T::data) *, const __typeof__(T::data) &> const_iterator;
+
+  typedef ptrdiff_t difference_type;
+  typedef T value_type;
+  typedef Ptr pointer;
+  typedef Ref reference;
+  typedef std::forward_iterator_tag iterator_category;
+
+  __fwdl_iterator(T* it = 0) : item(it) {}
+  __fwdl_iterator(const iterator &rhs): item(rhs.item) {}
+  __fwdl_iterator<T, Ptr, Ref>& operator++() { item = item->next; return *this; }
+  __fwdl_iterator<T, Ptr, Ref> operator++(int) {
+    auto tmp = *this;
+    item = item->next;
+    return tmp;
+  }
+  Ref operator*() const { return item->data; }
+  Ptr operator->() const { return &item->data; }
+
+  bool operator==(const iterator &rhs) const { return item == rhs->item; }
+  bool operator==(const const_iterator &rhs) const { return item == rhs->item; }
+
+  bool operator!=(const iterator &rhs) const { return item != rhs->item; }
+  bool operator!=(const const_iterator &rhs) const { return item != rhs->item; }
+
+  const T* &base() const { return item; }
+
+  template <typename UT, typename UPtr, typename URef>
+  friend struct __fwdl_iterator;
+
+private:
+  T* item;
+};
+
+namespace std {
+  template <class T1, class T2>
+  struct pair {
+    T1 first;
+    T2 second;
+
+    pair() : first(), second() {}
+    pair(const T1 &a, const T2 &b) : first(a), second(b) {}
+
+    template<class U1, class U2>
+    pair(const pair<U1, U2> &other) : first(other.first),
+                                      second(other.second) {}
+  };
+
+  typedef __typeof__(sizeof(int)) size_t;
+
+  template <class T> class initializer_list;
+
+  template< class T > struct remove_reference      {typedef T type;};
+  template< class T > struct remove_reference<T&>  {typedef T type;};
+  template< class T > struct remove_reference<T&&> {typedef T type;};
+
+  template<class T>
+  typename remove_reference<T>::type&& move(T&& a) {
+    typedef typename remove_reference<T>::type&& RvalRef;
+    return static_cast<RvalRef>(a);
+  }
+
+  template <class T>
+  void swap(T &a, T &b) {
+    T c(std::move(a));
+    a = std::move(b);
+    b = std::move(c);
+  }
+
+  template<typename T>
+  class vector {
+    T *_start;
+    T *_finish;
+    T *_end_of_storage;
+
+  public:
+    typedef T value_type;
+    typedef size_t size_type;
+    typedef __vector_iterator<T, T *, T &> iterator;
+    typedef __vector_iterator<T, const T *, const T &> const_iterator;
+
+    vector() : _start(0), _finish(0), _end_of_storage(0) {}
+    template <typename InputIterator>
+    vector(InputIterator first, InputIterator last);
+    vector(const vector &other);
+    vector(vector &&other);
+    explicit vector(size_type count);
+    ~vector();
+
+    size_t size() const {
+      return size_t(_finish - _start);
+    }
+    void resize(size_type __sz);
+
+    vector& operator=(const vector &other);
+    vector& operator=(vector &&other);
+    vector& operator=(std::initializer_list<T> ilist);
+
+    void assign(size_type count, const T &value);
+    template <typename InputIterator >
+    void assign(InputIterator first, InputIterator last);
+    void assign(std::initializer_list<T> ilist);
+
+    void clear();
+
+    void push_back(const T &value);
+    void push_back(T &&value);
+    template<class... Args>
+    void emplace_back(Args&&... args);
+    void pop_back();
+
+    iterator insert(const_iterator position, const value_type &val);
+    iterator insert(const_iterator position, size_type n,
+                    const value_type &val);
+    template <typename InputIterator>
+    iterator insert(const_iterator position, InputIterator first,
+                    InputIterator last);
+    iterator insert(const_iterator position, value_type &&val);
+    iterator insert(const_iterator position, initializer_list<value_type> il);
+
+    template <class... Args>
+    iterator emplace(const_iterator position, Args&&... args);
+
+    iterator erase(const_iterator position);
+    iterator erase(const_iterator first, const_iterator last);
+
+    T &operator[](size_t n) {
+      return _start[n];
+    }
+
+    const T &operator[](size_t n) const {
+      return _start[n];
+    }
+
+    iterator begin() { return iterator(_start); }
+    const_iterator begin() const { return const_iterator(_start); }
+    const_iterator cbegin() const { return const_iterator(_start); }
+    iterator end() { return iterator(_finish); }
+    const_iterator end() const { return const_iterator(_finish); }
+    const_iterator cend() const { return const_iterator(_finish); }
+    T& front() { return *begin(); }
+    const T& front() const { return *begin(); }
+    T& back() { return *(end() - 1); }
+    const T& back() const { return *(end() - 1); }
+  };
+
+  template<typename T>
+  class list {
+    struct __item {
+      T data;
+      __item *prev, *next;
+    } *_start, *_finish;
+
+  public:
+    typedef T value_type;
+    typedef size_t size_type;
+    typedef __list_iterator<__item, T *, T &> iterator;
+    typedef __list_iterator<__item, const T *, const T &> const_iterator;
+
+    list() : _start(0), _finish(0) {}
+    template <typename InputIterator>
+    list(InputIterator first, InputIterator last);
+    list(const list &other);
+    list(list &&other);
+    ~list();
+
+    list& operator=(const list &other);
+    list& operator=(list &&other);
+    list& operator=(std::initializer_list<T> ilist);
+
+    void assign(size_type count, const T &value);
+    template <typename InputIterator >
+    void assign(InputIterator first, InputIterator last);
+    void assign(std::initializer_list<T> ilist);
+
+    void clear();
+
+    void push_back(const T &value);
+    void push_back(T &&value);
+    template<class... Args>
+    void emplace_back(Args&&... args);
+    void pop_back();
+
+    void push_front(const T &value);
+    void push_front(T &&value);
+    template<class... Args>
+    void emplace_front(Args&&... args);
+    void pop_front();
+
+    iterator insert(const_iterator position, const value_type &val);
+    iterator insert(const_iterator position, size_type n,
+                    const value_type &val);
+    template <typename InputIterator>
+    iterator insert(const_iterator position, InputIterator first,
+                    InputIterator last);
+    iterator insert(const_iterator position, value_type &&val);
+    iterator insert(const_iterator position, initializer_list<value_type> il);
+
+    template <class... Args>
+    iterator emplace(const_iterator position, Args&&... args);
+
+    iterator erase(const_iterator position);
+    iterator erase(const_iterator first, const_iterator last);
+
+    iterator begin() { return iterator(_start); }
+    const_iterator begin() const { return const_iterator(_start); }
+    const_iterator cbegin() const { return const_iterator(_start); }
+    iterator end() { return iterator(_finish); }
+    const_iterator end() const { return const_iterator(_finish); }
+    const_iterator cend() const { return const_iterator(_finish); }
+
+    T& front() { return *begin(); }
+    const T& front() const { return *begin(); }
+    T& back() { return *--end(); }
+    const T& back() const { return *--end(); }
+  };
+
+  template<typename T>
+  class deque {
+    T *_start;
+    T *_finish;
+    T *_end_of_storage;
+
+  public:
+    typedef T value_type;
+    typedef size_t size_type;
+    typedef __deque_iterator<T, T *, T &> iterator;
+    typedef __deque_iterator<T, const T *, const T &> const_iterator;
+
+    deque() : _start(0), _finish(0), _end_of_storage(0) {}
+    template <typename InputIterator>
+    deque(InputIterator first, InputIterator last);
+    deque(const deque &other);
+    deque(deque &&other);
+    ~deque();
+
+    size_t size() const {
+      return size_t(_finish - _start);
+    }
+
+    deque& operator=(const deque &other);
+    deque& operator=(deque &&other);
+    deque& operator=(std::initializer_list<T> ilist);
+
+    void assign(size_type count, const T &value);
+    template <typename InputIterator >
+    void assign(InputIterator first, InputIterator last);
+    void assign(std::initializer_list<T> ilist);
+
+    void clear();
+
+    void push_back(const T &value);
+    void push_back(T &&value);
+    template<class... Args>
+    void emplace_back(Args&&... args);
+    void pop_back();
+
+    void push_front(const T &value);
+    void push_front(T &&value);
+    template<class... Args>
+    void emplace_front(Args&&... args);
+    void pop_front();
+
+    iterator insert(const_iterator position, const value_type &val);
+    iterator insert(const_iterator position, size_type n,
+                    const value_type &val);
+    template <typename InputIterator>
+    iterator insert(const_iterator position, InputIterator first,
+                    InputIterator last);
+    iterator insert(const_iterator position, value_type &&val);
+    iterator insert(const_iterator position, initializer_list<value_type> il);
+
+    template <class... Args>
+    iterator emplace(const_iterator position, Args&&... args);
+
+    iterator erase(const_iterator position);
+    iterator erase(const_iterator first, const_iterator last);
+
+    T &operator[](size_t n) {
+      return _start[n];
+    }
+
+    const T &operator[](size_t n) const {
+      return _start[n];
+    }
+
+    iterator begin() { return iterator(_start); }
+    const_iterator begin() const { return const_iterator(_start); }
+    const_iterator cbegin() const { return const_iterator(_start); }
+    iterator end() { return iterator(_finish); }
+    const_iterator end() const { return const_iterator(_finish); }
+    const_iterator cend() const { return const_iterator(_finish); }
+    T& front() { return *begin(); }
+    const T& front() const { return *begin(); }
+    T& back() { return *(end() - 1); }
+    const T& back() const { return *(end() - 1); }
+  };
+
+  template<typename T>
+  class forward_list {
+    struct __item {
+      T data;
+      __item *next;
+    } *_start;
+
+  public:
+    typedef T value_type;
+    typedef size_t size_type;
+    typedef __fwdl_iterator<__item, T *, T &> iterator;
+    typedef __fwdl_iterator<__item, const T *, const T &> const_iterator;
+
+    forward_list() : _start(0) {}
+    template <typename InputIterator>
+    forward_list(InputIterator first, InputIterator last);
+    forward_list(const forward_list &other);
+    forward_list(forward_list &&other);
+    ~forward_list();
+
+    forward_list& operator=(const forward_list &other);
+    forward_list& operator=(forward_list &&other);
+    forward_list& operator=(std::initializer_list<T> ilist);
+
+    void assign(size_type count, const T &value);
+    template <typename InputIterator >
+    void assign(InputIterator first, InputIterator last);
+    void assign(std::initializer_list<T> ilist);
+
+    void clear();
+
+    void push_front(const T &value);
+    void push_front(T &&value);
+    template<class... Args>
+    void emplace_front(Args&&... args);
+    void pop_front();
+
+    iterator insert_after(const_iterator position, const value_type &val);
+    iterator insert_after(const_iterator position, value_type &&val);
+    iterator insert_after(const_iterator position, size_type n,
+                          const value_type &val);
+    template <typename InputIterator>
+    iterator insert_after(const_iterator position, InputIterator first,
+                          InputIterator last);
+    iterator insert_after(const_iterator position,
+                          initializer_list<value_type> il);
+
+    template <class... Args>
+    iterator emplace_after(const_iterator position, Args&&... args);
+
+    iterator erase_after(const_iterator position);
+    iterator erase_after(const_iterator first, const_iterator last);
+
+    iterator begin() { return iterator(_start); }
+    const_iterator begin() const { return const_iterator(_start); }
+    const_iterator cbegin() const { return const_iterator(_start); }
+    iterator end() { return iterator(); }
+    const_iterator end() const { return const_iterator(); }
+    const_iterator cend() const { return const_iterator(); }
+
+    T& front() { return *begin(); }
+    const T& front() const { return *begin(); }
+  };
+
+  template <typename CharT>
+  class basic_string {
+    class Allocator {};
+
+  public:
+    basic_string() : basic_string(Allocator()) {}
+    explicit basic_string(const Allocator &alloc);
+    basic_string(size_type count, CharT ch,
+                 const Allocator &alloc = Allocator());
+    basic_string(const basic_string &other,
+                 size_type pos,
+                 const Allocator &alloc = Allocator());
+    basic_string(const basic_string &other,
+                 size_type pos, size_type count,
+                 const Allocator &alloc = Allocator());
+    basic_string(const CharT *s, size_type count,
+                 const Allocator &alloc = Allocator());
+    basic_string(const CharT *s,
+                 const Allocator &alloc = Allocator());
+    template <class InputIt>
+    basic_string(InputIt first, InputIt last,
+                 const Allocator &alloc = Allocator());
+    basic_string(const basic_string &other);
+    basic_string(const basic_string &other,
+                 const Allocator &alloc);
+    basic_string(basic_string &&other);
+    basic_string(basic_string &&other,
+                 const Allocator &alloc);
+    basic_string(std::initializer_list<CharT> ilist,
+                 const Allocator &alloc = Allocator());
+    template <class T>
+    basic_string(const T &t, size_type pos, size_type n,
+                 const Allocator &alloc = Allocator());
+    // basic_string(std::nullptr_t) = delete;
+
+    ~basic_string();
+    void clear();
+
+    basic_string &operator=(const basic_string &str);
+    basic_string &operator+=(const basic_string &str);
+
+    const CharT *c_str() const;
+    const CharT *data() const;
+    CharT *data();
+
+    const char *begin() const;
+    const char *end() const;
+
+    basic_string &append(size_type count, CharT ch);
+    basic_string &assign(size_type count, CharT ch);
+    basic_string &erase(size_type index, size_type count);
+    basic_string &insert(size_type index, size_type count, CharT ch);
+    basic_string &replace(size_type pos, size_type count, const basic_string &str);
+    void pop_back();
+    void push_back(CharT ch);
+    void reserve(size_type new_cap);
+    void resize(size_type count);
+    void shrink_to_fit();
+    void swap(basic_string &other);
+  };
+
+  typedef basic_string<char> string;
+  typedef basic_string<wchar_t> wstring;
+#if __cplusplus >= 201103L
+  typedef basic_string<char16_t> u16string;
+  typedef basic_string<char32_t> u32string;
+#endif
+
+  class exception {
+  public:
+    exception() throw();
+    virtual ~exception() throw();
+    virtual const char *what() const throw() {
+      return 0;
+    }
+  };
+
+  class bad_alloc : public exception {
+    public:
+    bad_alloc() throw();
+    bad_alloc(const bad_alloc&) throw();
+    bad_alloc& operator=(const bad_alloc&) throw();
+    virtual const char* what() const throw() {
+      return 0;
+    }
+  };
+
+  struct nothrow_t {};
+  extern const nothrow_t nothrow;
+
+  enum class align_val_t : size_t {};
+
+  // libc++'s implementation
+  template <class _E>
+  class initializer_list
+  {
+    const _E* __begin_;
+    size_t    __size_;
+
+    initializer_list(const _E* __b, size_t __s)
+      : __begin_(__b),
+        __size_(__s)
+    {}
+
+  public:
+    typedef _E        value_type;
+    typedef const _E& reference;
+    typedef const _E& const_reference;
+    typedef size_t    size_type;
+
+    typedef const _E* iterator;
+    typedef const _E* const_iterator;
+
+    initializer_list() : __begin_(0), __size_(0) {}
+
+    size_t    size()  const {return __size_;}
+    const _E* begin() const {return __begin_;}
+    const _E* end()   const {return __begin_ + __size_;}
+  };
+
+  template <bool, class _Tp = void> struct enable_if {};
+  template <class _Tp> struct enable_if<true, _Tp> {typedef _Tp type;};
+
+  template <class _Tp, _Tp __v>
+  struct integral_constant
+  {
+      static const _Tp      value = __v;
+      typedef _Tp               value_type;
+      typedef integral_constant type;
+
+     operator value_type() const {return value;}
+
+     value_type operator ()() const {return value;}
+  };
+
+  template <class _Tp, _Tp __v>
+  const _Tp integral_constant<_Tp, __v>::value;
+
+    template <class _Tp, class _Arg>
+    struct is_trivially_assignable
+      : integral_constant<bool, __is_trivially_assignable(_Tp, _Arg)>
+    {
+    };
+
+  typedef integral_constant<bool,true>  true_type;
+  typedef integral_constant<bool,false> false_type;
+
+  template <class _Tp> struct is_const            : public false_type {};
+  template <class _Tp> struct is_const<_Tp const> : public true_type {};
+
+  template <class _Tp> struct  is_reference        : public false_type {};
+  template <class _Tp> struct  is_reference<_Tp&>  : public true_type {};
+
+  template <class _Tp, class _Up> struct  is_same           : public false_type {};
+  template <class _Tp>            struct  is_same<_Tp, _Tp> : public true_type {};
+
+  template <class _Tp, bool = is_const<_Tp>::value || is_reference<_Tp>::value    >
+  struct __add_const             {typedef _Tp type;};
+
+  template <class _Tp>
+  struct __add_const<_Tp, false> {typedef const _Tp type;};
+
+  template <class _Tp> struct add_const {typedef typename __add_const<_Tp>::type type;};
+
+  template <class _Tp> struct  remove_const            {typedef _Tp type;};
+  template <class _Tp> struct  remove_const<const _Tp> {typedef _Tp type;};
+
+  template <class _Tp> struct  add_lvalue_reference    {typedef _Tp& type;};
+
+  template <class _Tp> struct is_trivially_copy_assignable
+      : public is_trivially_assignable<typename add_lvalue_reference<_Tp>::type,
+            typename add_lvalue_reference<typename add_const<_Tp>::type>::type> {};
+
+    template<class InputIter, class OutputIter>
+    OutputIter __copy(InputIter II, InputIter IE, OutputIter OI) {
+      while (II != IE)
+        *OI++ = *II++;
+
+      return OI;
+    }
+
+  template <class _Tp, class _Up>
+  inline
+  typename enable_if
+  <
+      is_same<typename remove_const<_Tp>::type, _Up>::value &&
+      is_trivially_copy_assignable<_Up>::value,
+      _Up*
+  >::type __copy(_Tp* __first, _Tp* __last, _Up* __result) {
+      size_t __n = __last - __first;
+
+      if (__n > 0)
+        memmove(__result, __first, __n * sizeof(_Up));
+
+      return __result + __n;
+    }
+
+  template<class InputIter, class OutputIter>
+  OutputIter copy(InputIter II, InputIter IE, OutputIter OI) {
+    return __copy(II, IE, OI);
+  }
+
+  template <class _BidirectionalIterator, class _OutputIterator>
+  inline
+  _OutputIterator
+  __copy_backward(_BidirectionalIterator __first, _BidirectionalIterator __last,
+                  _OutputIterator __result)
+  {
+      while (__first != __last)
+          *--__result = *--__last;
+      return __result;
+  }
+
+  template <class _Tp, class _Up>
+  inline
+  typename enable_if
+  <
+      is_same<typename remove_const<_Tp>::type, _Up>::value &&
+      is_trivially_copy_assignable<_Up>::value,
+      _Up*
+  >::type __copy_backward(_Tp* __first, _Tp* __last, _Up* __result) {
+      size_t __n = __last - __first;
+
+    if (__n > 0)
+    {
+        __result -= __n;
+        memmove(__result, __first, __n * sizeof(_Up));
+    }
+    return __result;
+  }
+
+  template<class InputIter, class OutputIter>
+  OutputIter copy_backward(InputIter II, InputIter IE, OutputIter OI) {
+    return __copy_backward(II, IE, OI);
+  }
+}
+
+template <class BidirectionalIterator, class Distance>
+void __advance(BidirectionalIterator& it, Distance n,
+               std::bidirectional_iterator_tag)
+#if !defined(STD_ADVANCE_INLINE_LEVEL) || STD_ADVANCE_INLINE_LEVEL > 2
+{
+  if (n >= 0) while(n-- > 0) ++it; else while (n++<0) --it;
+}
+#else
+    ;
+#endif
+
+template <class RandomAccessIterator, class Distance>
+void __advance(RandomAccessIterator& it, Distance n,
+               std::random_access_iterator_tag)
+#if !defined(STD_ADVANCE_INLINE_LEVEL) || STD_ADVANCE_INLINE_LEVEL > 2
+{
+  it += n;
+}
+#else
+    ;
+#endif
+
+namespace std {
+
+template <class InputIterator, class Distance>
+void advance(InputIterator& it, Distance n)
+#if !defined(STD_ADVANCE_INLINE_LEVEL) || STD_ADVANCE_INLINE_LEVEL > 1
+{
+  __advance(it, n, typename InputIterator::iterator_category());
+}
+#else
+    ;
+#endif
+
+template <class BidirectionalIterator>
+BidirectionalIterator
+prev(BidirectionalIterator it,
+     typename iterator_traits<BidirectionalIterator>::difference_type n =
+         1)
+#if !defined(STD_ADVANCE_INLINE_LEVEL) || STD_ADVANCE_INLINE_LEVEL > 0
+{
+  advance(it, -n);
+  return it;
+}
+#else
+    ;
+#endif
+
+template <class ForwardIterator>
+ForwardIterator
+next(ForwardIterator it,
+     typename iterator_traits<ForwardIterator>::difference_type n =
+         1)
+#if !defined(STD_ADVANCE_INLINE_LEVEL) || STD_ADVANCE_INLINE_LEVEL > 0
+{
+  advance(it, n);
+  return it;
+}
+#else
+    ;
+#endif
+
+  template <class InputIt, class T>
+  InputIt find(InputIt first, InputIt last, const T& value);
+
+  template <class ExecutionPolicy, class ForwardIt, class T>
+  ForwardIt find(ExecutionPolicy&& policy, ForwardIt first, ForwardIt last,
+                 const T& value);
+
+  template <class InputIt, class UnaryPredicate>
+  InputIt find_if (InputIt first, InputIt last, UnaryPredicate p);
+
+  template <class ExecutionPolicy, class ForwardIt, class UnaryPredicate>
+  ForwardIt find_if (ExecutionPolicy&& policy, ForwardIt first, ForwardIt last,
+                     UnaryPredicate p);
+
+  template <class InputIt, class UnaryPredicate>
+  InputIt find_if_not (InputIt first, InputIt last, UnaryPredicate q);
+
+  template <class ExecutionPolicy, class ForwardIt, class UnaryPredicate>
+  ForwardIt find_if_not (ExecutionPolicy&& policy, ForwardIt first,
+                         ForwardIt last, UnaryPredicate q);
+
+  template <class InputIt, class ForwardIt>
+  InputIt find_first_of(InputIt first, InputIt last,
+                         ForwardIt s_first, ForwardIt s_last);
+
+  template <class ExecutionPolicy, class ForwardIt1, class ForwardIt2>
+  ForwardIt1 find_first_of (ExecutionPolicy&& policy,
+                            ForwardIt1 first, ForwardIt1 last,
+                            ForwardIt2 s_first, ForwardIt2 s_last);
+
+  template <class InputIt, class ForwardIt, class BinaryPredicate>
+  InputIt find_first_of (InputIt first, InputIt last,
+                         ForwardIt s_first, ForwardIt s_last,
+                         BinaryPredicate p );
+
+  template <class ExecutionPolicy, class ForwardIt1, class ForwardIt2,
+            class BinaryPredicate>
+  ForwardIt1 find_first_of (ExecutionPolicy&& policy,
+                            ForwardIt1 first, ForwardIt1 last,
+                            ForwardIt2 s_first, ForwardIt2 s_last,
+                            BinaryPredicate p );
+
+  template <class InputIt, class ForwardIt>
+  InputIt find_end(InputIt first, InputIt last,
+                   ForwardIt s_first, ForwardIt s_last);
+
+  template <class ExecutionPolicy, class ForwardIt1, class ForwardIt2>
+  ForwardIt1 find_end (ExecutionPolicy&& policy,
+                       ForwardIt1 first, ForwardIt1 last,
+                       ForwardIt2 s_first, ForwardIt2 s_last);
+
+  template <class InputIt, class ForwardIt, class BinaryPredicate>
+  InputIt find_end (InputIt first, InputIt last,
+                    ForwardIt s_first, ForwardIt s_last,
+                    BinaryPredicate p );
+
+  template <class ExecutionPolicy, class ForwardIt1, class ForwardIt2,
+            class BinaryPredicate>
+  ForwardIt1 find_end (ExecutionPolicy&& policy,
+                       ForwardIt1 first, ForwardIt1 last,
+                       ForwardIt2 s_first, ForwardIt2 s_last,
+                       BinaryPredicate p );
+
+  template <class ForwardIt, class T>
+  ForwardIt lower_bound (ForwardIt first, ForwardIt last, const T& value);
+
+  template <class ForwardIt, class T, class Compare>
+  ForwardIt lower_bound (ForwardIt first, ForwardIt last, const T& value,
+                         Compare comp);
+
+  template <class ForwardIt, class T>
+  ForwardIt upper_bound (ForwardIt first, ForwardIt last, const T& value);
+
+  template <class ForwardIt, class T, class Compare>
+  ForwardIt upper_bound (ForwardIt first, ForwardIt last, const T& value,
+                         Compare comp);
+
+  template <class ForwardIt1, class ForwardIt2>
+  ForwardIt1 search (ForwardIt1 first, ForwardIt1 last,
+                     ForwardIt2 s_first, ForwardIt2 s_last);
+
+  template <class ExecutionPolicy, class ForwardIt1, class ForwardIt2>
+  ForwardIt1 search (ExecutionPolicy&& policy,
+                     ForwardIt1 first, ForwardIt1 last,
+                     ForwardIt2 s_first, ForwardIt2 s_last);
+
+  template <class ForwardIt1, class ForwardIt2, class BinaryPredicate>
+  ForwardIt1 search (ForwardIt1 first, ForwardIt1 last,
+                     ForwardIt2 s_first, ForwardIt2 s_last, BinaryPredicate p);
+
+  template <class ExecutionPolicy, class ForwardIt1, class ForwardIt2,
+            class BinaryPredicate >
+  ForwardIt1 search (ExecutionPolicy&& policy,
+                     ForwardIt1 first, ForwardIt1 last,
+                     ForwardIt2 s_first, ForwardIt2 s_last, BinaryPredicate p);
+
+  template <class ForwardIt, class Searcher>
+  ForwardIt search (ForwardIt first, ForwardIt last, const Searcher& searcher);
+
+  template <class ForwardIt, class Size, class T>
+  ForwardIt search_n (ForwardIt first, ForwardIt last, Size count,
+                      const T& value);
+
+  template <class ExecutionPolicy, class ForwardIt, class Size, class T>
+  ForwardIt search_n (ExecutionPolicy&& policy, ForwardIt first, ForwardIt last,
+                      Size count, const T& value);
+
+  template <class ForwardIt, class Size, class T, class BinaryPredicate>
+  ForwardIt search_n (ForwardIt first, ForwardIt last, Size count,
+                      const T& value, BinaryPredicate p);
+
+  template <class ExecutionPolicy, class ForwardIt, class Size, class T,
+            class BinaryPredicate>
+  ForwardIt search_n (ExecutionPolicy&& policy, ForwardIt first, ForwardIt last,
+                      Size count, const T& value, BinaryPredicate p);
+
+  template <class InputIterator, class OutputIterator>
+  OutputIterator copy(InputIterator first, InputIterator last,
+                      OutputIterator result);
+
+}
+
+#if __cplusplus >= 201103L
+namespace std {
+template <typename T> // TODO: Implement the stub for deleter.
+class unique_ptr {
+public:
+  unique_ptr() noexcept {}
+  unique_ptr(T *) noexcept {}
+  unique_ptr(const unique_ptr &) noexcept = delete;
+  unique_ptr(unique_ptr &&) noexcept;
+
+  T *get() const noexcept;
+  T *release() noexcept;
+  void reset(T *p = nullptr) noexcept;
+  void swap(unique_ptr<T> &p) noexcept;
+
+  typename std::add_lvalue_reference<T>::type operator*() const;
+  T *operator->() const noexcept;
+  operator bool() const noexcept;
+  unique_ptr<T> &operator=(unique_ptr<T> &&p) noexcept;
+  unique_ptr<T> &operator=(nullptr_t) noexcept;
+};
+
+// TODO :: Once the deleter parameter is added update with additional template parameter.
+template <typename T>
+void swap(unique_ptr<T> &x, unique_ptr<T> &y) noexcept {
+  x.swap(y);
+}
+
+template <typename T1, typename T2>
+bool operator==(const unique_ptr<T1> &x, const unique_ptr<T2> &y);
+
+template <typename T1, typename T2>
+bool operator!=(const unique_ptr<T1> &x, const unique_ptr<T2> &y);
+
+template <typename T1, typename T2>
+bool operator<(const unique_ptr<T1> &x, const unique_ptr<T2> &y);
+
+template <typename T1, typename T2>
+bool operator>(const unique_ptr<T1> &x, const unique_ptr<T2> &y);
+
+template <typename T1, typename T2>
+bool operator<=(const unique_ptr<T1> &x, const unique_ptr<T2> &y);
+
+template <typename T1, typename T2>
+bool operator>=(const unique_ptr<T1> &x, const unique_ptr<T2> &y);
+
+template <typename T>
+bool operator==(const unique_ptr<T> &x, nullptr_t y);
+
+template <typename T>
+bool operator!=(const unique_ptr<T> &x, nullptr_t y);
+
+template <typename T>
+bool operator<(const unique_ptr<T> &x, nullptr_t y);
+
+template <typename T>
+bool operator>(const unique_ptr<T> &x, nullptr_t y);
+
+template <typename T>
+bool operator<=(const unique_ptr<T> &x, nullptr_t y);
+
+template <typename T>
+bool operator>=(const unique_ptr<T> &x, nullptr_t y);
+
+template <typename T>
+bool operator==(nullptr_t x, const unique_ptr<T> &y);
+
+template <typename T>
+bool operator!=(nullptr_t x, const unique_ptr<T> &y);
+
+template <typename T>
+bool operator>(nullptr_t x, const unique_ptr<T> &y);
+
+template <typename T>
+bool operator<(nullptr_t x, const unique_ptr<T> &y);
+
+template <typename T>
+bool operator>=(nullptr_t x, const unique_ptr<T> &y);
+
+template <typename T>
+bool operator<=(nullptr_t x, const unique_ptr<T> &y);
+
+template <class T, class... Args>
+unique_ptr<T> make_unique(Args &&...args);
+
+#if __cplusplus >= 202002L
+
+template <class T>
+unique_ptr<T> make_unique_for_overwrite();
+
+#endif
+
+} // namespace std
+#endif
+
+namespace std {
+template <class CharT>
+class basic_ostream;
+
+using ostream = basic_ostream<char>;
+
+extern std::ostream cout;
+
+ostream &operator<<(ostream &, const string &);
+
+#if __cplusplus >= 202002L
+template <class T>
+ostream &operator<<(ostream &, const std::unique_ptr<T> &);
+#endif
+} // namespace std
+
+#ifdef TEST_INLINABLE_ALLOCATORS
+namespace std {
+  void *malloc(size_t);
+  void free(void *);
+}
+void* operator new(std::size_t size, const std::nothrow_t&) throw() { return std::malloc(size); }
+void* operator new[](std::size_t size, const std::nothrow_t&) throw() { return std::malloc(size); }
+void operator delete(void* ptr, const std::nothrow_t&) throw() { std::free(ptr); }
+void operator delete[](void* ptr, const std::nothrow_t&) throw() { std::free(ptr); }
+#else
+// C++20 standard draft 17.6.1, from "Header <new> synopsis", but with throw()
+// instead of noexcept:
+
+void *operator new(std::size_t size);
+void *operator new(std::size_t size, std::align_val_t alignment);
+void *operator new(std::size_t size, const std::nothrow_t &) throw();
+void *operator new(std::size_t size, std::align_val_t alignment,
+                   const std::nothrow_t &) throw();
+void operator delete(void *ptr) throw();
+void operator delete(void *ptr, std::size_t size) throw();
+void operator delete(void *ptr, std::align_val_t alignment) throw();
+void operator delete(void *ptr, std::size_t size, std::align_val_t alignment) throw();
+void operator delete(void *ptr, const std::nothrow_t &)throw();
+void operator delete(void *ptr, std::align_val_t alignment,
+                     const std::nothrow_t &)throw();
+void *operator new[](std::size_t size);
+void *operator new[](std::size_t size, std::align_val_t alignment);
+void *operator new[](std::size_t size, const std::nothrow_t &) throw();
+void *operator new[](std::size_t size, std::align_val_t alignment,
+                     const std::nothrow_t &) throw();
+void operator delete[](void *ptr) throw();
+void operator delete[](void *ptr, std::size_t size) throw();
+void operator delete[](void *ptr, std::align_val_t alignment) throw();
+void operator delete[](void *ptr, std::size_t size, std::align_val_t alignment) throw();
+void operator delete[](void *ptr, const std::nothrow_t &) throw();
+void operator delete[](void *ptr, std::align_val_t alignment,
+                       const std::nothrow_t &) throw();
+#endif
+
+void* operator new (std::size_t size, void* ptr) throw() { return ptr; };
+void* operator new[] (std::size_t size, void* ptr) throw() { return ptr; };
+void operator delete (void* ptr, void*) throw() {};
+void operator delete[] (void* ptr, void*) throw() {};
+
+namespace __cxxabiv1 {
+extern "C" {
+extern char *__cxa_demangle(const char *mangled_name,
+                            char *output_buffer,
+                            size_t *length,
+                            int *status);
+}}
+namespace abi = __cxxabiv1;
+
+namespace std {
+  template<class ForwardIt>
+  bool is_sorted(ForwardIt first, ForwardIt last);
+
+  template <class RandomIt>
+  void nth_element(RandomIt first, RandomIt nth, RandomIt last);
+
+  template<class RandomIt>
+  void partial_sort(RandomIt first, RandomIt middle, RandomIt last);
+
+  template<class RandomIt>
+  void sort (RandomIt first, RandomIt last);
+
+  template<class RandomIt>
+  void stable_sort(RandomIt first, RandomIt last);
+
+  template<class BidirIt, class UnaryPredicate>
+  BidirIt partition(BidirIt first, BidirIt last, UnaryPredicate p);
+
+  template<class BidirIt, class UnaryPredicate>
+  BidirIt stable_partition(BidirIt first, BidirIt last, UnaryPredicate p);
+}
+
+namespace std {
+
+template< class T = void >
+struct less;
+
+template< class T >
+struct allocator;
+
+template< class Key >
+struct hash;
+
+template<
+  class Key,
+  class Compare = std::less<Key>,
+  class Alloc = std::allocator<Key>
+> class set {
+  public:
+    set(initializer_list<Key> __list) {}
+
+    class iterator {
+    public:
+      iterator(Key *key): ptr(key) {}
+      iterator& operator++() { ++ptr; return *this; }
+      bool operator!=(const iterator &other) const { return ptr != other.ptr; }
+      const Key &operator*() const { return *ptr; }
+    private:
+      Key *ptr;
+    };
+
+  public:
+    Key *val;
+    iterator begin() const { return iterator(val); }
+    iterator end() const { return iterator(val + 1); }
+};
+
+template<
+  class Key,
+  class Hash = std::hash<Key>,
+  class Compare = std::less<Key>,
+  class Alloc = std::allocator<Key>
+> class unordered_set {
+  public:
+    unordered_set(initializer_list<Key> __list) {}
+
+    class iterator {
+    public:
+      iterator(Key *key): ptr(key) {}
+      iterator& operator++() { ++ptr; return *this; }
+      bool operator!=(const iterator &other) const { return ptr != other.ptr; }
+      const Key &operator*() const { return *ptr; }
+    private:
+      Key *ptr;
+    };
+
+  public:
+    Key *val;
+    iterator begin() const { return iterator(val); }
+    iterator end() const { return iterator(val + 1); }
+};
+
+namespace execution {
+class sequenced_policy {};
+}
+
+template <class T = void> struct equal_to {};
+
+template <class ForwardIt, class BinaryPredicate = std::equal_to<> >
+class default_searcher {
+public:
+  default_searcher (ForwardIt pat_first,
+                    ForwardIt pat_last,
+                    BinaryPredicate pred = BinaryPredicate());
+  template <class ForwardIt2>
+  std::pair <ForwardIt2, ForwardIt2>
+  operator()( ForwardIt2 first, ForwardIt2 last ) const;
+};
+
+template <typename> class packaged_task;
+template <typename Ret, typename... Args> class packaged_task<Ret(Args...)> {
+  // TODO: Add some actual implementation.
+};
+
+#if __has_feature(cxx_decltype)
+typedef decltype(nullptr) nullptr_t;
+
+template<class _Tp>
+class shared_ptr
+{
+public:
+  constexpr shared_ptr(nullptr_t);
+  explicit shared_ptr(_Tp* __p);
+
+  shared_ptr(shared_ptr&& __r) { }
+
+  ~shared_ptr();
+
+  // shared_ptr& operator=(shared_ptr&& __r);
+  shared_ptr<_Tp>& operator=(const shared_ptr& __r) noexcept
+  {
+      return *this;
+  }
+
+  template<class _Yp>
+  shared_ptr<_Tp>& operator=(const shared_ptr<_Yp>& __r) noexcept
+  {
+      return *this;
+  }
+
+  shared_ptr<_Tp>& operator=(shared_ptr&& __r) noexcept
+  {
+      return *this;
+  }
+
+  template<class _Yp>
+  shared_ptr<_Tp>& operator=(shared_ptr<_Yp>&& __r)
+  {
+      return *this;
+  }
+};
+
+template<class _Tp>
+inline
+constexpr
+shared_ptr<_Tp>::shared_ptr(nullptr_t) {
+}
+
+#endif // __has_feature(cxx_decltype)
+
+template <typename T, typename... Args>
+  shared_ptr<T> make_shared(Args &&...args) {
+    return shared_ptr<T>(new T(static_cast<Args &&>(args)...));
+  }
+
+template<typename T, unsigned N> struct array {
+  T arr[N];
+  typedef T value_type;
+  typedef value_type* iterator;
+  constexpr iterator begin() { return iterator(arr); }
+  constexpr iterator end() { return iterator(arr + N); }
+};
+
+} // namespace std
diff --git a/clang/test/CIR/Incubator/Inputs/typeinfo b/clang/test/CIR/Incubator/Inputs/typeinfo
new file mode 100644
index 0000000000000..a68b10302c6fe
--- /dev/null
+++ b/clang/test/CIR/Incubator/Inputs/typeinfo
@@ -0,0 +1,24 @@
+namespace std {
+  class type_info {
+  public:
+    virtual ~type_info();
+    const char* name() const { return __name; }
+    bool operator==(const type_info& __arg) const {
+     return __name == __arg.__name;
+    }
+
+    bool operator!=(const type_info& __arg) const {
+      return !operator==(__arg);
+    }
+
+    bool before(const type_info& __arg) const {
+      return __name < __arg.__name;
+    }
+
+    unsigned long hash_code() const {
+      return reinterpret_cast<unsigned long long>(__name);
+    }
+  protected:
+    const char *__name;
+  };
+}
diff --git a/clang/test/CIR/Incubator/Lowering/OpenMP/barrier.cir b/clang/test/CIR/Incubator/Lowering/OpenMP/barrier.cir
new file mode 100644
index 0000000000000..145117ab54a0c
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/OpenMP/barrier.cir
@@ -0,0 +1,15 @@
+
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering | FileCheck %s
+
+
+module {
+  cir.func @omp_barrier_1() {
+    omp.barrier
+    cir.return
+  }
+}
+
+// CHECK: define void @omp_barrier_1()
+// CHECK: call i32 @__kmpc_global_thread_num(ptr {{.*}})
+// CHECK: call void @__kmpc_barrier(ptr {{.*}}, i32 {{.*}})
+// CHECK: ret void
diff --git a/clang/test/CIR/Incubator/Lowering/OpenMP/parallel.cir b/clang/test/CIR/Incubator/Lowering/OpenMP/parallel.cir
new file mode 100644
index 0000000000000..3422eac75ea0b
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/OpenMP/parallel.cir
@@ -0,0 +1,35 @@
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+module {
+    cir.func @omp_parallel() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {alignment = 4 : i64}
+    %1 = cir.const #cir.int<0> : !s32i
+    cir.store %1, %0 : !s32i, !cir.ptr<!s32i>
+    omp.parallel {
+      cir.scope {
+        %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+        %3 = cir.const #cir.int<1> : !s32i
+        cir.store %3, %2 : !s32i, !cir.ptr<!s32i>
+        %4 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+        %5 = cir.const #cir.int<1> : !s32i
+        %6 = cir.binop(add, %4, %5) : !s32i
+        cir.store %6, %0 : !s32i, !cir.ptr<!s32i>
+      }
+      omp.terminator
+    }
+    cir.return
+  }
+}
+// CHECK-LABEL: omp_parallel
+// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call({{.*}}, ptr @omp_parallel..omp_par,
+// CHECK: ret void
+// CHECK-NEXT: }
+// CHECK: define{{.*}} void @omp_parallel..omp_par(ptr
+// CHECK: %[[XVar:.*]] = load ptr, ptr %{{.*}}, align 8
+// CHECK: %[[YVar:.*]] = load ptr, ptr %{{.*}}, align 8
+// CHECK: store i32 1, ptr %[[XVar]], align 4
+// CHECK: %[[XVal:.*]] = load i32, ptr %[[XVar]], align 4
+// CHECK: %[[BinOp:.*]] = add i32 %[[XVal]], 1
+// CHECK: store i32 %[[BinOp]], ptr %[[YVar]], align 4
+// CHECK: ret
diff --git a/clang/test/CIR/Incubator/Lowering/OpenMP/taskwait.cir b/clang/test/CIR/Incubator/Lowering/OpenMP/taskwait.cir
new file mode 100644
index 0000000000000..83e8119bc479d
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/OpenMP/taskwait.cir
@@ -0,0 +1,14 @@
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering | FileCheck %s
+
+
+module {
+  cir.func @omp_taskwait_1() {
+    omp.taskwait
+    cir.return
+  }
+}
+
+// CHECK: define void @omp_taskwait_1()
+// CHECK: call i32 @__kmpc_global_thread_num(ptr {{.*}})
+// CHECK: call i32 @__kmpc_omp_taskwait(ptr {{.*}}, i32 {{.*}})
+// CHECK: ret void
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Lowering/OpenMP/taskyield.cir b/clang/test/CIR/Incubator/Lowering/OpenMP/taskyield.cir
new file mode 100644
index 0000000000000..a701365b798fc
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/OpenMP/taskyield.cir
@@ -0,0 +1,14 @@
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering | FileCheck %s
+
+
+module {
+  cir.func @omp_taskyield_1() {
+    omp.taskyield
+    cir.return
+  }
+}
+
+// CHECK: define void @omp_taskyield_1()
+// CHECK: call i32 @__kmpc_global_thread_num(ptr {{.*}})
+// CHECK: call i32 @__kmpc_omp_taskyield(ptr {{.*}}, i32 {{.*}}, i32 {{.*}})
+// CHECK: ret void
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/abs.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/abs.cir
new file mode 100644
index 0000000000000..e5a4dd4d095ff
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/abs.cir
@@ -0,0 +1,23 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck %s --input-file %t.mlir
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+module {
+  cir.func @foo() {
+    %0 = cir.const #cir.int<-1> : !s32i
+    %1 = cir.const #cir.int<-2> : !s64i
+    %4 = cir.abs %0 : !s32i
+    %5 = cir.abs %1 : !s64i
+    cir.return
+  }
+}
+
+// CHECK:      module {
+// CHECK-NEXT:   func.func @foo() {
+// CHECK-NEXT:     %[[C0:.+]] = arith.constant -1 : i32
+// CHECK-NEXT:     %[[C1:.+]] = arith.constant -2 : i64
+// CHECK-NEXT:     %{{.+}} = math.absi %[[C0]] : i32
+// CHECK-NEXT:     %{{.+}} = math.absi %[[C1]] : i64
+// CHECK-NEXT:     return
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/acos.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/acos.cir
new file mode 100644
index 0000000000000..f885c7adea7d4
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/acos.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck %s --input-file %t.mlir
+
+module {
+  cir.func @foo() {
+    %0 = cir.const #cir.fp<1.0> : !cir.float
+    %1 = cir.const #cir.fp<3.0> : !cir.long_double<!cir.f80>
+    %2 = cir.const #cir.fp<2.0> : !cir.double
+    %3 = cir.const #cir.fp<4.00> : !cir.long_double<!cir.double>
+    %4 = cir.acos %0 : !cir.float
+    %5 = cir.acos %1 : !cir.long_double<!cir.f80>
+    %6 = cir.acos %2 : !cir.double
+    %7 = cir.acos %3 : !cir.long_double<!cir.double>
+    cir.return
+  }
+}
+
+// CHECK:      module {
+// CHECK-NEXT:   func.func @foo() {
+// CHECK-NEXT:     %[[C0:.+]] = arith.constant 1.000000e+00 : f32
+// CHECK-NEXT:     %[[C1:.+]] = arith.constant 3.000000e+00 : f80
+// CHECK-NEXT:     %[[C2:.+]] = arith.constant 2.000000e+00 : f64
+// CHECK-NEXT:     %[[C3:.+]] = arith.constant 4.000000e+00 : f64
+// CHECK-NEXT:     %{{.+}} = math.acos %[[C0]] : f32
+// CHECK-NEXT:     %{{.+}} = math.acos %[[C1]] : f80
+// CHECK-NEXT:     %{{.+}} = math.acos %[[C2]] : f64
+// CHECK-NEXT:     %{{.+}} = math.acos %[[C3]] : f64
+// CHECK-NEXT:     return
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/address-space-mlir.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/address-space-mlir.cir
new file mode 100644
index 0000000000000..e9c8e4e2417fc
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/address-space-mlir.cir
@@ -0,0 +1,67 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s -check-prefix=MLIR
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.global external lang_address_space(offload_global) @addrspace1 = #cir.int<1> : !s32i
+  // MLIR: memref.global "public" @addrspace1 : memref<1xi32, #gpu.address_space<global>> = dense<1>
+
+  cir.global "private" internal lang_address_space(offload_local) @addrspace2 : !s32i
+  // MLIR: memref.global "private" @addrspace2 : memref<1xi32, #gpu.address_space<workgroup>>
+
+  cir.global external target_address_space(7) @addrspace3 = #cir.int<3> : !s32i
+  // MLIR: memref.global "public" @addrspace3 : memref<1xi32, 7> = dense<3>
+
+  // MLIR: func.func @test_get_global_op() {
+  cir.func @test_get_global_op() {
+    // MLIR-NEXT: memref.get_global @addrspace1 : memref<1xi32, #gpu.address_space<global>>
+    %0 = cir.get_global @addrspace1 : !cir.ptr<!s32i, lang_address_space(offload_global)>
+    cir.load %0 : !cir.ptr<!s32i, lang_address_space(offload_global)>, !s32i
+
+    // MLIR: memref.get_global @addrspace2 : memref<1xi32, #gpu.address_space<workgroup>>
+    %1 = cir.get_global @addrspace2 : !cir.ptr<!s32i, lang_address_space(offload_local)>
+    cir.load %1 : !cir.ptr<!s32i, lang_address_space(offload_local)>, !s32i
+
+    // MLIR: memref.get_global @addrspace3 : memref<1xi32, 7>
+    %2 = cir.get_global @addrspace3 : !cir.ptr<!s32i, target_address_space(7)>
+    cir.load %2 : !cir.ptr<!s32i, target_address_space(7)>, !s32i
+    cir.return
+  }
+
+  // MLIR: func.func @foo(%arg0: memref<?xi32>) {
+  cir.func @foo(%arg0: !cir.ptr<!s32i>) {
+    // MLIR-NEXT: memref.alloca() {alignment = 8 : i64} : memref<1xmemref<?xi32>>
+    %0 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg", init] {alignment = 8 : i64}
+    cir.return
+  }
+
+  // MLIR: func.func @bar(%arg0: memref<?xi32, 1>) {
+  cir.func @bar(%arg0: !cir.ptr<!s32i, target_address_space(1)>) {
+    // MLIR-NEXT: memref.alloca() {alignment = 8 : i64} : memref<1xmemref<?xi32, 1>>
+    %0 = cir.alloca !cir.ptr<!s32i, target_address_space(1)>, !cir.ptr<!cir.ptr<!s32i, target_address_space(1)>>, ["arg", init] {alignment = 8 : i64}
+    cir.return
+  }
+
+  // MLIR: func.func @baz(%arg0: memref<?xi32>) {
+  cir.func @baz(%arg0: !cir.ptr<!s32i, target_address_space(0)>) {
+    // MLIR-NEXT: memref.alloca() {alignment = 8 : i64} : memref<1xmemref<?xi32>>
+    %0 = cir.alloca !cir.ptr<!s32i, target_address_space(0)>, !cir.ptr<!cir.ptr<!s32i, target_address_space(0)>>, ["arg", init] {alignment = 8 : i64}
+    cir.return
+  }
+
+  // MLIR: func.func @test_lower_offload_as() {
+  cir.func @test_lower_offload_as() {
+    %0 = cir.alloca !cir.ptr<!s32i, lang_address_space(offload_private)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_private)>>, ["arg0", init] {alignment = 8 : i64}
+    // MLIR-NEXT: memref.alloca() {alignment = 8 : i64} : memref<1xmemref<?xi32, #gpu.address_space<private>>>
+    %1 = cir.alloca !cir.ptr<!s32i, lang_address_space(offload_global)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_global)>>, ["arg1", init] {alignment = 8 : i64}
+    // MLIR: memref.alloca() {alignment = 8 : i64} : memref<1xmemref<?xi32, #gpu.address_space<global>>>
+    %2 = cir.alloca !cir.ptr<!s32i, lang_address_space(offload_constant)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_constant)>>, ["arg2", init] {alignment = 8 : i64}
+    // MLIR: memref.alloca() {alignment = 8 : i64} : memref<1xmemref<?xi32>>
+    %3 = cir.alloca !cir.ptr<!s32i, lang_address_space(offload_local)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_local)>>, ["arg3", init] {alignment = 8 : i64}
+    // MLIR: memref.alloca() {alignment = 8 : i64} : memref<1xmemref<?xi32, #gpu.address_space<workgroup>>>
+    %4 = cir.alloca !cir.ptr<!s32i, lang_address_space(offload_generic)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_generic)>>, ["arg4", init] {alignment = 8 : i64}
+    // MLIR: memref.alloca() {alignment = 8 : i64} : memref<1xmemref<?xi32>>
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/array.c b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/array.c
new file mode 100644
index 0000000000000..d551b0f4caaee
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/array.c
@@ -0,0 +1,57 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s --check-prefix=MLIR
+
+int test_array1() {
+    // CIR-LABEL: cir.func {{.*}} @test_array1
+    // CIR: %[[ARRAY:.*]] = cir.alloca !cir.array<!s32i x 3>, !cir.ptr<!cir.array<!s32i x 3>>, ["a"] {alignment = 4 : i64}
+    // CIR: %{{.*}} = cir.get_element %[[ARRAY]][{{.*}}] : (!cir.ptr<!cir.array<!s32i x 3>>, !s32i) -> !cir.ptr<!s32i>
+
+    // MLIR-LABEL: func @test_array1
+    // MLIR: %{{.*}} = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+    // MLIR: %[[ARRAY:.*]] = memref.alloca() {alignment = 4 : i64} : memref<3xi32>
+    // MLIR: %{{.*}} = memref.load %[[ARRAY]][%{{.*}}] : memref<3xi32>
+    int a[3];
+    return a[1];
+}
+
+int test_array2() {
+    // CIR-LABEL: cir.func {{.*}} @test_array2
+    // CIR: %[[ARRAY:.*]] = cir.alloca !cir.array<!cir.array<!s32i x 4> x 3>, !cir.ptr<!cir.array<!cir.array<!s32i x 4> x 3>>, ["a"] {alignment = 16 : i64}
+    // CIR: %{{.*}} = cir.get_element %[[ARRAY]][%{{.*}}] : (!cir.ptr<!cir.array<!cir.array<!s32i x 4> x 3>>, !s32i) -> !cir.ptr<!cir.array<!s32i x 4>>
+    // CIR: %{{.*}} = cir.get_element %{{.*}}[%{{.*}}] : (!cir.ptr<!cir.array<!s32i x 4>>, !s32i) -> !cir.ptr<!s32i>
+
+    // MLIR-LABEL: func @test_array2
+    // MLIR: %{{.*}} = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+    // MLIR: %[[ARRAY:.*]] = memref.alloca() {alignment = 16 : i64} : memref<3x4xi32>
+    // MLIR: %{{.*}} = memref.load %[[ARRAY]][%{{.*}}, %{{.*}}] : memref<3x4xi32>
+    int a[3][4];
+    return a[1][2];
+}
+
+int test_array3() {
+    // CIR-LABEL: cir.func {{.*}} @test_array3()
+    // CIR: %[[ARRAY:.*]] = cir.alloca !cir.array<!s32i x 3>, !cir.ptr<!cir.array<!s32i x 3>>, ["a"] {alignment = 4 : i64}
+    // CIR: %[[ELEM1:.*]] = cir.get_element %[[ARRAY]][{{.*}}] : (!cir.ptr<!cir.array<!s32i x 3>>, !s32i) -> !cir.ptr<!s32i>
+    // CIR: {{.*}} = cir.load align(4) %[[ELEM1]] : !cir.ptr<!s32i>, !s32i
+    // CIR: %[[ELEM2:.*]] = cir.get_element %[[ARRAY]][{{.*}}] : (!cir.ptr<!cir.array<!s32i x 3>>, !s32i) -> !cir.ptr<!s32i>
+    // CIR: %{{.*}} = cir.load align(4) %[[ELEM2]] : !cir.ptr<!s32i>, !s32i
+    // CIR: cir.store align(4) {{.*}}, %[[ELEM2]] : !s32i, !cir.ptr<!s32i>
+    // CIR: %[[ELEM3:.*]] = cir.get_element %[[ARRAY]][{{.*}}] : (!cir.ptr<!cir.array<!s32i x 3>>, !s32i) -> !cir.ptr<!s32i>
+    // CIR: %{{.*}} = cir.load align(4) %[[ELEM3]] : !cir.ptr<!s32i>, !s32i
+
+    // MLIR-LABEL: func @test_array3
+    // MLIR: %{{.*}} = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+    // MLIR: %[[ARRAY:.*]] = memref.alloca() {alignment = 4 : i64} : memref<3xi32>
+    // MLIR: %[[IDX1:.*]] = arith.index_cast %{{.*}} : i32 to index
+    // MLIR: %{{.*}} = memref.load %[[ARRAY]][%[[IDX1]]] : memref<3xi32>
+    // MLIR: %[[IDX2:.*]] = arith.index_cast %{{.*}} : i32 to index
+    // MLIR: %{{.*}} = memref.load %[[ARRAY]][%[[IDX2]]] : memref<3xi32>
+    // MLIR: memref.store %{{.*}}, %[[ARRAY]][%[[IDX2]]] : memref<3xi32>
+    // MLIR: %[[IDX3:.*]] = arith.index_cast %{{.*}} : i32 to index
+    // MLIR: %{{.*}} = memref.load %[[ARRAY]][%[[IDX3]]] : memref<3xi32>
+    int a[3];
+    a[0] += a[2];
+    return a[1];
+}
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/array.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/array.cir
new file mode 100644
index 0000000000000..cf22a2a41579d
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/array.cir
@@ -0,0 +1,17 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck %s --input-file %t.mlir
+
+!s32i = !cir.int<s, 32>
+module {
+  cir.func @foo() {
+    %0 = cir.alloca !cir.array<!s32i x 10>, !cir.ptr<!cir.array<!s32i x 10>>, ["a"] {alignment = 16 : i64}
+    cir.return
+  }
+}
+
+// CHECK: module {
+// CHECK: func @foo() {
+// CHECK:    = memref.alloca() {alignment = 16 : i64} : memref<10xi32>
+// CHECK:    return
+// CHECK:  }
+// CHECK: }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/asin.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/asin.cir
new file mode 100644
index 0000000000000..cf004f000d201
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/asin.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck %s --input-file %t.mlir
+
+module {
+  cir.func @foo() {
+    %1 = cir.const #cir.fp<1.0> : !cir.float
+    %2 = cir.const #cir.fp<1.0> : !cir.double
+    %3 = cir.const #cir.fp<1.0> : !cir.long_double<!cir.f80>
+    %4 = cir.const #cir.fp<1.0> : !cir.long_double<!cir.double>
+    %5 = cir.asin %1 : !cir.float
+    %6 = cir.asin %2 : !cir.double
+    %7 = cir.asin %3 : !cir.long_double<!cir.f80>
+    %8 = cir.asin %4 : !cir.long_double<!cir.double>
+    cir.return
+  }
+}
+
+// CHECK:      module {
+// CHECK-NEXT:   func.func @foo() {
+// CHECK-NEXT:     %[[C0:.+]] = arith.constant 1.000000e+00 : f32
+// CHECK-NEXT:     %[[C1:.+]] = arith.constant 1.000000e+00 : f64
+// CHECK-NEXT:     %[[C2:.+]] = arith.constant 1.000000e+00 : f80
+// CHECK-NEXT:     %[[C3:.+]] = arith.constant 1.000000e+00 : f64
+// CHECK-NEXT:     %{{.+}} = math.asin %[[C0]] : f32
+// CHECK-NEXT:     %{{.+}} = math.asin %[[C1]] : f64
+// CHECK-NEXT:     %{{.+}} = math.asin %[[C2]] : f80
+// CHECK-NEXT:     %{{.+}} = math.asin %[[C3]] : f64
+// CHECK-NEXT:     return
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/atan.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/atan.cir
new file mode 100644
index 0000000000000..86ad5ed60beb2
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/atan.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck %s --input-file %t.mlir
+
+module {
+  cir.func @foo() {
+    %1 = cir.const #cir.fp<1.0> : !cir.float
+    %2 = cir.const #cir.fp<1.0> : !cir.double
+    %3 = cir.const #cir.fp<1.0> : !cir.long_double<!cir.f80>
+    %4 = cir.const #cir.fp<1.0> : !cir.long_double<!cir.double>
+    %5 = cir.atan %1 : !cir.float
+    %6 = cir.atan %2 : !cir.double
+    %7 = cir.atan %3 : !cir.long_double<!cir.f80>
+    %8 = cir.atan %4 : !cir.long_double<!cir.double>
+    cir.return
+  }
+}
+
+// CHECK:      module {
+// CHECK-NEXT:   func.func @foo() {
+// CHECK-NEXT:     %[[C0:.+]] = arith.constant 1.000000e+00 : f32
+// CHECK-NEXT:     %[[C1:.+]] = arith.constant 1.000000e+00 : f64
+// CHECK-NEXT:     %[[C2:.+]] = arith.constant 1.000000e+00 : f80
+// CHECK-NEXT:     %[[C3:.+]] = arith.constant 1.000000e+00 : f64
+// CHECK-NEXT:     %{{.+}} = math.atan %[[C0]] : f32
+// CHECK-NEXT:     %{{.+}} = math.atan %[[C1]] : f64
+// CHECK-NEXT:     %{{.+}} = math.atan %[[C2]] : f80
+// CHECK-NEXT:     %{{.+}} = math.atan %[[C3]] : f64
+// CHECK-NEXT:     return
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/binop.cpp b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/binop.cpp
new file mode 100644
index 0000000000000..83ef9c3d1411c
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/binop.cpp
@@ -0,0 +1,77 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+void testSignedIntBinOps(int a, int b) {
+  int x = a * b;
+  x = x / b;
+  x = x % b;
+  x = x + b;
+  x = x - b;
+  x = x >> b;
+  x = x << b;
+  x = x & b;
+  x = x ^ b;
+  x = x | b;
+}
+
+// CHECK: func.func @_Z19testSignedIntBinOpsii
+// CHECK:   %[[VAR2:.*]] = arith.muli %[[VAR0:.*]], %[[VAR1:.*]] : i32
+// CHECK:   %[[VAR5:.*]] = arith.divsi %[[VAR3:.*]], %[[VAR4:.*]] : i32
+// CHECK:   %[[VAR8:.*]] = arith.remsi %[[VAR6:.*]], %[[VAR7:.*]] : i32
+// CHECK:   %[[VAR11:.*]] = arith.addi %[[VAR9:.*]], %[[VAR10:.*]] : i32
+// CHECK:   %[[VAR14:.*]] = arith.subi %[[VAR12:.*]], %[[VAR13:.*]] : i32
+// CHECK:   %[[VAR18:.*]] = arith.shrsi %[[VAR15:.*]], %[[VAR16:.*]] : i32
+// CHECK:   %[[VAR22:.*]] = arith.shli %[[VAR19:.*]], %[[VAR20:.*]] : i32
+// CHECK:   %[[VAR25:.*]] = arith.andi %[[VAR23:.*]], %[[VAR24:.*]] : i32
+// CHECK:   %[[VAR28:.*]] = arith.xori %[[VAR26:.*]], %[[VAR27:.*]] : i32
+// CHECK:   %[[VAR31:.*]] = arith.ori %[[VAR29:.*]], %[[VAR30:.*]] : i32
+// CHECK: }
+
+void testUnSignedIntBinOps(unsigned a, unsigned b) {
+  unsigned x = a * b;
+  x = x / b;
+  x = x % b;
+  x = x + b;
+  x = x - b;
+  x = x >> b;
+  x = x << b;
+  x = x & b;
+  x = x ^ b;
+  x = x | b;
+}
+
+// CHECK: func.func @_Z21testUnSignedIntBinOpsjj
+// CHECK:   %[[VAR2:.*]] = arith.muli %[[VAR0:.*]], %[[VAR1:.*]] : i32
+// CHECK:   %[[VAR5:.*]] = arith.divui %[[VAR3:.*]], %[[VAR4:.*]] : i32
+// CHECK:   %[[VAR8:.*]] = arith.remui %[[VAR6:.*]], %[[VAR7:.*]] : i32
+// CHECK:   %[[VAR11:.*]] = arith.addi %[[VAR9:.*]], %[[VAR10:.*]] : i32
+// CHECK:   %[[VAR14:.*]] = arith.subi %[[VAR12:.*]], %[[VAR13:.*]] : i32
+// CHECK:   %[[VAR18:.*]] = arith.shrui %[[VAR15:.*]], %[[VAR16:.*]] : i32
+// CHECK:   %[[VAR22:.*]] = arith.shli %[[VAR19:.*]], %[[VAR20:.*]] : i32
+// CHECK:   %[[VAR25:.*]] = arith.andi %[[VAR23:.*]], %[[VAR24:.*]] : i32
+// CHECK:   %[[VAR28:.*]] = arith.xori %[[VAR26:.*]], %[[VAR27:.*]] : i32
+// CHECK:   %[[VAR31:.*]] = arith.ori %[[VAR29:.*]], %[[VAR30:.*]] : i32
+// CHECK: }
+
+void testFloatingPointBinOps(float a, float b, double c, double d) {
+  float e = a * b;
+  e = a / b;
+  e = a + b;
+  e = a - b;
+
+  double f = a * b;
+  f = c * d;
+  f = c / d;
+  f = c + d;
+  f = c - d;
+}
+
+// CHECK: func.func @_Z23testFloatingPointBinOpsffdd
+// CHECK:   %[[VAR2:.*]] = arith.mulf %[[VAR0:.*]], %[[VAR1:.*]] : f32
+// CHECK:   %[[VAR5:.*]] = arith.divf %[[VAR3:.*]], %[[VAR4:.*]] : f32
+// CHECK:   %[[VAR8:.*]] = arith.addf %[[VAR6:.*]], %[[VAR7:.*]] : f32
+// CHECK:   %[[VAR11:.*]] = arith.subf %[[VAR9:.*]], %[[VAR10:.*]] : f32
+// CHECK:   %[[VAR14:.*]] = arith.mulf %[[VAR12:.*]], %[[VAR13:.*]] : f64
+// CHECK:   %[[VAR18:.*]] = arith.divf %[[VAR16:.*]], %[[VAR17:.*]] : f64
+// CHECK:   %[[VAR22:.*]] = arith.addf %[[VAR20:.*]], %[[VAR21:.*]] : f64
+// CHECK:   %[[VAR26:.*]] = arith.subf %[[VAR24:.*]], %[[VAR25:.*]] : f64
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/bit.c b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/bit.c
new file mode 100644
index 0000000000000..e6302a5b14f22
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/bit.c
@@ -0,0 +1,133 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+int clz_u16(unsigned short x) {
+  return __builtin_clzs(x);
+}
+// CHECK-LABEL: clz_u16
+// CHECK: %[[CTLZ:.+]] = math.ctlz %[[INPUT:.+]] : i16
+// CHECK: %[[EXTUI:.+]] = arith.extui %[[CTLZ]] : i16 to i32
+
+int clz_u32(unsigned x) {
+  return __builtin_clz(x);
+}
+// CHECK-LABEL: clz_u32
+// CHECK: %[[CTLZ:.+]] = math.ctlz %[[INPUT:.+]] : i32
+// CHECK: %[[BITCAST:.+]] = arith.bitcast %[[CTLZ]] : i32 to i32
+
+int clz_u64(unsigned long x) {
+  return __builtin_clzl(x);
+}
+// CHECK-LABEL: clz_u64
+// CHECK: %[[CTLZ:.+]] = math.ctlz %[[INPUT:.+]] : i64
+// CHECK: %[[TRUNCI:.+]] = arith.trunci %[[CTLZ]] : i64 to i32
+
+int ctz_u16(unsigned short x) {
+  return __builtin_ctzs(x);
+}
+// CHECK-LABEL: ctz_u16
+// CHECK: %[[CTTZ:.+]] = math.cttz %[[INPUT:.+]] : i16
+// CHECK: %[[EXTUI:.+]] = arith.extui %[[CTTZ]] : i16 to i32
+
+int ctz_u32(unsigned x) {
+  return __builtin_ctz(x);
+}
+// CHECK-LABEL: ctz_u32
+// CHECK: %[[CTTZ:.+]] = math.cttz %[[INPUT:.+]] : i32
+// CHECK: %[[BITCAST:.+]] = arith.bitcast %[[CTTZ]] : i32 to i32
+
+int ctz_u64(unsigned long x) {
+  return __builtin_ctzl(x);
+}
+// CHECK-LABEL: ctz_u64
+// CHECK: %[[CTTZ:.+]] = math.cttz %[[INPUT:.+]] : i64
+// CHECK: %[[TRUNCI:.+]] = arith.trunci %[[CTTZ]] : i64 to i32
+
+int popcount_u16(unsigned short x) {
+  return __builtin_popcountg(x);
+}
+// CHECK-LABEL: popcount_u16
+// CHECK: %[[CTPOP:.+]] = math.ctpop %{{.+}} : i16
+// CHECK-NEXT: %{{.+}} = arith.extui %[[CTPOP]] : i16 to i32
+
+int popcount_u32(unsigned x) {
+  return __builtin_popcount(x);
+}
+// CHECK-LABEL: popcount_u32
+// CHECK: %[[CTPOP:.+]] = math.ctpop %{{.+}} : i32
+// CHECK-NEXT: %[[BITCAST:.+]] = arith.bitcast %[[CTPOP]] : i32 to i32
+
+int popcount_u64(unsigned long x) {
+  return __builtin_popcountl(x);
+}
+// CHECK-LABEL: popcount_u64
+// CHECK: %[[CTPOP:.+]] = math.ctpop %[[INPUT:.+]] : i64
+// CHECK-NEXT: %[[TRUNCI:.+]] = arith.trunci %[[CTPOP]] : i64 to i32
+
+int clrsb_s32(int x) {
+  return __builtin_clrsb(x);
+}
+// CHECK-LABEL: clrsb_s32
+// CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32
+// CHECK-NEXT: %[[CMP:.+]] = arith.cmpi slt, %[[INPUT:.+]], %[[C0_I32]] : i32
+// CHECK-NEXT: %[[C_MINUS1_I32:.+]] = arith.constant -1 : i32
+// CHECK-NEXT: %[[XORI:.+]] = arith.xori %[[INPUT]], %[[C_MINUS1_I32]] : i32
+// CHECK-NEXT: %[[SELECT:.+]] = arith.select %[[CMP]], %[[XORI]], %[[INPUT]] : i32
+// CHECK-NEXT: %[[CTLZ:.+]] = math.ctlz %[[SELECT]] : i32
+// CHECK-NEXT: %[[C1_I32:.+]] = arith.constant 1 : i32
+// CHECK-NEXT: %[[SUBI:.+]] = arith.subi %[[CTLZ]], %[[C1_I32]] : i32
+
+int clrsb_s64(long x) {
+  return __builtin_clrsbl(x);
+}
+// CHECK-LABEL: clrsb_s64
+// CHECK: %[[C0_I64:.+]] = arith.constant 0 : i64
+// CHECK-NEXT: %[[CMP:.+]] = arith.cmpi slt, %[[INPUT:.+]], %[[C0_I64]] : i64
+// CHECK-NEXT: %[[C_MINUS1_I64:.+]] = arith.constant -1 : i64
+// CHECK-NEXT: %[[XORI:.+]] = arith.xori %[[INPUT]], %[[C_MINUS1_I64]] : i64
+// CHECK-NEXT: %[[SELECT:.+]] = arith.select %[[CMP]], %[[XORI]], %[[INPUT]] : i64
+// CHECK-NEXT: %[[CTLZ:.+]] = math.ctlz %[[SELECT]] : i64
+// CHECK-NEXT: %[[C1_I64:.+]] = arith.constant 1 : i64
+// CHECK-NEXT: %[[SUBI:.+]] = arith.subi %[[CTLZ]], %[[C1_I64]] : i64
+// CHECK-NEXT: %[[TRUNCI:.+]] = arith.trunci %[[SUBI]] : i64 to i32
+
+int ffs_s32(int x) {
+  return __builtin_ffs(x);
+}
+// CHECK-LABEL: ffs_s32
+// CHECK: %[[CTTZ:.+]] = math.cttz %[[INPUT:.+]] : i32
+// CHECK-NEXT: %[[C1_I32:.+]] = arith.constant 1 : i32
+// CHECK-NEXT: %[[ADDI:.+]] = arith.addi %[[CTTZ]], %[[C1_I32]] : i32
+// CHECK-NEXT: %[[C0_I32:.+]] = arith.constant 0 : i32
+// CHECK-NEXT: %[[CMPI:.+]] = arith.cmpi eq, %[[INPUT]], %[[C0_I32]] : i32
+// CHECK-NEXT: %[[SELECT:.+]] = arith.select %[[CMPI]], %[[C0_I32]], %[[ADDI]] : i32
+
+int ffs_s64(long x) {
+  return __builtin_ffsl(x);
+}
+// CHECK-LABEL: ffs_s64
+// CHECK: %[[CTTZ:.+]] = math.cttz %[[INPUT:.+]] : i64
+// CHECK-NEXT: %[[C1_I64:.+]] = arith.constant 1 : i64
+// CHECK-NEXT: %[[ADDI:.+]] = arith.addi %[[CTTZ]], %[[C1_I64]] : i64
+// CHECK-NEXT: %[[C0_I64:.+]] = arith.constant 0 : i64
+// CHECK-NEXT: %[[CMPI:.+]] = arith.cmpi eq, %[[INPUT]], %[[C0_I64]] : i64
+// CHECK-NEXT: %[[SELECT:.+]] = arith.select %[[CMPI]], %[[C0_I64]], %[[ADDI]] : i64
+// CHECK-NEXT: %[[TRUNCI:.+]] = arith.trunci %[[SELECT]] : i64 to i32
+
+int parity_u32(unsigned x) {
+  return __builtin_parity(x);
+}
+// CHECK-LABEL: parity_u32
+// CHECK: %[[CTPOP:.+]] = math.ctpop %[[INPUT:.+]] : i32
+// CHECK-NEXT: %[[C1_I32:.+]] = arith.constant 1 : i32
+// CHECK-NEXT: %[[ANDI:.+]] = arith.andi %[[CTPOP]], %[[C1_I32]] : i32
+// CHECK-NEXT: %[[BITCAST:.+]] = arith.bitcast %[[ANDI]] : i32 to i32
+
+int parity_u64(unsigned long x) {
+  return __builtin_parityl(x);
+}
+// CHECK-LABEL: func.func @parity_u64(%arg0: i64{{.*}}) -> i32 {
+// CHECK: %[[CTPOP:.+]] = math.ctpop %[[INPUT:.+]] : i64
+// CHECK-NEXT: %[[C1_I64:.+]] = arith.constant 1 : i64
+// CHECK-NEXT: %[[ANDI:.+]] = arith.andi %[[CTPOP]], %[[C1_I64]] : i64
+// CHECK-NEXT: %[[TRUNCI:.+]] = arith.trunci %[[ANDI]] : i64 to i32
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/bool.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/bool.cir
new file mode 100644
index 0000000000000..7bd429c4e481e
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/bool.cir
@@ -0,0 +1,25 @@
+// RUN: cir-opt %s -cir-to-mlir -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm -canonicalize -o - | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM
+
+#false = #cir.bool<false> : !cir.bool
+#true = #cir.bool<true> : !cir.bool
+module {
+  cir.func @foo() {
+    %0 = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["a", init] {alignment = 1 : i64}
+    %1 = cir.const #true
+    cir.store %1, %0 : !cir.bool, !cir.ptr<!cir.bool>
+    cir.return
+  }
+}
+
+// MLIR: func @foo() {
+// MLIR: %[[VALUE:[a-z0-9]+]] = memref.alloca() {alignment = 1 : i64} : memref<1xi8>
+// MLIR: %[[CONST:.*]] = arith.constant true
+// MLIR: %[[BOOL_TO_MEM:.*]] = arith.extui %[[CONST]] : i1 to i8
+// MLIR: %[[CONST0:[a-z0-9]+]] = arith.constant 0 : index
+// MLIR-NEXT: memref.store %[[BOOL_TO_MEM]], %[[VALUE]][%[[CONST0]]] : memref<1xi8>
+// MLIR: return
+
+// LLVM: = alloca i8, i64
+// LLVM: store i8 1, ptr %1
+// LLVM: ret
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/br-with-arg.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/br-with-arg.cir
new file mode 100644
index 0000000000000..11025fbbd69a7
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/br-with-arg.cir
@@ -0,0 +1,15 @@
+// RUN: cir-opt %s -cir-to-mlir | FileCheck %s -check-prefix=MLIR
+
+!s32i = !cir.int<s, 32>
+module {
+  cir.func @br() -> !s32i {
+    %0 = cir.const #cir.int<2> : !s32i
+    cir.br ^bb1(%0 : !s32i)
+  ^bb1(%v1 : !s32i):
+    cir.return %v1 : !s32i
+  // MLIR: %[[A:.*]] = arith.constant 2 : i32
+  // MLIR-NEXT:  cf.br ^bb1(%[[A]] : i32)
+  // MLIR-NEXT:^bb1(%[[V1:.*]]: i32):
+  // MLIR-NEXT:  return %[[V1]] : i32
+  }
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/branch.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/branch.cir
new file mode 100644
index 0000000000000..89cd8849a3ca7
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/branch.cir
@@ -0,0 +1,35 @@
+// RUN: cir-opt %s -cir-to-mlir | FileCheck %s -check-prefix=MLIR
+// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+cir.func @foo(%arg0: !cir.bool) -> !s32i {
+  cir.brcond %arg0 ^bb1, ^bb2
+  ^bb1:
+    %0 = cir.const #cir.int<1>: !s32i
+    cir.return %0 : !s32i
+  ^bb2:
+    %1 = cir.const #cir.int<0>: !s32i
+    cir.return %1 : !s32i
+}
+
+//      MLIR: module {
+// MLIR-NEXT:   func.func @foo(%arg0: i1) -> i32
+// MLIR-NEXT:     cf.cond_br %arg0, ^bb1, ^bb2
+// MLIR-NEXT:   ^bb1:  // pred: ^bb0
+// MLIR-NEXT:     %c1_i32 = arith.constant 1 : i32
+// MLIR-NEXT:     return %c1_i32 : i32
+// MLIR-NEXT:   ^bb2:  // pred: ^bb0
+// MLIR-NEXT:     %c0_i32 = arith.constant 0 : i32
+// MLIR-NEXT:     return %c0_i32 : i32
+// MLIR-NEXT:   }
+// MLIR-NEXT: }
+
+//       LLVM: define i32 @foo(i1 %0)
+//  LLVM-NEXT:   br i1 %0, label %[[TRUE:.*]], label %[[FALSE:.*]]
+// LLVM-EMPTY:
+//  LLVM-NEXT: [[TRUE]]:
+//  LLVM-NEXT:   ret i32 1
+// LLVM-EMPTY:
+//  LLVM-NEXT: [[FALSE]]:
+//  LLVM-NEXT:   ret i32 0
+//  LLVM-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/call.c b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/call.c
new file mode 100644
index 0000000000000..84dcfc4e64eb3
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/call.c
@@ -0,0 +1,52 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+void foo(int i) {}
+
+int test(void) {
+  foo(2);
+  return 0;
+}
+
+// CHECK-LABEL: func.func @test() -> i32 {
+//       CHECK:   %[[ARG:.+]] = arith.constant 2 : i32
+//  CHECK-NEXT:   call @foo(%[[ARG]]) : (i32) -> ()
+//       CHECK: }
+
+extern int printf(const char *str, ...);
+
+// CHECK-LABEL: llvm.func @printf(!llvm.ptr, ...) -> i32
+//       CHECK: llvm.mlir.global internal constant @[[FRMT_STR:.*]](dense<[37, 100, 44, 32, 37, 102, 44, 32, 37, 100, 44, 32, 37, 108, 108, 100, 44, 32, 37, 100, 44, 32, 37, 102, 10, 0]> : tensor<26xi8>) {addr_space = 0 : i32} : !llvm.array<26 x i8>
+
+void testfunc(short s, float X, char C, long long LL, int I, double D) {
+	printf("%d, %f, %d, %lld, %d, %f\n", s, X, C, LL, I, D);
+}
+
+// CHECK: func.func @testfunc(%[[ARG0:.*]]: i16 {{.*}}, %[[ARG1:.*]]: f32 {{.*}}, %[[ARG2:.*]]: i8 {{.*}}, %[[ARG3:.*]]: i64 {{.*}}, %[[ARG4:.*]]: i32 {{.*}}, %[[ARG5:.*]]: f64 {{.*}}) {
+// CHECK: %[[ALLOCA_S:.*]] = memref.alloca() {alignment = 2 : i64} : memref<1xi16>
+// CHECK: %[[ALLOCA_X:.*]] = memref.alloca() {alignment = 4 : i64} : memref<1xf32>
+// CHECK: %[[ALLOCA_C:.*]] = memref.alloca() {alignment = 1 : i64} : memref<1xi8>
+// CHECK: %[[ALLOCA_LL:.*]] = memref.alloca() {alignment = 8 : i64} : memref<1xi64>
+// CHECK: %[[ALLOCA_I:.*]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+// CHECK: %[[ALLOCA_D:.*]] = memref.alloca() {alignment = 8 : i64} : memref<1xf64>
+// CHECK: memref.store %[[ARG0]], %[[ALLOCA_S]][{{%c0(_[0-9]+)?}}] : memref<1xi16>
+// CHECK: memref.store %[[ARG1]], %[[ALLOCA_X]][{{%c0(_[0-9]+)?}}] : memref<1xf32>
+// CHECK: memref.store %[[ARG2]], %[[ALLOCA_C]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+// CHECK: memref.store %[[ARG3]], %[[ALLOCA_LL]][{{%c0(_[0-9]+)?}}] : memref<1xi64>
+// CHECK: memref.store %[[ARG4]], %[[ALLOCA_I]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK: memref.store %[[ARG5]], %[[ALLOCA_D]][{{%c0(_[0-9]+)?}}] : memref<1xf64>
+// CHECK: %[[FRMT_STR_ADDR:.*]] = llvm.mlir.addressof @[[FRMT_STR]] : !llvm.ptr 
+// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i8 
+// CHECK: %[[FRMT_STR_DATA:.*]] = llvm.getelementptr %[[FRMT_STR_ADDR]][%[[C0]], %[[C0]]] : (!llvm.ptr, i8, i8) -> !llvm.ptr, !llvm.array<26 x i8>
+// CHECK: %[[S:.*]] = memref.load %[[ALLOCA_S]][{{%c0(_[0-9]+)?}}] : memref<1xi16>
+// CHECK: %[[S_EXT:.*]] = arith.extsi %[[S]] : i16 to i32
+// CHECK: %[[X:.*]] = memref.load %[[ALLOCA_X]][{{%c0(_[0-9]+)?}}] : memref<1xf32>
+// CHECK: %[[X_EXT:.*]] = arith.extf %[[X]] : f32 to f64
+// CHECK: %[[C:.*]] = memref.load %[[ALLOCA_C]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+// CHECK: %[[C_EXT:.*]] = arith.extsi %[[C]] : i8 to i32
+// CHECK: %[[LL:.*]] = memref.load %[[ALLOCA_LL]][{{%c0(_[0-9]+)?}}] : memref<1xi64>
+// CHECK: %[[I:.*]] = memref.load %[[ALLOCA_I]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK: %[[D:.*]] = memref.load %[[ALLOCA_D]][{{%c0(_[0-9]+)?}}] : memref<1xf64>
+// CHECK: {{.*}} = llvm.call @printf(%[[FRMT_STR_DATA]], %[[S_EXT]], %[[X_EXT]], %[[C_EXT]], %[[LL]], %[[I]], %[[D]]) vararg(!llvm.func<i32 (ptr, ...)>) : (!llvm.ptr, i32, f64, i32, i64, i32, f64) -> i32 
+// CHECK: return
+// CHECK: } 
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/cast.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/cast.cir
new file mode 100644
index 0000000000000..8c5f8f770965a
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/cast.cir
@@ -0,0 +1,147 @@
+// RUN: cir-opt %s -cir-to-mlir | FileCheck %s -check-prefix=MLIR
+// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+!s16i = !cir.int<s, 16>
+!u32i = !cir.int<u, 32>
+!u16i = !cir.int<u, 16>
+!u8i  = !cir.int<u, 8>
+module {
+  // MLIR-LABEL: func.func @cast_int_to_bool(%arg0: i32) -> i1
+  // LLVM-LABEL: define i1 @cast_int_to_bool(i32 %0)
+  cir.func @cast_int_to_bool(%i : !u32i) -> !cir.bool {
+    // MLIR-NEXT: %[[ZERO:.*]] = arith.constant 0 : i32
+    // MLIR-NEXT: arith.cmpi ne, %arg0, %[[ZERO]]
+    // LLVM-NEXT: icmp ne i32 %0, 0
+
+    %1 = cir.cast int_to_bool %i : !u32i -> !cir.bool
+    cir.return %1 : !cir.bool
+  }
+  // MLIR-LABEL: func.func @cast_integral_trunc(%arg0: i32) -> i16
+  // LLVM-LABEL: define i16 @cast_integral_trunc(i32 %0)
+  cir.func @cast_integral_trunc(%i : !u32i) -> !u16i {
+    // MLIR-NEXT: arith.trunci %arg0 : i32 to i16
+    // LLVM-NEXT: trunc i32 %0 to i16
+
+    %1 = cir.cast integral %i : !u32i -> !u16i
+    cir.return %1 : !u16i
+  }
+  // MLIR-LABEL: func.func @cast_integral_extu(%arg0: i16) -> i32
+  // LLVM-LABEL: define i32 @cast_integral_extu(i16 %0)
+  cir.func @cast_integral_extu(%i : !u16i) -> !u32i {
+    // MLIR-NEXT: arith.extui %arg0 : i16 to i32
+    // LLVM-NEXT: zext i16 %0 to i32
+
+    %1 = cir.cast integral %i : !u16i -> !u32i
+    cir.return %1 : !u32i
+  }
+  // MLIR-LABEL: func.func @cast_integral_exts(%arg0: i16) -> i32
+  // LLVM-LABEL: define i32 @cast_integral_exts(i16 %0)
+  cir.func @cast_integral_exts(%i : !s16i) -> !s32i {
+    // MLIR-NEXT: arith.extsi %arg0 : i16 to i32
+    // LLVM-NEXT: sext i16 %0 to i32
+
+    %1 = cir.cast integral %i : !s16i -> !s32i
+    cir.return %1 : !s32i
+  }
+  // MLIR-LABEL: func.func @cast_integral_same_size(%arg0: i32) -> i32
+  // LLVM-LABEL: define i32 @cast_integral_same_size(i32 %0)
+  cir.func @cast_integral_same_size(%i : !u32i) -> !s32i {
+    // MLIR-NEXT: %0 = arith.bitcast %arg0 : i32 to i32
+    // LLVM-NEXT: ret i32 %0
+
+    %1 = cir.cast integral %i : !u32i -> !s32i
+    cir.return %1 : !s32i
+  }
+  // MLIR-LABEL: func.func @cast_floating_trunc(%arg0: f64) -> f32
+  // LLVM-LABEL: define float @cast_floating_trunc(double %0)
+  cir.func @cast_floating_trunc(%d : !cir.double) -> !cir.float {
+    // MLIR-NEXT: arith.truncf %arg0 : f64 to f32
+    // LLVM-NEXT: fptrunc double %0 to float
+
+    %1 = cir.cast floating %d : !cir.double -> !cir.float
+    cir.return %1 : !cir.float
+  }
+  // MLIR-LABEL: func.func @cast_floating_extf(%arg0: f32) -> f64
+  // LLVM-LABEL: define double @cast_floating_extf(float %0)
+  cir.func @cast_floating_extf(%f : !cir.float) -> !cir.double {
+    // MLIR-NEXT: arith.extf %arg0 : f32 to f64
+    // LLVM-NEXT: fpext float %0 to double
+
+    %1 = cir.cast floating %f : !cir.float -> !cir.double
+    cir.return %1 : !cir.double
+  }
+  // MLIR-LABEL: func.func @cast_float_to_bool(%arg0: f32) -> i1
+  // LLVM-LABEL: define i1 @cast_float_to_bool(float %0)
+  cir.func @cast_float_to_bool(%f : !cir.float) -> !cir.bool {
+    // MLIR-NEXT: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
+    // MLIR-NEXT: arith.cmpf une, %arg0, %[[ZERO]] : f32
+    // LLVM-NEXT: fcmp une float %0, 0.000000e+00
+
+    %1 = cir.cast float_to_bool %f : !cir.float -> !cir.bool
+    cir.return %1 : !cir.bool
+  }
+  // MLIR-LABEL: func.func @cast_bool_to_int8(%arg0: i1) -> i8
+  // LLVM-LABEL: define i8 @cast_bool_to_int8(i1 %0)
+  cir.func @cast_bool_to_int8(%b : !cir.bool) -> !u8i {
+    // MLIR-NEXT: arith.extui %arg0 : i1 to i8
+    // LLVM-NEXT: zext i1 %0 to i8
+
+    %1 = cir.cast bool_to_int %b : !cir.bool -> !u8i
+    cir.return %1 : !u8i
+  }
+  // MLIR-LABEL: func.func @cast_bool_to_int(%arg0: i1) -> i32
+  // LLVM-LABEL: define i32 @cast_bool_to_int(i1 %0)
+  cir.func @cast_bool_to_int(%b : !cir.bool) -> !u32i {
+    // MLIR-NEXT: arith.extui %arg0 : i1 to i32
+    // LLVM-NEXT: zext i1 %0 to i32
+
+    %1 = cir.cast bool_to_int %b : !cir.bool -> !u32i
+    cir.return %1 : !u32i
+  }
+  // MLIR-LABEL: func.func @cast_bool_to_float(%arg0: i1) -> f32
+  // LLVM-LABEL: define float @cast_bool_to_float(i1 %0)
+  cir.func @cast_bool_to_float(%b : !cir.bool) -> !cir.float {
+    // MLIR-NEXT: arith.uitofp %arg0 : i1 to f32
+    // LLVM-NEXT: uitofp i1 %0 to float
+
+    %1 = cir.cast bool_to_float %b : !cir.bool -> !cir.float
+    cir.return %1 : !cir.float
+  }
+  // MLIR-LABEL: func.func @cast_signed_int_to_float(%arg0: i32) -> f32
+  // LLVM-LABEL: define float @cast_signed_int_to_float(i32 %0)
+  cir.func @cast_signed_int_to_float(%i : !s32i) -> !cir.float {
+    // MLIR-NEXT: arith.sitofp %arg0 : i32 to f32
+    // LLVM-NEXT: sitofp i32 %0 to float
+
+    %1 = cir.cast int_to_float %i : !s32i -> !cir.float
+    cir.return %1 : !cir.float
+  }
+  // MLIR-LABEL: func.func @cast_unsigned_int_to_float(%arg0: i32) -> f32
+  // LLVM-LABEL: define float @cast_unsigned_int_to_float(i32 %0)
+  cir.func @cast_unsigned_int_to_float(%i : !u32i) -> !cir.float {
+    // MLIR-NEXT: arith.uitofp %arg0 : i32 to f32
+    // LLVM-NEXT: uitofp i32 %0 to float
+
+    %1 = cir.cast int_to_float %i : !u32i -> !cir.float
+    cir.return %1 : !cir.float
+  }
+  // MLIR-LABEL: func.func @cast_float_to_int_signed(%arg0: f32) -> i32
+  // LLVM-LABEL: define i32 @cast_float_to_int_signed(float %0)
+  cir.func @cast_float_to_int_signed(%f : !cir.float) -> !s32i {
+    // MLIR-NEXT: arith.fptosi %arg0 : f32 to i32
+    // LLVM-NEXT: fptosi float %0 to i32
+
+    %1 = cir.cast float_to_int %f : !cir.float -> !s32i
+    cir.return %1 : !s32i
+  }
+  // MLIR-LABEL: func.func @cast_float_to_int_unsigned(%arg0: f32) -> i32
+  // LLVM-LABEL: define i32 @cast_float_to_int_unsigned(float %0)
+  cir.func @cast_float_to_int_unsigned(%f : !cir.float) -> !u32i {
+    // MLIR-NEXT: arith.fptoui %arg0 : f32 to i32
+    // LLVM-NEXT: fptoui float %0 to i32
+
+    %1 = cir.cast float_to_int %f : !cir.float -> !u32i
+    cir.return %1 : !u32i
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/ceil.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/ceil.cir
new file mode 100644
index 0000000000000..dce0012a451bb
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/ceil.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck %s --input-file %t.mlir
+
+module {
+  cir.func @foo() {
+    %0 = cir.const #cir.fp<1.31> : !cir.float
+    %1 = cir.const #cir.fp<3.0> : !cir.long_double<!cir.f80>
+    %2 = cir.const #cir.fp<2.73> : !cir.double
+    %3 = cir.const #cir.fp<4.67> : !cir.long_double<!cir.double>
+    %4 = cir.ceil %0 : !cir.float
+    %5 = cir.ceil %1 : !cir.long_double<!cir.f80>
+    %6 = cir.ceil %2 : !cir.double
+    %7 = cir.ceil %3 : !cir.long_double<!cir.double>
+    cir.return
+  }
+}
+
+// CHECK:      module {
+// CHECK-NEXT:   func.func @foo() {
+// CHECK-NEXT:     %[[C0:.+]] = arith.constant 1.310000e+00 : f32
+// CHECK-NEXT:     %[[C1:.+]] = arith.constant 3.000000e+00 : f80
+// CHECK-NEXT:     %[[C2:.+]] = arith.constant 2.730000e+00 : f64
+// CHECK-NEXT:     %[[C3:.+]] = arith.constant 4.670000e+00 : f64
+// CHECK-NEXT:     %{{.+}} = math.ceil %[[C0]] : f32
+// CHECK-NEXT:     %{{.+}} = math.ceil %[[C1]] : f80
+// CHECK-NEXT:     %{{.+}} = math.ceil %[[C2]] : f64
+// CHECK-NEXT:     %{{.+}} = math.ceil %[[C3]] : f64
+// CHECK-NEXT:     return
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/cl-kernel.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/cl-kernel.cir
new file mode 100644
index 0000000000000..4d6ce9d89dd3b
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/cl-kernel.cir
@@ -0,0 +1,14 @@
+// RUN: cir-opt %s -cir-to-mlir -o - | FileCheck %s -check-prefix=MLIR
+
+#fn_attr = #cir<extra({cl.kernel = #cir.cl.kernel})>
+module {
+  cir.func @kernel1() extra(#fn_attr) {
+// MLIR: func.func @kernel1() attributes {gpu.kernel} {
+    cir.return
+  }
+  cir.func @func1() {
+// MLIR: func.func @func1() {
+    cir.return
+  }
+}
+
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/cmp.cpp b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/cmp.cpp
new file mode 100644
index 0000000000000..4ea8511d71803
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/cmp.cpp
@@ -0,0 +1,182 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+bool testSignedIntCmpOps(int a, int b) {
+    // CHECK: %[[ALLOC1:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+    // CHECK: %[[ALLOC2:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+    // CHECK: %[[ALLOC3:.+]] = memref.alloca() {alignment = 1 : i64} : memref<1xi8>
+    // CHECK: %[[ALLOC4:.+]] = memref.alloca() {alignment = 1 : i64} : memref<1xi8>
+    // CHECK: memref.store %arg0, %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: memref.store %arg1, %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+
+    bool x = a == b;
+
+    // CHECK: %[[LOAD0:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[LOAD1:.+]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[CMP0:.+]] = arith.cmpi eq, %[[LOAD0]], %[[LOAD1]] : i32
+    // CHECK: %[[EXT0:.+]] = arith.extui %[[CMP0]] : i1 to i8
+    // CHECK: memref.store %[[EXT0]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+
+    x = a != b;
+
+    // CHECK: %[[LOAD2:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[LOAD3:.+]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[CMP1:.+]] = arith.cmpi ne, %[[LOAD2]], %[[LOAD3]] : i32
+    // CHECK: %[[EXT1:.+]] = arith.extui %[[CMP1]] : i1 to i8
+    // CHECK: memref.store %[[EXT1]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+
+    x = a < b;
+
+    // CHECK: %[[LOAD4:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[LOAD5:.+]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[CMP2:.+]] = arith.cmpi slt, %[[LOAD4]], %[[LOAD5]] : i32
+    // CHECK: %[[EXT2:.+]] = arith.extui %[[CMP2]] : i1 to i8
+    // CHECK: memref.store %[[EXT2]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+
+    x = a <= b;
+
+    // CHECK: %[[LOAD6:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[LOAD7:.+]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[CMP3:.+]] = arith.cmpi sle, %[[LOAD6]], %[[LOAD7]] : i32
+    // CHECK: %[[EXT3:.+]] = arith.extui %[[CMP3]] : i1 to i8
+    // CHECK: memref.store %[[EXT3]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+
+    x = a > b;
+
+    // CHECK: %[[LOAD8:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[LOAD9:.+]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[CMP4:.+]] = arith.cmpi sgt, %[[LOAD8]], %[[LOAD9]] : i32
+    // CHECK: %[[EXT4:.+]] = arith.extui %[[CMP4]] : i1 to i8
+    // CHECK: memref.store %[[EXT4]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+
+    x = a >= b;
+
+    // CHECK: %[[LOAD10:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[LOAD11:.+]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[CMP5:.+]] = arith.cmpi sge, %[[LOAD10]], %[[LOAD11]] : i32
+    // CHECK: %[[EXT5:.+]] = arith.extui %[[CMP5]] : i1 to i8
+    // CHECK: memref.store %[[EXT5]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+
+    return x;
+    // CHECK: return
+}
+
+bool testUnSignedIntBinOps(unsigned a, unsigned b) {
+    // CHECK: %[[ALLOC1:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+    // CHECK: %[[ALLOC2:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+    // CHECK: %[[ALLOC3:.+]] = memref.alloca() {alignment = 1 : i64} : memref<1xi8>
+    // CHECK: %[[ALLOC4:.+]] = memref.alloca() {alignment = 1 : i64} : memref<1xi8>
+    // CHECK: memref.store %arg0, %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: memref.store %arg1, %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+
+    bool x = a == b;
+
+    // CHECK: %[[LOAD0:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[LOAD1:.+]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[CMP0:.+]] = arith.cmpi eq, %[[LOAD0]], %[[LOAD1]] : i32
+    // CHECK: %[[EXT0:.+]] = arith.extui %[[CMP0]] : i1 to i8
+    // CHECK: memref.store %[[EXT0]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+
+    x = a != b;
+
+    // CHECK: %[[LOAD2:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[LOAD3:.+]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[CMP1:.+]] = arith.cmpi ne, %[[LOAD2]], %[[LOAD3]] : i32
+    // CHECK: %[[EXT1:.+]] = arith.extui %[[CMP1]] : i1 to i8
+    // CHECK: memref.store %[[EXT1]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+
+    x = a < b;
+
+    // CHECK: %[[LOAD4:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[LOAD5:.+]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[CMP2:.+]] = arith.cmpi ult, %[[LOAD4]], %[[LOAD5]] : i32
+    // CHECK: %[[EXT2:.+]] = arith.extui %[[CMP2]] : i1 to i8
+    // CHECK: memref.store %[[EXT2]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+
+    x = a <= b;
+
+    // CHECK: %[[LOAD6:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[LOAD7:.+]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[CMP3:.+]] = arith.cmpi ule, %[[LOAD6]], %[[LOAD7]] : i32
+    // CHECK: %[[EXT3:.+]] = arith.extui %[[CMP3]] : i1 to i8
+    // CHECK: memref.store %[[EXT3]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+
+    x = a > b;
+
+    // CHECK: %[[LOAD8:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[LOAD9:.+]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[CMP4:.+]] = arith.cmpi ugt, %[[LOAD8]], %[[LOAD9]] : i32
+    // CHECK: %[[EXT4:.+]] = arith.extui %[[CMP4]] : i1 to i8
+    // CHECK: memref.store %[[EXT4]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+
+    x = a >= b;
+
+    // CHECK: %[[LOAD10:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[LOAD11:.+]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+    // CHECK: %[[CMP5:.+]] = arith.cmpi uge, %[[LOAD10]], %[[LOAD11]] : i32
+    // CHECK: %[[EXT5:.+]] = arith.extui %[[CMP5]] : i1 to i8
+    // CHECK: memref.store %[[EXT5]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+
+    return x;
+    // CHECK: return
+}
+
+bool testFloatingPointCmpOps(float a, float b) {
+    // CHECK: %[[ALLOC1:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xf32>
+    // CHECK: %[[ALLOC2:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xf32>
+    // CHECK: %[[ALLOC3:.+]] = memref.alloca() {alignment = 1 : i64} : memref<1xi8>
+    // CHECK: %[[ALLOC4:.+]] = memref.alloca() {alignment = 1 : i64} : memref<1xi8>
+    // CHECK: memref.store %arg0, %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xf32>
+    // CHECK: memref.store %arg1, %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xf32>
+
+    bool x = a == b;
+
+    // CHECK: %[[LOAD0:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xf32>
+    // CHECK: %[[LOAD1:.+]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xf32>
+    // CHECK: %[[CMP0:.+]] = arith.cmpf oeq, %[[LOAD0]], %[[LOAD1]] : f32
+    // CHECK: %[[EXT0:.+]] = arith.extui %[[CMP0]] : i1 to i8
+    // CHECK: memref.store %[[EXT0]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+
+    x = a != b;
+
+    // CHECK: %[[LOAD2:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xf32>
+    // CHECK: %[[LOAD3:.+]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xf32>
+    // CHECK: %[[CMP1:.+]] = arith.cmpf une, %[[LOAD2]], %[[LOAD3]] : f32
+    // CHECK: %[[EXT1:.+]] = arith.extui %[[CMP1]] : i1 to i8
+    // CHECK: memref.store %[[EXT1]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+
+    x = a < b;
+
+    // CHECK: %[[LOAD4:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xf32>
+    // CHECK: %[[LOAD5:.+]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xf32>
+    // CHECK: %[[CMP2:.+]] = arith.cmpf olt, %[[LOAD4]], %[[LOAD5]] : f32
+    // CHECK: %[[EXT2:.+]] = arith.extui %[[CMP2]] : i1 to i8
+    // CHECK: memref.store %[[EXT2]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+
+    x = a <= b;
+
+    // CHECK: %[[LOAD6:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xf32>
+    // CHECK: %[[LOAD7:.+]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xf32>
+    // CHECK: %[[CMP3:.+]] = arith.cmpf ole, %[[LOAD6]], %[[LOAD7]] : f32
+    // CHECK: %[[EXT3:.+]] = arith.extui %[[CMP3]] : i1 to i8
+    // CHECK: memref.store %[[EXT3]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+
+    x = a > b;
+
+    // CHECK: %[[LOAD8:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xf32>
+    // CHECK: %[[LOAD9:.+]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xf32>
+    // CHECK: %[[CMP4:.+]] = arith.cmpf ogt, %[[LOAD8]], %[[LOAD9]] : f32
+    // CHECK: %[[EXT4:.+]] = arith.extui %[[CMP4]] : i1 to i8
+    // CHECK: memref.store %[[EXT4]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+
+    x = a >= b;
+
+    // CHECK: %[[LOAD10:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xf32>
+    // CHECK: %[[LOAD11:.+]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xf32>
+    // CHECK: %[[CMP5:.+]] = arith.cmpf oge, %[[LOAD10]], %[[LOAD11]] : f32
+    // CHECK: %[[EXT5:.+]] = arith.extui %[[CMP5]] : i1 to i8
+    // CHECK: memref.store %[[EXT5]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi8>
+
+    return x;
+    // CHECK: return
+}
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/cos.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/cos.cir
new file mode 100644
index 0000000000000..93b102b7a854a
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/cos.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck %s --input-file %t.mlir
+
+module {
+  cir.func @foo() {
+    %1 = cir.const #cir.fp<1.0> : !cir.float
+    %2 = cir.const #cir.fp<1.0> : !cir.double
+    %3 = cir.const #cir.fp<1.0> : !cir.long_double<!cir.f80>
+    %4 = cir.const #cir.fp<1.0> : !cir.long_double<!cir.double>
+    %5 = cir.cos %1 : !cir.float
+    %6 = cir.cos %2 : !cir.double
+    %7 = cir.cos %3 : !cir.long_double<!cir.f80>
+    %8 = cir.cos %4 : !cir.long_double<!cir.double>
+    cir.return
+  }
+}
+
+// CHECK:      module {
+// CHECK-NEXT:   func.func @foo() {
+// CHECK-NEXT:     %[[C0:.+]] = arith.constant 1.000000e+00 : f32
+// CHECK-NEXT:     %[[C1:.+]] = arith.constant 1.000000e+00 : f64
+// CHECK-NEXT:     %[[C2:.+]] = arith.constant 1.000000e+00 : f80
+// CHECK-NEXT:     %[[C3:.+]] = arith.constant 1.000000e+00 : f64
+// CHECK-NEXT:     %{{.+}} = math.cos %[[C0]] : f32
+// CHECK-NEXT:     %{{.+}} = math.cos %[[C1]] : f64
+// CHECK-NEXT:     %{{.+}} = math.cos %[[C2]] : f80
+// CHECK-NEXT:     %{{.+}} = math.cos %[[C3]] : f64
+// CHECK-NEXT:     return
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/doWhile.c b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/doWhile.c
new file mode 100644
index 0000000000000..36b744acfe46f
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/doWhile.c
@@ -0,0 +1,96 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+int sum() {
+  int s = 0;
+  int i = 0;
+  do {
+    s += i;
+    ++i;
+  } while (i <= 10);
+  return s;
+}
+
+void nestedDoWhile() {
+  int a = 0;
+  do {
+    a++;
+    int b = 0;
+    while(b < 2) {
+      b++;
+    }
+  }while(a < 2);
+}
+
+// CHECK: func.func @sum() -> i32 {
+// CHECK: %[[ALLOC:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+// CHECK: %[[ALLOC0:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+// CHECK: %[[ALLOC1:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+// CHECK: %[[C0_I32:.+]] = arith.constant 0 : i32
+// CHECK: memref.store %[[C0_I32]], %[[ALLOC0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK: %[[C0_I32_2:.+]] = arith.constant 0 : i32
+// CHECK: memref.store %[[C0_I32_2]], %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK: memref.alloca_scope {
+// CHECK:   scf.while : () -> () {
+// CHECK:     memref.alloca_scope {
+// CHECK:       %[[VAR1:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK:       %[[VAR2:.+]] = memref.load %[[ALLOC0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK:       %[[ADD:.+]] = arith.addi %[[VAR2]], %[[VAR1]] : i32
+// CHECK:       memref.store %[[ADD]], %[[ALLOC0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK:       %[[VAR3:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK:       %[[C1_I32:.+]] = arith.constant 1 : i32
+// CHECK:       %[[ADD1:.+]] = arith.addi %[[VAR3]], %[[C1_I32]] : i32
+// CHECK:       memref.store %[[ADD1]], %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK:     }
+// CHECK:     %[[VAR4:.+]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK:     %[[C10_I32:.+]] = arith.constant 10 : i32
+// CHECK:     %[[CMP:.+]] = arith.cmpi sle, %[[VAR4]], %[[C10_I32]] : i32
+// CHECK:     scf.condition(%[[CMP]])
+// CHECK:   } do {
+// CHECK:     scf.yield
+// CHECK:   }
+// CHECK: }
+// CHECK: %[[LOAD:.+]] = memref.load %[[ALLOC0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK: memref.store %[[LOAD]], %[[ALLOC]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK: %[[RET:.+]] = memref.load %[[ALLOC]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK: return %[[RET]] : i32
+
+// CHECK: func.func @nestedDoWhile() {
+// CHECK:     %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+// CHECK:     %[[C0_I32:.+]] = arith.constant 0 : i32
+// CHECK:     memref.store %[[C0_I32]], %[[alloca]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK:     memref.alloca_scope  {
+// CHECK:       scf.while : () -> () {
+// CHECK:         memref.alloca_scope {
+// CHECK:           %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+// CHECK:           %[[ZERO:.+]] = memref.load %[[alloca]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK:           %[[C1_I32:.+]] = arith.constant 1 : i32
+// CHECK:           %[[ONE:.+]] = arith.addi %[[ZERO]], %[[C1_I32]] : i32
+// CHECK:           memref.store %[[ONE]], %[[alloca]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK:           %[[C0_I32_1:.+]] = arith.constant 0 : i32
+// CHECK:           memref.store %[[C0_I32_1]], %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK:           memref.alloca_scope  {
+// CHECK:             scf.while : () -> () {
+// CHECK:               %[[EIGHT:.+]] = memref.load %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK:               %[[C2_I32_3:.+]] = arith.constant 2 : i32
+// CHECK:               %[[NINE:.+]] = arith.cmpi slt, %[[EIGHT]], %[[C2_I32_3]] : i32
+// CHECK:               scf.condition(%[[NINE]])
+// CHECK:             } do {
+// CHECK:               %[[EIGHT_2:.+]] = memref.load %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK:               %[[C1_I32_3:.+]] = arith.constant 1 : i32
+// CHECK:               %[[NINE_2:.+]] = arith.addi %[[EIGHT_2]], %[[C1_I32_3]] : i32
+// CHECK:               memref.store %[[NINE_2]], %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK:               scf.yield
+// CHECK:             }
+// CHECK:           }
+// CHECK:         }
+// CHECK:         %[[TWO:.+]] = memref.load %[[alloca]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK:         %[[C2_I32:.+]] = arith.constant 2 : i32
+// CHECK:         %[[THREE:.+]] = arith.cmpi slt, %[[TWO]], %[[C2_I32]] : i32
+// CHECK:         scf.condition(%[[THREE]])
+// CHECK:       } do {
+// CHECK:         scf.yield
+// CHECK:       }
+// CHECK:     }
+// CHECK:     return
+// CHECK:   }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/dot.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/dot.cir
new file mode 100644
index 0000000000000..0dbeec2845145
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/dot.cir
@@ -0,0 +1,29 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+!s32i = !cir.int<s, 32>
+module {
+  cir.func @dot(%arg0: !cir.ptr<f64>) -> !s32i {
+    %0 = cir.alloca !cir.ptr<f64>, !cir.ptr<!cir.ptr<f64>>, ["x", init] {alignment = 8 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+    %2 = cir.alloca !cir.ptr<f64>, !cir.ptr<!cir.ptr<f64>>, ["y", init] {alignment = 8 : i64}
+    cir.store %arg0, %0 : !cir.ptr<f64>, !cir.ptr<!cir.ptr<f64>>
+    %3 = cir.load %0 : !cir.ptr<!cir.ptr<f64>>, !cir.ptr<f64>
+    cir.store %3, %2 : !cir.ptr<f64>, !cir.ptr<!cir.ptr<f64>>
+    %4 = cir.const #cir.int<0> : !s32i
+    %5 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    cir.return %5 : !s32i
+  }
+}
+
+//      CHECK: module {
+// CHECK-NEXT:   func.func @dot(%arg0: memref<?xf64>) -> i32 {
+// CHECK-NEXT:     %alloca = memref.alloca() {alignment = 8 : i64} : memref<1xmemref<?xf64>>
+// CHECK-NEXT:     %alloca_0 = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+// CHECK-NEXT:     %alloca_1 = memref.alloca() {alignment = 8 : i64} : memref<1xmemref<?xf64>>
+// CHECK:          memref.store %arg0, %alloca[{{%c0(_[0-9]+)?}}] : memref<1xmemref<?xf64>>
+// CHECK:          %0 = memref.load %alloca[{{%c0(_[0-9]+)?}}] : memref<1xmemref<?xf64>>
+// CHECK:          memref.store %0, %alloca_1[{{%c0(_[0-9]+)?}}] : memref<1xmemref<?xf64>>
+// CHECK:          %c0_i32 = arith.constant 0 : i32
+// CHECK:          %1 = memref.load %alloca_0[{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK-NEXT:     return %1 : i32
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/exp.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/exp.cir
new file mode 100644
index 0000000000000..13294b7532dcd
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/exp.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck %s --input-file %t.mlir
+
+module {
+  cir.func @foo() {
+    %0 = cir.const #cir.fp<1.0> : !cir.float
+    %1 = cir.const #cir.fp<3.0> : !cir.long_double<!cir.f80>
+    %2 = cir.const #cir.fp<2.0> : !cir.double
+    %3 = cir.const #cir.fp<4.00> : !cir.long_double<!cir.double>
+    %4 = cir.exp %0 : !cir.float
+    %5 = cir.exp %1 : !cir.long_double<!cir.f80>
+    %6 = cir.exp2 %2 : !cir.double
+    %7 = cir.exp2 %3 : !cir.long_double<!cir.double>
+    cir.return
+  }
+}
+
+// CHECK:      module {
+// CHECK-NEXT:   func.func @foo() {
+// CHECK-NEXT:     %[[C0:.+]] = arith.constant 1.000000e+00 : f32
+// CHECK-NEXT:     %[[C1:.+]] = arith.constant 3.000000e+00 : f80
+// CHECK-NEXT:     %[[C2:.+]] = arith.constant 2.000000e+00 : f64
+// CHECK-NEXT:     %[[C3:.+]] = arith.constant 4.000000e+00 : f64
+// CHECK-NEXT:     %{{.+}} = math.exp %[[C0]] : f32
+// CHECK-NEXT:     %{{.+}} = math.exp %[[C1]] : f80
+// CHECK-NEXT:     %{{.+}} = math.exp2 %[[C2]] : f64
+// CHECK-NEXT:     %{{.+}} = math.exp2 %[[C3]] : f64
+// CHECK-NEXT:     return
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/fabs.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/fabs.cir
new file mode 100644
index 0000000000000..9a6c33fd8ab65
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/fabs.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck %s --input-file %t.mlir
+
+module {
+  cir.func @foo() {
+    %0 = cir.const #cir.fp<-1.0> : !cir.float
+    %1 = cir.const #cir.fp<-3.0> : !cir.long_double<!cir.f80>
+    %2 = cir.const #cir.fp<-2.0> : !cir.double
+    %3 = cir.const #cir.fp<-4.00> : !cir.long_double<!cir.double>
+    %4 = cir.fabs %0 : !cir.float
+    %5 = cir.fabs %1 : !cir.long_double<!cir.f80>
+    %6 = cir.fabs %2 : !cir.double
+    %7 = cir.fabs %3 : !cir.long_double<!cir.double>
+    cir.return
+  }
+}
+
+// CHECK:      module {
+// CHECK-NEXT:   func.func @foo() {
+// CHECK-NEXT:     %[[C0:.+]] = arith.constant -1.000000e+00 : f32
+// CHECK-NEXT:     %[[C1:.+]] = arith.constant -3.000000e+00 : f80
+// CHECK-NEXT:     %[[C2:.+]] = arith.constant -2.000000e+00 : f64
+// CHECK-NEXT:     %[[C3:.+]] = arith.constant -4.000000e+00 : f64
+// CHECK-NEXT:     %{{.+}} = math.absf %[[C0]] : f32
+// CHECK-NEXT:     %{{.+}} = math.absf %[[C1]] : f80
+// CHECK-NEXT:     %{{.+}} = math.absf %[[C2]] : f64
+// CHECK-NEXT:     %{{.+}} = math.absf %[[C3]] : f64
+// CHECK-NEXT:     return
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/float.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/float.cir
new file mode 100644
index 0000000000000..002f1c5053ce2
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/float.cir
@@ -0,0 +1,23 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck %s --input-file %t.mlir
+
+module {
+  cir.func @foo() {
+    %0 = cir.const #cir.fp<1.0> : !cir.float
+    %1 = cir.const #cir.fp<1.0> : !cir.double
+    %2 = cir.const #cir.fp<1.0> : !cir.long_double<!cir.f80>
+    %3 = cir.const #cir.fp<1.0> : !cir.f128
+    %4 = cir.const #cir.fp<1.0> : !cir.f16
+    %5 = cir.const #cir.fp<1.0> : !cir.bf16
+    cir.return
+  }
+
+  // CHECK-LABEL: @foo
+  //      CHECK:   %{{.+}} = arith.constant 1.000000e+00 : f32
+  // CHECK-NEXT:   %{{.+}} = arith.constant 1.000000e+00 : f64
+  // CHECK-NEXT:   %{{.+}} = arith.constant 1.000000e+00 : f80
+  // CHECK-NEXT:   %{{.+}} = arith.constant 1.000000e+00 : f128
+  // CHECK-NEXT:   %{{.+}} = arith.constant 1.000000e+00 : f16
+  // CHECK-NEXT:   %{{.+}} = arith.constant 1.000000e+00 : bf16
+  //      CHECK: }
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/floor.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/floor.cir
new file mode 100644
index 0000000000000..e4718468966c8
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/floor.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck %s --input-file %t.mlir
+
+module {
+  cir.func @foo() {
+    %0 = cir.const #cir.fp<1.51> : !cir.float
+    %1 = cir.const #cir.fp<3.0> : !cir.long_double<!cir.f80>
+    %2 = cir.const #cir.fp<2.73> : !cir.double
+    %3 = cir.const #cir.fp<4.67> : !cir.long_double<!cir.double>
+    %4 = cir.floor %0 : !cir.float
+    %5 = cir.floor %1 : !cir.long_double<!cir.f80>
+    %6 = cir.floor %2 : !cir.double
+    %7 = cir.floor %3 : !cir.long_double<!cir.double>
+    cir.return
+  }
+}
+
+// CHECK:      module {
+// CHECK-NEXT:   func.func @foo() {
+// CHECK-NEXT:     %[[C0:.+]] = arith.constant 1.510000e+00 : f32
+// CHECK-NEXT:     %[[C1:.+]] = arith.constant 3.000000e+00 : f80
+// CHECK-NEXT:     %[[C2:.+]] = arith.constant 2.730000e+00 : f64
+// CHECK-NEXT:     %[[C3:.+]] = arith.constant 4.670000e+00 : f64
+// CHECK-NEXT:     %{{.+}} = math.floor %[[C0]] : f32
+// CHECK-NEXT:     %{{.+}} = math.floor %[[C1]] : f80
+// CHECK-NEXT:     %{{.+}} = math.floor %[[C2]] : f64
+// CHECK-NEXT:     %{{.+}} = math.floor %[[C3]] : f64
+// CHECK-NEXT:     return
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/for-reject-1.cpp b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/for-reject-1.cpp
new file mode 100644
index 0000000000000..a9640ae093eeb
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/for-reject-1.cpp
@@ -0,0 +1,9 @@
+// RUN: not %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o - 2>&1 | FileCheck %s
+// XFAIL: *
+
+void f() {}
+
+void reject() {
+  for (int i = 0; i < 100; i++, f());
+  // CHECK: failed to legalize operation 'cir.scope'
+}
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/for-reject-2.cpp b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/for-reject-2.cpp
new file mode 100644
index 0000000000000..1881ee497cbf5
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/for-reject-2.cpp
@@ -0,0 +1,7 @@
+// RUN: not %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o - 2>&1 | FileCheck %s
+// XFAIL: *
+
+void reject() {
+  for (int i = 0; i < 100; i++, i++);
+  // CHECK: failed to legalize operation 'cir.for'
+}
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/for-reject.cpp b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/for-reject.cpp
new file mode 100644
index 0000000000000..a0c80d9f8a163
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/for-reject.cpp
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+void f() {}
+
+void reject_test1() {
+  for (int i = 0; i < 100; i++, f());
+  // CHECK: %[[ALLOCA:.+]] = memref.alloca
+  // CHECK: %[[ZERO:.+]] = arith.constant 0
+  // CHECK: memref.store %[[ZERO]], %[[ALLOCA]]
+  // CHECK: %[[HUNDRED:.+]] = arith.constant 100
+  // CHECK: scf.while : () -> () {
+  // CHECK:   %[[TMP:.+]] = memref.load %[[ALLOCA]]
+  // CHECK:   %[[TMP1:.+]] = arith.cmpi slt, %[[TMP]], %[[HUNDRED]]
+  // CHECK:   scf.condition(%[[TMP1]])
+  // CHECK: } do {
+  // CHECK:   %[[TMP2:.+]] = memref.load %[[ALLOCA]]
+  // CHECK:   %[[ONE:.+]] = arith.constant 1
+  // CHECK:   %[[TMP3:.+]] = arith.addi %[[TMP2]], %[[ONE]]
+  // CHECK:   memref.store %[[TMP3]], %[[ALLOCA]]
+  // CHECK:   func.call @_Z1fv()
+  // CHECK:   scf.yield
+  // CHECK: }
+}
+
+void reject_test2() {
+  for (int i = 0; i < 100; i++, i++);
+  // CHECK: %[[ALLOCA:.+]] = memref.alloca
+  // CHECK: %[[ZERO:.+]] = arith.constant 0
+  // CHECK: memref.store %[[ZERO]], %[[ALLOCA]]
+  // CHECK: %[[HUNDRED:.+]] = arith.constant 100
+  // CHECK: scf.while : () -> () {
+  // CHECK:   %[[TMP:.+]] = memref.load %[[ALLOCA]]
+  // CHECK:   %[[TMP2:.+]] = arith.cmpi slt, %[[TMP]], %[[HUNDRED]]
+  // CHECK:   scf.condition(%[[TMP2]])
+  // CHECK: } do {
+  // CHECK:   %[[TMP3:.+]] = memref.load %[[ALLOCA]]
+  // CHECK:   %[[ONE:.+]] = arith.constant 1
+  // CHECK:   %[[ADD:.+]] = arith.addi %[[TMP3]], %[[ONE]]
+  // CHECK:   memref.store %[[ADD]], %[[ALLOCA]]
+  // CHECK:   %[[LOAD:.+]] = memref.load %[[ALLOCA]]
+  // CHECK:   %[[ONE2:.+]] = arith.constant 1
+  // CHECK:   %[[ADD2:.+]] = arith.addi %[[LOAD]], %[[ONE2]]
+  // CHECK:   memref.store %[[ADD2]], %[[ALLOCA]]
+  // CHECK:   scf.yield
+  // CHECK: }
+}
+
+void reject_test3() {
+  int i;
+  for (i = 0; i < 100; i++);
+  i += 10;
+  // CHECK: %[[ALLOCA:.+]] = memref.alloca()
+  // CHECK: memref.alloca_scope  {
+  // CHECK: %[[ZERO:.+]] = arith.constant 0
+  // CHECK: memref.store %[[ZERO]], %[[ALLOCA]]
+  // CHECK: %[[HUNDRED:.+]] = arith.constant 100
+  // CHECK: scf.while : () -> () {
+  // CHECK:   %[[TMP:.+]] = memref.load %[[ALLOCA]]
+  // CHECK:   %[[TMP2:.+]] = arith.cmpi slt, %[[TMP]], %[[HUNDRED]]
+  // CHECK:   scf.condition(%[[TMP2]])
+  // CHECK: } do {
+  // CHECK:   %[[TMP3:.+]] = memref.load %[[ALLOCA]]
+  // CHECK:   %[[ONE:.+]] = arith.constant 1
+  // CHECK:   %[[ADD:.+]] = arith.addi %[[TMP3]], %[[ONE]]
+  // CHECK:   memref.store %[[ADD]], %[[ALLOCA]]
+  // CHECK:   scf.yield
+  // CHECK: }
+  // CHECK: }
+  // CHECK: %[[TEN:.+]] = arith.constant 10
+  // CHECK: %[[TMP4:.+]] = memref.load %[[ALLOCA]]
+  // CHECK: %[[TMP5:.+]] = arith.addi %[[TMP4]], %[[TEN]]
+  // CHECK: memref.store %[[TMP5]], %[[ALLOCA]]
+}
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/for.cpp b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/for.cpp
new file mode 100644
index 0000000000000..987978ccad6d3
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/for.cpp
@@ -0,0 +1,111 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+int a[101], b[101];
+
+void constantLoopBound() {
+  for (int i = 0; i < 100; ++i)
+    a[i] = 3;
+}
+// CHECK-LABEL: func.func @_Z17constantLoopBoundv() {
+// CHECK: memref.alloca_scope  {
+// CHECK-NOT: {{.*}} = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+// CHECK: %[[C0:.*]] = arith.constant 0 : i32
+// CHECK-NOT: memref.store %[[C0]], {{.*}}[{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK: %[[C100:.*]] = arith.constant 100 : i32
+// CHECK: %[[C1:.*]] = arith.constant 1 : i32
+// CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C100]] step %[[C1]] : i32 {
+// CHECK:   %[[C3:.*]] = arith.constant 3 : i32
+// CHECK:   %[[BASE:.*]] = memref.get_global @a : memref<101xi32>
+// CHECK:   %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index
+// CHECK:   memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32>
+// CHECK: }
+// CHECK: }
+
+void constantLoopBound_LE() {
+  for (int i = 0; i <= 100; ++i)
+    a[i] = 3;
+}
+// CHECK-LABEL: func.func @_Z20constantLoopBound_LEv() {
+// CHECK: memref.alloca_scope  {
+// CHECK-NOT: {{.*}} = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+// CHECK: %[[C0:.*]] = arith.constant 0 : i32
+// CHECK-NOT: memref.store %[[C0]], {{.*}}[{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK: %[[C100:.*]] = arith.constant 100 : i32
+// CHECK: %[[C1:.*]] = arith.constant 1 : i32
+// CHECK: %[[C101:.*]] = arith.addi %c100_i32, %c1_i32 : i32
+// CHECK: %[[C1_STEP:.*]] = arith.constant 1 : i32
+// CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C101]] step %[[C1_STEP]] : i32 {
+// CHECK:   %[[C3:.*]] = arith.constant 3 : i32
+// CHECK:   %[[BASE:.*]] = memref.get_global @a : memref<101xi32>
+// CHECK:   %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index
+// CHECK:   memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32>
+// CHECK: }
+// CHECK: }
+
+void variableLoopBound(int l, int u) {
+  for (int i = l; i < u; ++i)
+    a[i] = 3;
+}
+// CHECK-LABEL: func.func @_Z17variableLoopBoundii
+// CHECK: memref.store %arg0, %alloca[{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK: memref.store %arg1, %alloca_0[{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK: memref.alloca_scope  {
+// CHECK-NOT: {{.*}} = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+// CHECK: %[[LOWER:.*]] = memref.load %alloca[{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK-NOT: memref.store %[[LOWER]], {{.*}}[{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK: %[[UPPER:.*]] = memref.load %alloca_0[{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK: %[[C1:.*]] = arith.constant 1 : i32
+// CHECK: scf.for %[[I:.*]] = %[[LOWER]] to %[[UPPER]] step %[[C1]] : i32 {
+// CHECK:   %[[C3:.*]] = arith.constant 3 : i32
+// CHECK:   %[[BASE:.*]] = memref.get_global @a : memref<101xi32>
+// CHECK:   %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index
+// CHECK:   memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32>
+// CHECK: }
+// CHECK: }
+
+void variableLoopBound_LE(int l, int u) {
+  for (int i = l; i <= u; i+=4)
+    a[i] = 3;
+}
+// CHECK-LABEL: func.func @_Z20variableLoopBound_LEii
+// CHECK: memref.store %arg0, %alloca[{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK: memref.store %arg1, %alloca_0[{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK: memref.alloca_scope  {
+// CHECK-NOT: {{.*}} = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+// CHECK: %[[LOWER:.*]] = memref.load %alloca[{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK-NOT: memref.store %[[LOWER]], {{.*}}[{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK: %[[UPPER_DEC_1:.*]] = memref.load %alloca_0[{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK: %[[C1:.*]] = arith.constant 1 : i32
+// CHECK: %[[UPPER:.*]] = arith.addi %[[UPPER_DEC_1]], %[[C1]] : i32
+// CHECK: %[[C4:.*]] = arith.constant 4 : i32
+// CHECK: scf.for %[[I:.*]] = %[[LOWER]] to %[[UPPER]] step %[[C4]] : i32 {
+// CHECK:   %[[C3:.*]] = arith.constant 3 : i32
+// CHECK:   %[[BASE:.*]] = memref.get_global @a : memref<101xi32>
+// CHECK:   %[[INDEX:.*]] = arith.index_cast %[[I]] : i32 to index
+// CHECK:   memref.store %[[C3]], %[[BASE]][%[[INDEX]]] : memref<101xi32>
+// CHECK: }
+// CHECK: }
+
+void incArray() {
+  for (int i = 0; i < 100; ++i)
+    a[i] += b[i];
+}
+// CHECK-LABEL: func.func @_Z8incArrayv() {
+// CHECK: memref.alloca_scope  {
+// CHECK-NOT: {{.*}} = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+// CHECK: %[[C0:.*]] = arith.constant 0 : i32
+// CHECK-NOT: memref.store %[[C0]], {{.*}}[{{%c0(_[0-9]+)?}}] : memref<1xi32>
+// CHECK: %[[C100:.*]] = arith.constant 100 : i32
+// CHECK: %[[C1:.*]] = arith.constant 1 : i32
+// CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C100]] step %[[C1]] : i32 {
+// CHECK:   %[[B:.*]] = memref.get_global @b : memref<101xi32>
+// CHECK:   %[[INDEX_2:.*]] = arith.index_cast %[[I]] : i32 to index
+// CHECK:   %[[B_VALUE:.*]] = memref.load %[[B]][%[[INDEX_2]]] : memref<101xi32>
+// CHECK:   %[[A:.*]] = memref.get_global @a : memref<101xi32>
+// CHECK:   %[[INDEX_1:.*]] = arith.index_cast %[[I]] : i32 to index
+// CHECK:   %[[A_VALUE:.*]] = memref.load %[[A]][%[[INDEX_1]]] : memref<101xi32>
+// CHECK:   %[[SUM:.*]] = arith.addi %[[A_VALUE]], %[[B_VALUE]] : i32
+// CHECK:   memref.store %[[SUM]], %[[A]][%[[INDEX_1]]] : memref<101xi32>
+// CHECK: }
+// CHECK: }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/for_with_continue.cpp b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/for_with_continue.cpp
new file mode 100644
index 0000000000000..7fa4cf0d738f1
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/for_with_continue.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+void for_continue() {
+  for (int i = 0; i < 100; i++)
+    continue;
+
+  // CHECK: scf.while : () -> () {
+  // CHECK:   %[[IV:.+]] = memref.load %alloca[{{%c0(_[0-9]+)?}}]
+  // CHECK:   %[[CMP:.+]] = arith.cmpi slt, %[[IV]], %c100_i32
+  // CHECK:   scf.condition(%[[CMP]])
+  // CHECK: } do {
+  // CHECK:   %[[IV2:.+]] = memref.load %alloca[{{%c0(_[0-9]+)?}}]
+  // CHECK:   %[[ONE:.+]] = arith.constant 1
+  // CHECK:   %[[CMP2:.+]] = arith.addi %[[IV2]], %[[ONE]]
+  // CHECK:   memref.store %[[CMP2]], %alloca[{{%c0(_[0-9]+)?}}]
+  // CHECK:   scf.yield
+  // CHECK: }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/function-attributes.c b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/function-attributes.c
new file mode 100644
index 0000000000000..e8a196e650059
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/function-attributes.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+// CHECK: func.func private @declaration(i32) -> i32
+
+int declaration(int x);
+int declaration_test() {
+  return declaration(15);
+}
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/global.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/global.cir
new file mode 100644
index 0000000000000..e9ba9e42b2bc6
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/global.cir
@@ -0,0 +1,55 @@
+// RUN: cir-opt %s -cir-to-mlir | FileCheck %s -check-prefix=MLIR
+// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM
+
+!u32i = !cir.int<u, 32>
+module {
+  cir.global external @i = #cir.int<2> : !u32i
+  cir.global external @f = #cir.fp<3.000000e+00> : !cir.float
+  cir.global external @b = #cir.bool<true> : !cir.bool
+  cir.global "private" external @a : !cir.array<!u32i x 100>
+  cir.global external @aa = #cir.zero : !cir.array<!cir.array<!u32i x 256> x 256>
+
+  cir.func @get_global_int_value() -> !u32i {
+    %0 = cir.get_global @i : !cir.ptr<!u32i>
+    %1 = cir.load %0 : !cir.ptr<!u32i>, !u32i
+    cir.return %1 : !u32i
+  }
+  cir.func @get_global_float_value() -> !cir.float {
+    %0 = cir.get_global @f : !cir.ptr<!cir.float>
+    %1 = cir.load %0 : !cir.ptr<!cir.float>, !cir.float
+    cir.return %1 : !cir.float
+  }
+  cir.func @get_global_bool_value() -> !cir.bool {
+    %0 = cir.get_global @b : !cir.ptr<!cir.bool>
+    %1 = cir.load %0 : !cir.ptr<!cir.bool>, !cir.bool
+    cir.return %1 : !cir.bool
+  }
+  cir.func @get_global_array_pointer() -> !cir.ptr<!cir.array<!u32i x 100>> {
+    %0 = cir.get_global @a : !cir.ptr<!cir.array<!u32i x 100>>
+    cir.return %0 : !cir.ptr<!cir.array<!u32i x 100>>
+  }
+  cir.func @get_global_multi_array_pointer() -> !cir.ptr<!cir.array<!cir.array<!u32i x 256> x 256>> {
+    %0 = cir.get_global @aa : !cir.ptr<!cir.array<!cir.array<!u32i x 256> x 256>>
+    cir.return %0 : !cir.ptr<!cir.array<!cir.array<!u32i x 256> x 256>>
+  }
+}
+
+// MLIR: memref.global "public" @i : memref<1xi32> = dense<2>
+// MLIR: memref.global "public" @f : memref<1xf32> = dense<3.000000e+00>
+// MLIR: memref.global "public" @b : memref<1xi8> = dense<1>
+// MLIR: memref.global "private" @a : memref<100xi32>
+// MLIR: memref.global "public" @aa : memref<256x256xi32> = dense<0>
+// MLIR: memref.get_global @i : memref<1xi32>
+// MLIR: memref.get_global @f : memref<1xf32>
+// MLIR: memref.get_global @b : memref<1xi8>
+// MLIR: memref.get_global @a : memref<100xi32>
+// MLIR: memref.get_global @aa : memref<256x256xi32>
+
+// LLVM: @i = global [1 x i32] [i32 2]
+// LLVM: @f = global [1 x float] [float 3.000000e+00]
+// LLVM: @b = global [1 x i8] c"\01"
+// LLVM: @a = private global [100 x i32] undef
+// LLVM: @aa = global [256 x [256 x i32]] zeroinitializer
+// LLVM: load i32, ptr @i
+// LLVM: load float, ptr @f
+// LLVM: load i8, ptr @b
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/global.cpp b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/global.cpp
new file mode 100644
index 0000000000000..d64488a11d2ec
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/global.cpp
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+float f[32000];
+// CHECK: memref.global "public" @f : memref<32000xf32> = dense<0.000000e+00>
+double d;
+// CHECK: memref.global "public" @d : memref<1xf64> = dense<0.000000e+00>
+float f_init[] = {1.0, 2.0};
+// CHECK: memref.global "public" @f_init : memref<2xf32> = dense<[1.000000e+00, 2.000000e+00]>
+int i_init[2] = {0, 1};
+// CHECK: memref.global "public" @i_init : memref<2xi32> = dense<[0, 1]>
+char string[] = "whatnow";
+// CHECK: memref.global "public" @string : memref<8xi8> = dense<[119, 104, 97, 116, 110, 111, 119, 0]>
+int excess_sint[4] = {1, 2};
+// CHECK: memref.global "public" @excess_sint : memref<4xi32> = dense<[1, 2, 0, 0]>
+int sint[] = {123, 456, 789};
+// CHECK: memref.global "public" @sint : memref<3xi32> = dense<[123, 456, 789]>
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/goto.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/goto.cir
new file mode 100644
index 0000000000000..6c1d5c66fffa1
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/goto.cir
@@ -0,0 +1,35 @@
+// RUN: cir-opt %s -canonicalize -cir-to-mlir -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-opt %s -canonicalize -cir-to-mlir -cir-mlir-to-llvm -o - | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM
+
+!u32i = !cir.int<u, 32>
+module {
+  cir.func @foo() {
+    %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["b", init] {alignment = 4 : i64}
+    %1 = cir.const #cir.int<1> : !u32i
+    cir.store %1, %0 : !u32i, !cir.ptr<!u32i>
+    cir.br ^bb2
+  ^bb1:  // no predecessors
+    %2 = cir.load %0 : !cir.ptr<!u32i>, !u32i
+    %3 = cir.const #cir.int<1> : !u32i
+    %4 = cir.binop(add, %2, %3) : !u32i
+    cir.store %4, %0 : !u32i, !cir.ptr<!u32i>
+    cir.br ^bb2
+  ^bb2:  // 2 preds: ^bb0, ^bb1
+    %5 = cir.load %0 : !cir.ptr<!u32i>, !u32i
+    %6 = cir.const #cir.int<2> : !u32i
+    %7 = cir.binop(add, %5, %6) : !u32i
+    cir.store %7, %0 : !u32i, !cir.ptr<!u32i>
+    cir.return
+  }
+}
+
+//      MLIR: module {
+// MLIR-NEXT: func @foo
+//      MLIR: cf.br ^bb1
+//      MLIR: ^bb1:
+//      MLIR: return
+
+//      LLVM: br label %[[Value:[0-9]+]]
+// LLVM-EMPTY:
+// LLVM-NEXT: [[Value]]:              ; preds =
+//      LLVM: ret void
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/if.c b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/if.c
new file mode 100644
index 0000000000000..30b78bb8835d4
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/if.c
@@ -0,0 +1,117 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+void foo() {
+  int a = 2;
+  int b = 0;
+  if (a > 0) {
+    b++;
+  } else {
+    b--;
+  }
+}
+
+//CHECK: func.func @foo() {
+//CHECK:   %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+//CHECK:   %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+//CHECK:   %[[C2_I32:.+]] = arith.constant 2 : i32
+//CHECK:   memref.store %[[C2_I32]], %[[alloca]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:   %[[C0_I32:.+]] = arith.constant 0 : i32
+//CHECK:   memref.store %[[C0_I32]], %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:   memref.alloca_scope  {
+//CHECK:     %[[ZERO:.+]] = memref.load %[[alloca]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:     %[[C0_I32_1:.+]] = arith.constant 0 : i32
+//CHECK:     %[[ONE:.+]] = arith.cmpi sgt, %[[ZERO]], %[[C0_I32_1]] : i32
+//CHECK:     scf.if %[[ONE]] {
+//CHECK:       %[[SIX:.+]] = memref.load %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:       %[[C1_I32:.+]] = arith.constant 1 : i32
+//CHECK:       %[[SEVEN:.+]] = arith.addi %[[SIX]], %[[C1_I32]] : i32
+//CHECK:       memref.store %[[SEVEN]], %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:     } else {
+//CHECK:       %[[SIX:.+]] = memref.load %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:       %[[C1_I32:.+]] = arith.constant -1 : i32
+//CHECK:       %[[SEVEN:.+]] = arith.addi %[[SIX]], %[[C1_I32]] : i32
+//CHECK:       memref.store %[[SEVEN]], %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:     }
+//CHECK:   }
+//CHECK:   return
+//CHECK: }
+
+void foo2() {
+  int a = 2;
+  int b = 0;
+  if (a < 3) {
+    b++;
+  }
+}
+
+//CHECK: func.func @foo2() {
+//CHECK:   %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+//CHECK:   %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+//CHECK:   %[[C2_I32:.+]] = arith.constant 2 : i32
+//CHECK:   memref.store %[[C2_I32]], %[[alloca]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:   %[[C0_I32:.+]] = arith.constant 0 : i32
+//CHECK:   memref.store %[[C0_I32]], %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:   memref.alloca_scope  {
+//CHECK:     %[[ZERO:.+]] = memref.load %[[alloca]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:     %[[C3_I32:.+]] = arith.constant 3 : i32
+//CHECK:     %[[ONE:.+]] = arith.cmpi slt, %[[ZERO]], %[[C3_I32]] : i32
+//CHECK:     scf.if %[[ONE]] {
+//CHECK:       %[[SIX:.+]] = memref.load %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:       %[[C1_I32:.+]] = arith.constant 1 : i32
+//CHECK:       %[[SEVEN:.+]] = arith.addi %[[SIX]], %[[C1_I32]] : i32
+//CHECK:       memref.store %[[SEVEN]], %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:     }
+//CHECK:   }
+//CHECK:   return
+//CHECK: }
+
+void foo3() {
+  int a = 2;
+  int b = 0;
+  if (a < 3) {
+    int c = 1;
+    if (c > 2) {
+      b++;
+    } else {
+      b--;
+    }
+  }
+}
+
+
+//CHECK: func.func @foo3() {
+//CHECK:   %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+//CHECK:   %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+//CHECK:   %[[C2_I32:.+]] = arith.constant 2 : i32
+//CHECK:   memref.store %[[C2_I32]], %[[alloca]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:   %[[C0_I32:.+]] = arith.constant 0 : i32
+//CHECK:   memref.store %[[C0_I32]], %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:   memref.alloca_scope  {
+//CHECK:     %[[ZERO:.+]] = memref.load %[[alloca]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:     %[[C3_I32:.+]] = arith.constant 3 : i32
+//CHECK:     %[[ONE:.+]] = arith.cmpi slt, %[[ZERO]], %[[C3_I32]] : i32
+//CHECK:     scf.if %[[ONE]] {
+//CHECK:       %[[alloca_2:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+//CHECK:       %[[C1_I32:.+]] = arith.constant 1 : i32
+//CHECK:       memref.store %[[C1_I32]], %[[alloca_2]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:       memref.alloca_scope  {
+//CHECK:         %[[SIX:.+]] = memref.load %[[alloca_2]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:         %[[C2_I32_3:.+]] = arith.constant 2 : i32
+//CHECK:         %[[SEVEN:.+]] = arith.cmpi sgt, %[[SIX]], %[[C2_I32_3]] : i32
+//CHECK:         scf.if %[[SEVEN]] {
+//CHECK:           %[[TWELVE:.+]] = memref.load %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:           %[[C1_I32_5:.+]] = arith.constant 1 : i32
+//CHECK:           %[[THIRTEEN:.+]] = arith.addi %[[TWELVE]], %[[C1_I32_5]] : i32
+//CHECK:           memref.store %[[THIRTEEN]], %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:         } else {
+//CHECK:           %[[TWELVE:.+]] = memref.load %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:           %[[C1_I32_5:.+]] = arith.constant -1 : i32
+//CHECK:           %[[THIRTEEN:.+]] = arith.addi %[[TWELVE]], %[[C1_I32_5]] : i32
+//CHECK:           memref.store %[[THIRTEEN]], %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:         }
+//CHECK:       }
+//CHECK:     }
+//CHECK:   }
+//CHECK:   return
+//CHECK: }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/log.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/log.cir
new file mode 100644
index 0000000000000..e9af7c88ca8a2
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/log.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck %s --input-file %t.mlir
+
+module {
+  cir.func @foo() {
+    %0 = cir.const #cir.fp<1.0> : !cir.float
+    %1 = cir.const #cir.fp<3.0> : !cir.long_double<!cir.f80>
+    %2 = cir.const #cir.fp<2.0> : !cir.double
+    %3 = cir.const #cir.fp<4.0> : !cir.long_double<!cir.double>
+    %4 = cir.log %0 : !cir.float
+    %5 = cir.log %1 : !cir.long_double<!cir.f80>
+    %6 = cir.log2 %2 : !cir.double
+    %7 = cir.log10 %3 : !cir.long_double<!cir.double>
+    cir.return
+  }
+}
+
+// CHECK:      module {
+// CHECK-NEXT:   func.func @foo() {
+// CHECK-NEXT:     %[[C0:.+]] = arith.constant 1.000000e+00 : f32
+// CHECK-NEXT:     %[[C1:.+]] = arith.constant 3.000000e+00 : f80
+// CHECK-NEXT:     %[[C2:.+]] = arith.constant 2.000000e+00 : f64
+// CHECK-NEXT:     %[[C3:.+]] = arith.constant 4.000000e+00 : f64
+// CHECK-NEXT:     %{{.+}} = math.log %[[C0]] : f32
+// CHECK-NEXT:     %{{.+}} = math.log %[[C1]] : f80
+// CHECK-NEXT:     %{{.+}} = math.log2 %[[C2]] : f64
+// CHECK-NEXT:     %{{.+}} = math.log10 %[[C3]] : f64
+// CHECK-NEXT:     return
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/memref.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/memref.cir
new file mode 100644
index 0000000000000..f2ab7f328bd0a
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/memref.cir
@@ -0,0 +1,40 @@
+// RUN: cir-opt %s -cir-to-mlir -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm -o - | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM
+
+!u32i = !cir.int<u, 32>
+module {
+  cir.func @foo() -> !u32i {
+    %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["x", init] {alignment = 4 : i64}
+    %1 = cir.const #cir.int<1> : !u32i
+    cir.store %1, %0 : !u32i, !cir.ptr<!u32i>
+    %2 = cir.load %0 : !cir.ptr<!u32i>, !u32i
+    cir.return %2 : !u32i
+  }
+}
+
+//      MLIR: module {
+// MLIR-NEXT:   func @foo() -> i32 {
+// MLIR-NEXT:     [[alloca:%[a-z0-9]+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+// MLIR-NEXT:     %c1_i32 = arith.constant 1 : i32
+// MLIR-NEXT:     [[const0:%[a-z0-9_]+]] = arith.constant 0 : index
+// MLIR-NEXT:     memref.store %c1_i32, [[alloca]][[[const0]]] : memref<1xi32>
+// MLIR-NEXT:     [[const0_1:%[a-z0-9_]+]] = arith.constant 0 : index
+// MLIR-NEXT:     [[load:%[a-z0-9]+]] = memref.load [[alloca]][[[const0_1]]] : memref<1xi32>
+// MLIR-NEXT:     return [[load]] : i32
+// MLIR-NEXT:   }
+// MLIR-NEXT: }
+
+//      LLVM: define i32 @foo()
+// LLVM-NEXT:   %1 = alloca i32, i64
+// LLVM-NEXT:   %2 = insertvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } poison, ptr %1, 0
+// LLVM-NEXT:   %3 = insertvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %2, ptr %1, 1
+// LLVM-NEXT:   %4 = insertvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %3, i64 0, 2
+// LLVM-NEXT:   %5 = insertvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %4, i64 1, 3, 0
+// LLVM-NEXT:   %6 = insertvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %5, i64 1, 4, 0
+// LLVM-NEXT:   %7 = extractvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %6, 1
+// LLVM-NEXT:   %8 = getelementptr inbounds nuw i32, ptr %7, i64 0
+// LLVM-NEXT:   store i32 1, ptr %8, align 4
+// LLVM-NEXT:   %9 = extractvalue { ptr, ptr, i64, [1 x i64], [1 x i64] } %6, 1
+// LLVM-NEXT:   %10 = getelementptr inbounds nuw i32, ptr %9, i64 0
+// LLVM-NEXT:   %11 = load i32, ptr %10, align 4
+// LLVM-NEXT:   ret i32 %11
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/ptr-arg.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/ptr-arg.cir
new file mode 100644
index 0000000000000..5e8da3ad9ce70
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/ptr-arg.cir
@@ -0,0 +1,46 @@
+// RUN: cir-opt %s -cir-to-mlir | FileCheck %s -check-prefix=MLIR
+
+!s32i = !cir.int<s, 32>
+!u32i = !cir.int<u, 32>
+module {
+  cir.func @ptrLoad(%arg0: !cir.ptr<!s32i>) -> !s32i {
+    %0 = cir.load align(4) %arg0 : !cir.ptr<!s32i>, !s32i
+    cir.return %0 : !s32i
+// MLIR-LABEL: func.func @ptrLoad
+// MLIR:       %[[VALUE:.*]] = memref.load %arg0[{{%c0(_[0-9]+)?}}] : memref<?xi32>
+// MLIR:       return %[[VALUE]] : i32
+  }
+  cir.func @ptrStore(%arg0: !cir.ptr<!s32i>, %arg1: !s32i) -> !s32i {
+    cir.store align(4) %arg1, %arg0 : !s32i, !cir.ptr<!s32i>
+    %0 = cir.load align(4) %arg0 : !cir.ptr<!s32i>, !s32i
+    cir.return %0 : !s32i
+// MLIR-LABEL: func.func @ptrStore
+// MLIR:       memref.store %arg1, %arg0[{{%c0(_[0-9]+)?}}] : memref<?xi32>
+// MLIR:       %[[VALUE:.*]] = memref.load %arg0[{{%c0(_[0-9]+)?}}] : memref<?xi32>
+// MLIR:       return %[[VALUE]] : i32
+  }
+  cir.func @arrayLoad(%arg0: !cir.ptr<!s32i>, %arg1: !u32i) -> !s32i {
+    %0 = cir.ptr_stride %arg0, %arg1 : (!cir.ptr<!s32i>, !u32i) -> !cir.ptr<!s32i>
+    %1 = cir.load align(4) %0 : !cir.ptr<!s32i>, !s32i
+    cir.return %1 : !s32i
+// MLIR-LABEL: func.func @arrayLoad
+// MLIR:       %[[IDX:.*]] = arith.index_cast %arg1 : i32 to index
+// MLIR:       %[[VALUE:.*]] = memref.load %arg0[%[[IDX]]] : memref<?xi32>
+// MLIR:       return %[[VALUE]] : i32
+  }
+
+  cir.func @arrayStore(%arg0: !cir.ptr<!s32i>, %arg1: !s32i, %arg2: !u32i) -> !s32i {
+    %0 = cir.ptr_stride %arg0, %arg2 : (!cir.ptr<!s32i>, !u32i) -> !cir.ptr<!s32i>
+    cir.store align(4) %arg1, %0 : !s32i, !cir.ptr<!s32i>
+    %1 = cir.ptr_stride %arg0, %arg2 : (!cir.ptr<!s32i>, !u32i) -> !cir.ptr<!s32i>
+    %2 = cir.load align(4) %1 : !cir.ptr<!s32i>, !s32i
+    cir.return %2 : !s32i
+// MLIR-LABEL: func.func @arrayStore
+// MLIR:       %[[IDX:.*]] = arith.index_cast %arg2 : i32 to index
+// MLIR:       memref.store %arg1, %arg0[%[[IDX]]] : memref<?xi32>
+// MLIR:       %[[IDX2:.*]] = arith.index_cast %arg2 : i32 to index
+// MLIR:       %[[VALUE:.*]] = memref.load %arg0[%[[IDX2]]] : memref<?xi32>
+// MLIR:       return %[[VALUE]] : i32
+  }
+}
+
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/ptrstride-ptr.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/ptrstride-ptr.cir
new file mode 100644
index 0000000000000..b3a5dc64709fd
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/ptrstride-ptr.cir
@@ -0,0 +1,40 @@
+// RUN: cir-opt %s --cir-to-mlir | FileCheck %s -check-prefix=MLIR
+
+!s32i = !cir.int<s, 32>
+module {
+  cir.func @raw_pointer(%p : !cir.ptr<!s32i>) -> !s32i {
+    // MLIR:     func.func @raw_pointer(%[[ARG0:.*]]: memref<?xi32>) -> i32 {
+    // MLIR:      %[[TWO:.*]] = arith.constant 2 : i32
+    // MLIR-NEXT: %[[I:.*]] = arith.index_cast %[[TWO]] : i32 to index
+    // MLIR-NEXT: %[[R:.*]] = memref.load %[[ARG0]][%[[I]]] : memref<?xi32>
+    // MLIR-NEXT: return %[[R]] : i32
+
+    %0 = cir.const #cir.int<2> : !s32i
+    %1 = cir.ptr_stride %p, %0 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+    %2 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    cir.return %2 : !s32i
+  }
+
+  cir.func @raw_complex_pointer(%p : !cir.ptr<!cir.array<!s32i x 8>>) -> !s32i {
+    // MLIR: %[[C2_I32:.*]] = arith.constant 2 : i32
+    // MLIR-NEXT: %[[INNER_OFFSET:.*]] = ptr.type_offset i32 : index
+    // MLIR-NEXT: %[[C2_INDEX:.*]] = arith.index_cast %[[C2_I32]] : i32 to index
+    // MLIR-NEXT: %[[MUL1:.*]] = arith.muli %[[C2_INDEX]], %[[INNER_OFFSET]] : index
+    // MLIR-NEXT: %[[C8:.*]] = arith.constant 8 : index
+    // MLIR-NEXT: %[[MUL2:.*]] = arith.muli %[[MUL1]], %[[C8]] : index
+    // MLIR-NEXT: %[[CAST1:.*]] = memref.memory_space_cast %arg0 : memref<8xi32> to memref<8xi32, #ptr.generic_space>
+    // MLIR-NEXT: %[[META:.*]] = ptr.get_metadata %[[CAST1]] : memref<8xi32, #ptr.generic_space>
+    // MLIR-NEXT: %[[P1:.*]] = ptr.to_ptr %[[CAST1]] : memref<8xi32, #ptr.generic_space> -> <#ptr.generic_space>
+    // MLIR-NEXT: %[[P2:.*]] = ptr.ptr_add %[[P1]], %[[MUL2]] : !ptr.ptr<#ptr.generic_space>, index
+    // MLIR-NEXT: %[[PP:.*]] = ptr.from_ptr %[[P2]] metadata %[[META]] : <#ptr.generic_space> -> memref<8xi32, #ptr.generic_space>
+    // MLIR-NEXT: %[[CAST2:.*]] = memref.memory_space_cast %[[PP]] : memref<8xi32, #ptr.generic_space> to memref<8xi32>
+    // MLIR-NEXT: %[[I2:.*]] = arith.index_cast %[[C2_I32]] : i32 to index
+    // MLIR-NEXT: %[[R:.*]] = memref.load %[[CAST2]][%[[I2]]] : memref<8xi32>
+    // MLIR-NEXT: return %[[R]] : i32
+    %0 = cir.const #cir.int<2> : !s32i
+    %1 = cir.ptr_stride %p, %0 : (!cir.ptr<!cir.array<!s32i x 8>>, !s32i) -> !cir.ptr<!cir.array<!s32i x 8>>
+    %2 = cir.get_element %1[%0] : (!cir.ptr<!cir.array<!s32i x 8>>, !s32i) -> !cir.ptr<!s32i>
+    %3 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+    cir.return %3 : !s32i
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/ptrstride.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/ptrstride.cir
new file mode 100644
index 0000000000000..38d8b21785bbc
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/ptrstride.cir
@@ -0,0 +1,78 @@
+// RUN: cir-opt %s -cir-to-mlir | FileCheck %s -check-prefix=MLIR
+// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+module {
+  cir.global "private" external @a : !cir.array<!s32i x 100>
+  cir.global "private" external @aa : !cir.array<!cir.array<!s32i x 100> x 100>
+
+  // int get_1d_array_value() { return a[1]; }
+  // MLIR-LABEL: func.func @get_1d_array_value() -> i32
+  // LLVM-LABEL: define i32 @get_1d_array_value()
+  cir.func @get_1d_array_value() -> !s32i {
+    // MLIR-NEXT: %[[BASE:.*]] = memref.get_global @a : memref<100xi32>
+    // MLIR-NEXT: %[[ONE:.*]] = arith.constant 1 : i32
+    // MLIR-NEXT: %[[INDEX:.*]] = arith.index_cast %[[ONE]] : i32 to index
+    // MLIR-NEXT: %[[VALUE:.*]] = memref.load %[[BASE]][%[[INDEX]]] : memref<100xi32>
+
+    // LLVM-NEXT: load i32, ptr getelementptr inbounds nuw (i8, ptr @a, i64 4)
+
+    %1 = cir.get_global @a : !cir.ptr<!cir.array<!s32i x 100>>
+    %2 = cir.const #cir.int<1> : !s32i
+    %3 = cir.cast array_to_ptrdecay %1 : !cir.ptr<!cir.array<!s32i x 100>> -> !cir.ptr<!s32i>
+    %4 = cir.ptr_stride %3, %2 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+    %5 = cir.load %4 : !cir.ptr<!s32i>, !s32i
+    cir.return %5 : !s32i
+  }
+
+  // int get_2d_array_value() { return aa[1][2]; }
+  // MLIR-LABEL: func.func @get_2d_array_value() -> i32
+  // LLVM-LABEL: define i32 @get_2d_array_value()
+  cir.func @get_2d_array_value() -> !s32i {
+    // MLIR-NEXT: %[[BASE:.*]] = memref.get_global @aa : memref<100x100xi32>
+    // MLIR-NEXT: %[[ONE:.*]] = arith.constant 1 : i32
+    // MLIR-NEXT: %[[INDEX1:.*]] = arith.index_cast %[[ONE]] : i32 to index
+    // MLIR-NEXT: %[[TWO:.*]] = arith.constant 2 : i32
+    // MLIR-NEXT: %[[INDEX2:.*]] = arith.index_cast %[[TWO]] : i32 to index
+    // MLIR-NEXT: %[[VALUE:.*]] = memref.load %[[BASE]][%[[INDEX1]], %[[INDEX2]]] : memref<100x100xi32>
+
+    // LLVM-NEXT: load i32, ptr getelementptr inbounds nuw (i8, ptr @aa, i64 408)
+
+    %1 = cir.get_global @aa : !cir.ptr<!cir.array<!cir.array<!s32i x 100> x 100>>
+    %2 = cir.const #cir.int<1> : !s32i
+    %3 = cir.cast array_to_ptrdecay %1 : !cir.ptr<!cir.array<!cir.array<!s32i x 100> x 100>> -> !cir.ptr<!cir.array<!s32i x 100>>
+    %4 = cir.ptr_stride %3, %2 : (!cir.ptr<!cir.array<!s32i x 100>>, !s32i) -> !cir.ptr<!cir.array<!s32i x 100>>
+    %5 = cir.const #cir.int<2> : !s32i
+    %6 = cir.cast array_to_ptrdecay %4 : !cir.ptr<!cir.array<!s32i x 100>> -> !cir.ptr<!s32i>
+    %7 = cir.ptr_stride %6, %5 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+    %8 = cir.load %7 : !cir.ptr<!s32i>, !s32i
+    cir.return %8 : !s32i
+  }
+
+  // void inc_1d_array_value() { a[1] += 2; }
+  // MLIR-LABEL: func.func @inc_1d_array_value()
+  // LLVM-LABEL: define void @inc_1d_array_value()
+  cir.func @inc_1d_array_value() {
+    // MLIR-NEXT: %[[TWO:.*]] = arith.constant 2 : i32
+    // MLIR-NEXT: %[[BASE:.*]] = memref.get_global @a : memref<100xi32>
+    // MLIR-NEXT: %[[ONE:.*]] = arith.constant 1 : i32
+    // MLIR-NEXT: %[[INDEX:.*]] = arith.index_cast %[[ONE]] : i32 to index
+    // MLIR-NEXT: %[[VALUE:.*]] = memref.load %[[BASE]][%[[INDEX]]] : memref<100xi32>
+    // MLIR-NEXT: %[[VALUE_INC:.*]] = arith.addi %[[VALUE]], %[[TWO]] : i32
+    // MLIR-NEXT: memref.store %[[VALUE_INC]], %[[BASE]][%[[INDEX]]] : memref<100xi32>
+
+    // LLVM-NEXT: %[[VALUE:.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @a, i64 4)
+    // LLVM-NEXT: %[[VALUE_INC:.*]] = add i32 %[[VALUE]], 2
+    // LLVM-NEXT: store i32 %[[VALUE_INC]], ptr getelementptr inbounds nuw (i8, ptr @a, i64 4)
+
+    %0 = cir.const #cir.int<2> : !s32i
+    %1 = cir.get_global @a : !cir.ptr<!cir.array<!s32i x 100>>
+    %2 = cir.const #cir.int<1> : !s32i
+    %3 = cir.cast array_to_ptrdecay %1 : !cir.ptr<!cir.array<!s32i x 100>> -> !cir.ptr<!s32i>
+    %4 = cir.ptr_stride %3, %2 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+    %5 = cir.load %4 : !cir.ptr<!s32i>, !s32i
+    %6 = cir.binop(add, %5, %0) : !s32i
+    cir.store %6, %4 : !s32i, !cir.ptr<!s32i>
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/round.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/round.cir
new file mode 100644
index 0000000000000..117a93bcba9b3
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/round.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck %s --input-file %t.mlir
+
+module {
+  cir.func @foo() {
+    %0 = cir.const #cir.fp<1.31> : !cir.float
+    %1 = cir.const #cir.fp<3.0> : !cir.long_double<!cir.f80>
+    %2 = cir.const #cir.fp<2.73> : !cir.double
+    %3 = cir.const #cir.fp<4.67> : !cir.long_double<!cir.double>
+    %4 = cir.round %0 : !cir.float
+    %5 = cir.round %1 : !cir.long_double<!cir.f80>
+    %6 = cir.round %2 : !cir.double
+    %7 = cir.round %3 : !cir.long_double<!cir.double>
+    cir.return
+  }
+}
+
+// CHECK:      module {
+// CHECK-NEXT:   func.func @foo() {
+// CHECK-NEXT:     %[[C0:.+]] = arith.constant 1.310000e+00 : f32
+// CHECK-NEXT:     %[[C1:.+]] = arith.constant 3.000000e+00 : f80
+// CHECK-NEXT:     %[[C2:.+]] = arith.constant 2.730000e+00 : f64
+// CHECK-NEXT:     %[[C3:.+]] = arith.constant 4.670000e+00 : f64
+// CHECK-NEXT:     %{{.+}} = math.round %[[C0]] : f32
+// CHECK-NEXT:     %{{.+}} = math.round %[[C1]] : f80
+// CHECK-NEXT:     %{{.+}} = math.round %[[C2]] : f64
+// CHECK-NEXT:     %{{.+}} = math.round %[[C3]] : f64
+// CHECK-NEXT:     return
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/scope.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/scope.cir
new file mode 100644
index 0000000000000..fddf16b09aeef
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/scope.cir
@@ -0,0 +1,50 @@
+// RUN: cir-opt %s -cir-to-mlir -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm -o - | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM
+
+!u32i = !cir.int<u, 32>
+module {
+  cir.func @foo() {
+    cir.scope {
+      %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["a", init] {alignment = 4 : i64}
+      %1 = cir.const #cir.int<4> : !u32i
+      cir.store %1, %0 : !u32i, !cir.ptr<!u32i>
+    }
+    cir.return
+  }
+
+//      MLIR: func.func @foo()
+// MLIR-NEXT:   memref.alloca_scope
+// MLIR-NEXT:     %alloca = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+// MLIR-NEXT:     %c4_i32 = arith.constant 4 : i32
+// MLIR-NEXT:     %c0 = arith.constant 0 : index
+// MLIR-NEXT:     memref.store %c4_i32, %alloca[%c0] : memref<1xi32>
+// MLIR-NEXT:   }
+// MLIR-NEXT:   return
+
+// LLVM:      define void @foo() {
+// LLVM-NEXT:   %1 = call ptr @llvm.stacksave.p0()
+// LLVM-NEXT:   br label %2
+// LLVM-EMPTY:
+// LLVM-NEXT: 2:
+// LLVM-NEXT:   %3 = alloca i32, i64 1, align 4
+// LLVM:        store i32 4, ptr {{.*}}, align 4
+// LLVM-NEXT:   call void @llvm.stackrestore.p0(ptr %1)
+// LLVM-NEXT:   br label %[[BRANCH:[0-9]+]]
+// LLVM-EMPTY:
+// LLVM-NEXT: [[BRANCH]]:
+// LLVM-NEXT:   ret void
+// LLVM-NEXT: }
+
+  // Should drop empty scopes.
+  cir.func @empty_scope() {
+    cir.scope {
+    }
+    cir.return
+  }
+  //      MLIR: func.func @empty_scope()
+  // MLIR-NEXT:   return
+  // MLIR-NEXT: }
+
+  // LLVM: define void @empty_scope()
+  // LLVM:   ret void
+}
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/select.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/select.cir
new file mode 100644
index 0000000000000..8591d39d9fa3d
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/select.cir
@@ -0,0 +1,32 @@
+// RUN: cir-opt %s -cir-to-mlir -o - | FileCheck %s -check-prefix=MLIR
+
+!u32i = !cir.int<u, 32>
+module {
+  cir.func @foo(%arg0: !u32i) -> !u32i {
+    %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["x", init] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !u32i, !cir.ptr<!u32i>
+    %1 = cir.const #cir.int<100> : !u32i
+    %2 = cir.const #cir.int<10> : !u32i
+    %3 = cir.const #cir.int<20> : !u32i
+    %4 = cir.load %0 : !cir.ptr<!u32i>, !u32i
+    %5 = cir.cmp(gt, %4, %1) : !u32i, !cir.bool
+    %6 = cir.select if %5 then %1 else %2 : (!cir.bool, !u32i, !u32i) -> !u32i
+    cir.return %6 : !u32i
+  }
+}
+
+// MLIR: module {
+// MLIR-NEXT:  func.func @foo(%[[ARG0:.*]]: i32) -> i32 {
+// MLIR-NEXT:    %[[ALLOCA:.*]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+// MLIR-NEXT:    %[[C0_IDX:.*]] = arith.constant 0 : index
+// MLIR-NEXT:    memref.store %[[ARG0]], %[[ALLOCA]][%[[C0_IDX]]] : memref<1xi32>
+// MLIR-NEXT:    %[[C100_I32:.*]] = arith.constant 100 : i32
+// MLIR-NEXT:    %[[C10_I32:.*]] = arith.constant 10 : i32
+// MLIR-NEXT:    %[[C20_I32:.*]] = arith.constant 20 : i32
+// MLIR-NEXT:    %[[C0_IDX2:.*]] = arith.constant 0 : index
+// MLIR-NEXT:    %[[V:.*]] = memref.load %[[ALLOCA]][%[[C0_IDX2]]] : memref<1xi32>
+// MLIR-NEXT:    %[[C:.*]] = arith.cmpi ugt, %[[V]], %[[C100_I32]] : i32
+// MLIR-NEXT:    %[[OP:.*]] = arith.select %[[C]], %[[C100_I32]], %[[C10_I32]] : i32
+// MLIR-NEXT:    return %[[OP]] : i32
+// MLIR-NEXT:  }
+// MLIR-NEXT:}
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/shift.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/shift.cir
new file mode 100644
index 0000000000000..aecbc3f45940c
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/shift.cir
@@ -0,0 +1,31 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck %s --input-file %t.mlir
+
+!s16i = !cir.int<s, 16>
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+!u16i = !cir.int<u, 16>
+module {
+  cir.func @testShiftWithDifferentValueAndAmountTypes(%arg0: !s16i, %arg1: !s32i, %arg2: !s64i, %arg3: !u16i) {
+    %1 = cir.shift(left, %arg1: !s32i, %arg2 : !s64i) -> !s32i
+    %2 = cir.shift(left, %arg1 : !s32i, %arg0 : !s16i) -> !s32i
+    %3 = cir.shift(left, %arg1 : !s32i, %arg3 : !u16i) -> !s32i
+    %4 = cir.shift(left, %arg1 : !s32i, %arg1 : !s32i) -> !s32i
+    cir.return
+  }
+}
+
+// CHECK:      module {
+// CHECK-NEXT:   func.func @testShiftWithDifferentValueAndAmountTypes(%arg0: i16, %arg1: i32, %arg2: i64, %arg3: i16) {
+// CHECK-NEXT:     %[[TRUNC:.+]] = arith.trunci %arg2 : i64 to i32
+// CHECK-NEXT:     %[[SHIFT_TRUNC:.+]] = arith.shli %arg1, %[[TRUNC]] : i32
+// CHECK-NEXT:     %[[EXTS:.+]] = arith.extsi %arg0 : i16 to i32
+// CHECK-NEXT:     %[[SHIFT_EXTS:.+]] = arith.shli %arg1, %[[EXTS]] : i32
+// CHECK-NEXT:     %[[EXTU:.+]] = arith.extui %arg3 : i16 to i32
+// CHECK-NEXT:     %[[SHIFT_EXTU:.+]] = arith.shli %arg1, %[[EXTU]] : i32
+// CHECK-NEXT:     %[[BITCAST:.+]] = arith.bitcast %arg1 : i32 to i32
+// CHECK-NEXT:     %[[SHIFT_BITCAST:.+]] = arith.shli %arg1, %[[BITCAST]] : i32
+// CHECK-NEXT:     return
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
+
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/sin.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/sin.cir
new file mode 100644
index 0000000000000..c433b52e105cc
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/sin.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck %s --input-file %t.mlir
+
+module {
+  cir.func @foo() {
+    %1 = cir.const #cir.fp<1.0> : !cir.float
+    %2 = cir.const #cir.fp<1.0> : !cir.double
+    %3 = cir.const #cir.fp<1.0> : !cir.long_double<!cir.f80>
+    %4 = cir.const #cir.fp<1.0> : !cir.long_double<!cir.double>
+    %5 = cir.sin %1 : !cir.float
+    %6 = cir.sin %2 : !cir.double
+    %7 = cir.sin %3 : !cir.long_double<!cir.f80>
+    %8 = cir.sin %4 : !cir.long_double<!cir.double>
+    cir.return
+  }
+}
+
+// CHECK:      module {
+// CHECK-NEXT:   func.func @foo() {
+// CHECK-NEXT:     %[[C0:.+]] = arith.constant 1.000000e+00 : f32
+// CHECK-NEXT:     %[[C1:.+]] = arith.constant 1.000000e+00 : f64
+// CHECK-NEXT:     %[[C2:.+]] = arith.constant 1.000000e+00 : f80
+// CHECK-NEXT:     %[[C3:.+]] = arith.constant 1.000000e+00 : f64
+// CHECK-NEXT:     %{{.+}} = math.sin %[[C0]] : f32
+// CHECK-NEXT:     %{{.+}} = math.sin %[[C1]] : f64
+// CHECK-NEXT:     %{{.+}} = math.sin %[[C2]] : f80
+// CHECK-NEXT:     %{{.+}} = math.sin %[[C3]] : f64
+// CHECK-NEXT:     return
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/sqrt.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/sqrt.cir
new file mode 100644
index 0000000000000..a9b8c1a7efa68
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/sqrt.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck %s --input-file %t.mlir
+
+module {
+  cir.func @foo() {
+    %0 = cir.const #cir.fp<9.0> : !cir.float
+    %1 = cir.const #cir.fp<100.0> : !cir.long_double<!cir.f80>
+    %2 = cir.const #cir.fp<1.0> : !cir.double
+    %3 = cir.const #cir.fp<2.56> : !cir.long_double<!cir.double>
+    %4 = cir.sqrt %0 : !cir.float
+    %5 = cir.sqrt %1 : !cir.long_double<!cir.f80>
+    %6 = cir.sqrt %2 : !cir.double
+    %7 = cir.sqrt %3 : !cir.long_double<!cir.double>
+    cir.return
+  }
+}
+
+// CHECK:      module {
+// CHECK-NEXT:   func.func @foo() {
+// CHECK-NEXT:     %[[C0:.+]] = arith.constant 9.000000e+00 : f32
+// CHECK-NEXT:     %[[C1:.+]] = arith.constant 1.000000e+02 : f80
+// CHECK-NEXT:     %[[C2:.+]] = arith.constant 1.000000e+00 : f64
+// CHECK-NEXT:     %[[C3:.+]] = arith.constant 2.560000e+00 : f64
+// CHECK-NEXT:     %{{.+}} = math.sqrt %[[C0]] : f32
+// CHECK-NEXT:     %{{.+}} = math.sqrt %[[C1]] : f80
+// CHECK-NEXT:     %{{.+}} = math.sqrt %[[C2]] : f64
+// CHECK-NEXT:     %{{.+}} = math.sqrt %[[C3]] : f64
+// CHECK-NEXT:     return
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/store-memcpy-mlir.cpp b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/store-memcpy-mlir.cpp
new file mode 100644
index 0000000000000..fdab5deab00b2
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/store-memcpy-mlir.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s -check-prefix=MLIR
+
+void foo() {
+  char s1[] = "Hello";
+}
+
+// MLIR:     memref.global "private" constant @[[GLOBAL_ARRAY:.*]] : memref<6xi8> = dense<[72, 101, 108, 108, 111, 0]>
+// MLIR:     @_Z3foov() {
+// MLIR-DAG: %[[ALLOCA:.*]] = memref.alloca() {alignment = 1 : i64} : memref<6xi8>
+// MLIR-DAG: %[[SOURCE:.*]] = memref.get_global @[[GLOBAL_ARRAY]] : memref<6xi8>
+// MLIR:     memref.copy %[[SOURCE]], %[[ALLOCA]] : memref<6xi8> to memref<6xi8>
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/tan.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/tan.cir
new file mode 100644
index 0000000000000..be8e0ccb8168c
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/tan.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s -cir-to-mlir -o %t.mlir
+// RUN: FileCheck %s --input-file %t.mlir
+
+module {
+  cir.func @foo() {
+    %1 = cir.const #cir.fp<1.0> : !cir.float
+    %2 = cir.const #cir.fp<2.0> : !cir.double
+    %3 = cir.const #cir.fp<3.0> : !cir.long_double<!cir.f80>
+    %4 = cir.const #cir.fp<4.0> : !cir.long_double<!cir.double>
+    %5 = cir.tan %1 : !cir.float
+    %6 = cir.tan %2 : !cir.double
+    %7 = cir.tan %3 : !cir.long_double<!cir.f80>
+    %8 = cir.tan %4 : !cir.long_double<!cir.double>
+    cir.return
+  }
+}
+
+// CHECK:      module {
+// CHECK-NEXT:   func.func @foo() {
+// CHECK-NEXT:     %[[C0:.+]] = arith.constant 1.000000e+00 : f32
+// CHECK-NEXT:     %[[C1:.+]] = arith.constant 2.000000e+00 : f64
+// CHECK-NEXT:     %[[C2:.+]] = arith.constant 3.000000e+00 : f80
+// CHECK-NEXT:     %[[C3:.+]] = arith.constant 4.000000e+00 : f64
+// CHECK-NEXT:     %{{.+}} = math.tan %[[C0]] : f32
+// CHECK-NEXT:     %{{.+}} = math.tan %[[C1]] : f64
+// CHECK-NEXT:     %{{.+}} = math.tan %[[C2]] : f80
+// CHECK-NEXT:     %{{.+}} = math.tan %[[C3]] : f64
+// CHECK-NEXT:     return
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/tenary.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/tenary.cir
new file mode 100644
index 0000000000000..29ceb9ada51df
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/tenary.cir
@@ -0,0 +1,43 @@
+// RUN: cir-opt %s -cir-to-mlir | FileCheck %s -check-prefix=MLIR
+// RUN: cir-opt %s -cir-to-mlir --canonicalize | FileCheck %s --check-prefix=MLIR-CANONICALIZE
+// RUN: cir-opt %s -cir-to-mlir --canonicalize -cir-mlir-to-llvm | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+
+module {
+cir.func @_Z1xi(%arg0: !s32i) -> !s32i {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    %3 = cir.const #cir.int<0> : !s32i
+    %4 = cir.cmp(gt, %2, %3) : !s32i, !cir.bool
+    %5 = cir.ternary(%4, true {
+      %7 = cir.const #cir.int<3> : !s32i
+      cir.yield %7 : !s32i
+    }, false {
+      %7 = cir.const #cir.int<5> : !s32i
+      cir.yield %7 : !s32i
+    }) : (!cir.bool) -> !s32i
+    cir.store %5, %1 : !s32i, !cir.ptr<!s32i>
+    %6 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    cir.return %6 : !s32i
+  }
+}
+
+// MLIR:      %1 = arith.cmpi sgt, %0, %c0_i32 : i32
+// MLIR-NEXT: %2 = scf.if %1 -> (i32) {
+// MLIR-NEXT:   %c3_i32 = arith.constant 3 : i32
+// MLIR-NEXT:   scf.yield %c3_i32 : i32
+// MLIR-NEXT: } else {
+// MLIR-NEXT:   %c5_i32 = arith.constant 5 : i32
+// MLIR-NEXT:   scf.yield %c5_i32 : i32
+// MLIR-NEXT: }
+// MLIR-NEXT: %[[CONST0:[a-z0-9_]+]] = arith.constant 0 : index
+// MLIR-NEXT: memref.store %2, %alloca_0[%[[CONST0]]] : memref<1xi32>
+
+// MLIR-CANONICALIZE: %[[CMP:.*]] = arith.cmpi sgt
+// MLIR-CANONICALIZE: arith.select %[[CMP]]
+
+// LLVM: %[[CMP:.*]] = icmp sgt
+// LLVM: select i1 %[[CMP]]
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/unary-inc-dec.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/unary-inc-dec.cir
new file mode 100644
index 0000000000000..35bcc3a42962f
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/unary-inc-dec.cir
@@ -0,0 +1,48 @@
+// RUN: cir-opt %s --cir-to-mlir -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-opt %s --cir-to-mlir -cir-mlir-to-llvm -o - | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+module {
+  cir.func @foo() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] {alignment = 4 : i64}
+    %2 = cir.const #cir.int<2> : !s32i
+    cir.store %2, %0 : !s32i, !cir.ptr<!s32i>
+    cir.store %2, %1 : !s32i, !cir.ptr<!s32i>
+
+    %3 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    %4 = cir.unary(inc, %3) : !s32i, !s32i
+    cir.store %4, %0 : !s32i, !cir.ptr<!s32i>
+
+    %5 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %6 = cir.unary(dec, %5) : !s32i, !s32i
+    cir.store %6, %1 : !s32i, !cir.ptr<!s32i>
+
+    // test float
+    %7 = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] {alignment = 4 : i64}
+    cir.return
+  }
+
+// MLIR: = arith.constant 1
+// MLIR: = arith.addi
+// MLIR: = arith.constant -1
+// MLIR: = arith.addi
+
+// LLVM: = add i32 %[[#]], 1
+// LLVM: = add i32 %[[#]], -1
+
+
+  cir.func @floatingPoints(%arg0: !cir.double) {
+    %0 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["X", init] {alignment = 8 : i64}
+    cir.store %arg0, %0 : !cir.double, !cir.ptr<!cir.double>
+    %1 = cir.load %0 : !cir.ptr<!cir.double>, !cir.double
+    %2 = cir.unary(inc, %1) : !cir.double, !cir.double
+    %3 = cir.load %0 : !cir.ptr<!cir.double>, !cir.double
+    %4 = cir.unary(dec, %3) : !cir.double, !cir.double
+    cir.return
+  }
+// MLIR: = arith.constant 1.0
+// MLIR: = arith.addf
+// MLIR: = arith.constant -1.0
+// MLIR: = arith.addf
+}
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/unary-plus-minus.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/unary-plus-minus.cir
new file mode 100644
index 0000000000000..a6101358e5a43
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/unary-plus-minus.cir
@@ -0,0 +1,40 @@
+// RUN: cir-opt %s -cir-to-mlir -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm -o - | mlir-translate -mlir-to-llvmir | FileCheck %s -check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+module {
+  cir.func @foo() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] {alignment = 4 : i64}
+    %2 = cir.const #cir.int<2> : !s32i
+    cir.store %2, %0 : !s32i, !cir.ptr<!s32i>
+    cir.store %2, %1 : !s32i, !cir.ptr<!s32i>
+
+    %3 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    %4 = cir.unary(plus, %3) : !s32i, !s32i
+    cir.store %4, %0 : !s32i, !cir.ptr<!s32i>
+
+    %5 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %6 = cir.unary(minus, %5) : !s32i, !s32i
+    cir.store %6, %1 : !s32i, !cir.ptr<!s32i>
+    cir.return
+  }
+  
+  cir.func @floatingPoints(%arg0: !cir.double) {
+    %0 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["X", init] {alignment = 8 : i64}
+    cir.store %arg0, %0 : !cir.double, !cir.ptr<!cir.double>
+    %1 = cir.load %0 : !cir.ptr<!cir.double>, !cir.double
+    %2 = cir.unary(plus, %1) : !cir.double, !cir.double
+    %3 = cir.load %0 : !cir.ptr<!cir.double>, !cir.double
+    %4 = cir.unary(minus, %3) : !cir.double, !cir.double
+    cir.return
+  }
+}
+
+// MLIR: %[[#INPUT_PLUS:]] = memref.load
+// MLIR: memref.store %[[#INPUT_PLUS]]
+// MLIR: %[[#INPUT_MINUS:]] = memref.load
+// MLIR: %[[ZERO:[a-z0-9_]+]] = arith.constant 0
+// MLIR: arith.subi %[[ZERO]], %[[#INPUT_MINUS]]
+
+// LLVM: = sub i32 0, %[[#]]
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/unreachable.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/unreachable.cir
new file mode 100644
index 0000000000000..843f9ce416077
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/unreachable.cir
@@ -0,0 +1,19 @@
+// RUN: cir-opt %s -cir-to-mlir -o - | FileCheck %s -check-prefix=MLIR
+
+module {
+  cir.func @test_unreachable() {
+    cir.unreachable
+  }
+
+  //      MLIR: func.func @test_unreachable()
+  // MLIR-NEXT:   llvm.unreachable
+
+  cir.func @test_trap() {
+    cir.trap
+  }
+
+  //      MLIR: func.func @test_trap() {
+  // MLIR-NEXT:   llvm.call_intrinsic "llvm.trap"() : () -> ()
+  // MLIR-NEXT:   llvm.unreachable
+  // MLIR-NEXT: }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/vectype.cpp b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/vectype.cpp
new file mode 100644
index 0000000000000..5263e32ac8464
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/vectype.cpp
@@ -0,0 +1,176 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+typedef int vi4 __attribute__((vector_size(16)));
+
+void vector_int_test(int x) {
+
+  // CHECK: %[[ALLOC1:.*]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+  // CHECK: %[[ALLOC2:.*]] = memref.alloca() {alignment = 16 : i64} : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC3:.*]] = memref.alloca() {alignment = 16 : i64} : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC4:.*]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+  // CHECK: %[[ALLOC5:.*]] = memref.alloca() {alignment = 16 : i64} : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC6:.*]] = memref.alloca() {alignment = 16 : i64} : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC7:.*]] = memref.alloca() {alignment = 16 : i64} : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC8:.*]] = memref.alloca() {alignment = 16 : i64} : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC9:.*]] = memref.alloca() {alignment = 16 : i64} : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC10:.*]] = memref.alloca() {alignment = 16 : i64} : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC11:.*]] = memref.alloca() {alignment = 16 : i64} : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC12:.*]] = memref.alloca() {alignment = 16 : i64} : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC13:.*]] = memref.alloca() {alignment = 16 : i64} : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC14:.*]] = memref.alloca() {alignment = 16 : i64} : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC15:.*]] = memref.alloca() {alignment = 16 : i64} : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC16:.*]] = memref.alloca() {alignment = 16 : i64} : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC17:.*]] = memref.alloca() {alignment = 16 : i64} : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC18:.*]] = memref.alloca() {alignment = 16 : i64} : memref<1xvector<4xi32>>
+
+  // CHECK: memref.store %arg0, %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+
+  vi4 a = { 1, 2, 3, 4 };
+
+  // CHECK: %[[CST:.*]] = arith.constant dense<[1, 2, 3, 4]> : vector<4xi32>
+  // CHECK: memref.store %[[CST]], %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  
+  vi4 b = {x, 5, 6, x + 1};
+
+  // CHECK: %[[VAL1:.*]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+  // CHECK: %[[C5:.*]] = arith.constant 5 : i32
+  // CHECK: %[[C6:.*]] = arith.constant 6 : i32
+  // CHECK: %[[VAL2:.*]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+  // CHECK: %[[C1_I32_2:.*]] = arith.constant 1 : i32
+  // CHECK: %[[SUM:.*]] = arith.addi %[[VAL2]], %[[C1_I32_2]] : i32
+  // CHECK: %[[CST2:.*]] = arith.constant dense<0> : vector<4xi32>
+  // CHECK: %[[VEC4:.*]] = vector.insert %[[VAL1]], %[[CST2]] [0] : i32 into vector<4xi32>
+  // CHECK: %[[VEC5:.*]] = vector.insert %[[C5]], %[[VEC4]] [1] : i32 into vector<4xi32>
+  // CHECK: %[[VEC6:.*]] = vector.insert %[[C6]], %[[VEC5]] [2] : i32 into vector<4xi32>
+  // CHECK: %[[VEC7:.*]] = vector.insert %[[SUM]], %[[VEC6]] [3] : i32 into vector<4xi32>
+  // CHECK: memref.store %[[VEC7]], %[[ALLOC3]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+
+  a[x] = x;
+  
+  // CHECK: %[[VAL3:.*]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+  // CHECK: %[[VAL4:.*]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+  // CHECK: %[[VEC8:.*]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[IDX_CAST:.*]] = arith.index_cast %[[VAL4]] : i32 to index
+  // CHECK: %[[VEC9:.*]] = vector.insert %[[VAL3]], %[[VEC8]] [%[[IDX_CAST]]] : i32 into vector<4xi32>
+  // CHECK: memref.store %[[VEC9]], %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+
+  int c = a[x];
+
+  // CHECK: %[[VEC10:.*]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[VAL5:.*]] = memref.load %[[ALLOC1]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+  // CHECK: %[[IDX_CAST2:.*]] = arith.index_cast %[[VAL5]] : i32 to index
+  // CHECK: %[[EXTRACT:.*]] = vector.extract %[[VEC10]][%[[IDX_CAST2]]] : i32 from vector<4xi32>
+  // CHECK: memref.store %[[EXTRACT]], %[[ALLOC4]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+
+  vi4 d = a + b;
+  
+  // CHECK: %[[ALLOC0_1:.*]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC1_1:.*]] = memref.load %[[ALLOC3]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC2_1:.*]] = arith.addi %[[ALLOC0_1]], %[[ALLOC1_1]] : vector<4xi32>
+  // CHECK: memref.store %[[ALLOC2_1]], %[[ALLOC5]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  
+  vi4 e = a - b;
+
+  // CHECK: %[[ALLOC0_2:.*]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC1_2:.*]] = memref.load %[[ALLOC3]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC3_2:.*]] = arith.subi %[[ALLOC0_2]], %[[ALLOC1_2]] : vector<4xi32>
+  // CHECK: memref.store %[[ALLOC3_2]], %[[ALLOC6]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+
+  vi4 f = a * b;
+
+  // CHECK: %[[ALLOC0_3:.*]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC1_3:.*]] = memref.load %[[ALLOC3]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC4_1:.*]] = arith.muli %[[ALLOC0_3]], %[[ALLOC1_3]] : vector<4xi32>
+  // CHECK: memref.store %[[ALLOC4_1]], %[[ALLOC7]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+
+  vi4 g = a / b;
+
+  // CHECK: %[[ALLOC0_4:.*]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC1_4:.*]] = memref.load %[[ALLOC3]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC5_1:.*]] = arith.divsi %[[ALLOC0_4]], %[[ALLOC1_4]] : vector<4xi32>
+  // CHECK: memref.store %[[ALLOC5_1]], %[[ALLOC8]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+
+  vi4 h = a % b;
+
+  // CHECK: %[[ALLOC0_5:.*]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC1_5:.*]] = memref.load %[[ALLOC3]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC6_1:.*]] = arith.remsi %[[ALLOC0_5]], %[[ALLOC1_5]] : vector<4xi32>
+  // CHECK: memref.store %[[ALLOC6_1]], %[[ALLOC9]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+
+  vi4 i = a & b;
+
+  // CHECK: %[[ALLOC0_6:.*]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC1_6:.*]] = memref.load %[[ALLOC3]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC7_1:.*]] = arith.andi %[[ALLOC0_6]], %[[ALLOC1_6]] : vector<4xi32>
+  // CHECK: memref.store %[[ALLOC7_1]], %[[ALLOC10]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+
+  vi4 j = a | b;
+
+  // CHECK: %[[ALLOC0_7:.*]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC1_7:.*]] = memref.load %[[ALLOC3]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC8_1:.*]] = arith.ori %[[ALLOC0_7]], %[[ALLOC1_7]] : vector<4xi32>
+  // CHECK: memref.store %[[ALLOC8_1]], %[[ALLOC11]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+
+  vi4 k = a ^ b;
+
+  // CHECK: %[[ALLOC0_8:.*]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC1_8:.*]] = memref.load %[[ALLOC3]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[ALLOC9_1:.*]] = arith.xori %[[ALLOC0_8]], %[[ALLOC1_8]] : vector<4xi32>
+  // CHECK: memref.store %[[ALLOC9_1]], %[[ALLOC12]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+
+  // TODO(cir) : Fix the lowering of unary operators
+  // vi4 l = +a;
+  // vi4 m = -a;
+  // vi4 n = ~a;
+
+  vi4 o = a == b;
+
+  // CHECK: %[[VAL11:.*]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[VAL12:.*]] = memref.load %[[ALLOC3]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[CMP_EQ:.*]] = arith.cmpi eq, %[[VAL11]], %[[VAL12]] : vector<4xi32>
+  // CHECK: %[[EXT_EQ:.*]] = arith.extsi %[[CMP_EQ]] : vector<4xi1> to vector<4xi32>
+  // CHECK: memref.store %[[EXT_EQ]], %[[ALLOC13]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+
+  vi4 p = a != b;
+
+  // CHECK: %[[VAL13:.*]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[VAL14:.*]] = memref.load %[[ALLOC3]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[CMP_NE:.*]] = arith.cmpi ne, %[[VAL13]], %[[VAL14]] : vector<4xi32>
+  // CHECK: %[[EXT_NE:.*]] = arith.extsi %[[CMP_NE]] : vector<4xi1> to vector<4xi32>
+  // CHECK: memref.store %[[EXT_NE]], %[[ALLOC14]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+
+  vi4 q = a < b;
+
+  // CHECK: %[[VAL15:.*]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[VAL16:.*]] = memref.load %[[ALLOC3]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[CMP_SLT:.*]] = arith.cmpi slt, %[[VAL15]], %[[VAL16]] : vector<4xi32>
+  // CHECK: %[[EXT_SLT:.*]] = arith.extsi %[[CMP_SLT]] : vector<4xi1> to vector<4xi32>
+  // CHECK: memref.store %[[EXT_SLT]], %[[ALLOC15]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  
+  vi4 r = a > b;
+  
+  // CHECK: %[[VAL17:.*]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[VAL18:.*]] = memref.load %[[ALLOC3]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[CMP_SGT:.*]] = arith.cmpi sgt, %[[VAL17]], %[[VAL18]] : vector<4xi32>
+  // CHECK: %[[EXT_SGT:.*]] = arith.extsi %[[CMP_SGT]] : vector<4xi1> to vector<4xi32>
+  // CHECK: memref.store %[[EXT_SGT]], %[[ALLOC16]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+
+  vi4 s = a <= b;
+
+  // CHECK: %[[VAL19:.*]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[VAL20:.*]] = memref.load %[[ALLOC3]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[CMP_SLE:.*]] = arith.cmpi sle, %[[VAL19]], %[[VAL20]] : vector<4xi32>
+  // CHECK: %[[EXT_SLE:.*]] = arith.extsi %[[CMP_SLE]] : vector<4xi1> to vector<4xi32>
+  // CHECK: memref.store %[[EXT_SLE]], %[[ALLOC17]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+
+  vi4 t = a >= b;
+
+  // CHECK: %[[VAL21:.*]] = memref.load %[[ALLOC2]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[VAL22:.*]] = memref.load %[[ALLOC3]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+  // CHECK: %[[CMP_SGE:.*]] = arith.cmpi sge, %[[VAL21]], %[[VAL22]] : vector<4xi32>
+  // CHECK: %[[EXT_SGE:.*]] = arith.extsi %[[CMP_SGE]] : vector<4xi1> to vector<4xi32>
+  // CHECK: memref.store %[[EXT_SGE]], %[[ALLOC18]][{{%c0(_[0-9]+)?}}] : memref<1xvector<4xi32>>
+
+  // CHECK: return
+}
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/vtable.cir b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/vtable.cir
new file mode 100644
index 0000000000000..85f4a26642f5d
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/vtable.cir
@@ -0,0 +1,73 @@
+// RUN: cir-opt %s --cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+!s8i = !cir.int<s, 8>
+!u32i = !cir.int<u, 32>
+!u64i = !cir.int<u, 64>
+!u8i = !cir.int<u, 8>
+!void = !cir.void
+
+!rec_anon_struct = !cir.record<struct  {!cir.ptr<!cir.int<u, 8>>, !cir.ptr<!cir.int<u, 8>>}>
+!rec_anon_struct1 = !cir.record<struct  {!cir.ptr<!cir.int<u, 8>>, !cir.ptr<!cir.int<u, 8>>, !cir.int<u, 32>, !cir.int<u, 32>, !cir.ptr<!cir.int<u, 8>>, !cir.int<s, 64>, !cir.ptr<!cir.int<u, 8>>, !cir.int<s, 64>}>
+!rec_anon_struct2 = !cir.record<struct  {!cir.array<!cir.ptr<!cir.int<u, 8>> x 4>}>
+!rec_anon_struct3 = !cir.record<struct  {!cir.array<!cir.ptr<!cir.int<u, 8>> x 3>}>
+!rec_anon_struct4 = !cir.record<struct  {!cir.array<!cir.ptr<!cir.int<u, 8>> x 4>, !cir.array<!cir.ptr<!cir.int<u, 8>> x 3>}>
+!rec_Father = !cir.record<class "Father" {!cir.ptr<!cir.ptr<!cir.func<() -> !cir.int<u, 32>>>>} #cir.record.decl.ast>
+!rec_Mother = !cir.record<class "Mother" {!cir.ptr<!cir.ptr<!cir.func<() -> !cir.int<u, 32>>>>} #cir.record.decl.ast>
+!rec_Child = !cir.record<class "Child" {!cir.record<class "Mother" {!cir.ptr<!cir.ptr<!cir.func<() -> !cir.int<u, 32>>>>} #cir.record.decl.ast>, !cir.record<class "Father" {!cir.ptr<!cir.ptr<!cir.func<() -> !cir.int<u, 32>>>>} #cir.record.decl.ast>} #cir.record.decl.ast>
+
+module {
+  cir.func linkonce_odr @_ZN6Mother6simpleEv(%arg0: !cir.ptr<!rec_Mother>) { 
+    %0 = cir.alloca !cir.ptr<!rec_Mother>, !cir.ptr<!cir.ptr<!rec_Mother>>, ["this", init] {alignment = 8 : i64}
+    cir.store %arg0, %0 : !cir.ptr<!rec_Mother>, !cir.ptr<!cir.ptr<!rec_Mother>>
+    %1 = cir.load %0 : !cir.ptr<!cir.ptr<!rec_Mother>>, !cir.ptr<!rec_Mother>
+    cir.return 
+  }
+  cir.func private @_ZN5ChildC2Ev(%arg0: !cir.ptr<!rec_Child>) { cir.return }
+  cir.global linkonce_odr @_ZTV6Mother = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI6Mother> : !cir.ptr<!u8i>, #cir.global_view<@_ZN6Mother9MotherFooEv> : !cir.ptr<!u8i>, #cir.global_view<@_ZN6Mother10MotherFoo2Ev> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 4>}> : !rec_anon_struct2 {alignment = 8 : i64} 
+  cir.global "private" external @_ZTVN10__cxxabiv117__class_type_infoE : !cir.ptr<!cir.ptr<!u8i>> 
+  cir.global linkonce_odr @_ZTS6Mother = #cir.const_array<"6Mother" : !cir.array<!s8i x 7>> : !cir.array<!s8i x 7> {alignment = 1 : i64} 
+  cir.global constant external @_ZTI6Mother = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv117__class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS6Mother> : !cir.ptr<!u8i>}> : !rec_anon_struct {alignment = 8 : i64} 
+  cir.func linkonce_odr @_ZN6Mother9MotherFooEv(%arg0: !cir.ptr<!rec_Mother> ) { cir.return }
+  cir.func linkonce_odr @_ZN6Mother10MotherFoo2Ev(%arg0: !cir.ptr<!rec_Mother> ) { cir.return }
+  cir.global linkonce_odr @_ZTV6Father = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI6Father> : !cir.ptr<!u8i>, #cir.global_view<@_ZN6Father9FatherFooEv> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 3>}> : !rec_anon_struct3 {alignment = 8 : i64} 
+  cir.func linkonce_odr @_ZN6FatherC2Ev(%arg0: !cir.ptr<!rec_Father> ) { cir.return }
+  cir.global linkonce_odr @_ZTV5Child = #cir.vtable<{#cir.const_array<[#cir.ptr<null> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI5Child> : !cir.ptr<!u8i>, #cir.global_view<@_ZN5Child9MotherFooEv> : !cir.ptr<!u8i>, #cir.global_view<@_ZN6Mother10MotherFoo2Ev> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 4>, #cir.const_array<[#cir.ptr<-8 : i64> : !cir.ptr<!u8i>, #cir.global_view<@_ZTI5Child> : !cir.ptr<!u8i>, #cir.global_view<@_ZN6Father9FatherFooEv> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 3>}> : !rec_anon_struct4 {alignment = 8 : i64} 
+  cir.global "private" external @_ZTVN10__cxxabiv121__vmi_class_type_infoE : !cir.ptr<!cir.ptr<!u8i>> 
+  cir.global linkonce_odr @_ZTS5Child = #cir.const_array<"5Child" : !cir.array<!s8i x 6>> : !cir.array<!s8i x 6> {alignment = 1 : i64} 
+  cir.global linkonce_odr @_ZTS6Father = #cir.const_array<"6Father" : !cir.array<!s8i x 7>> : !cir.array<!s8i x 7> {alignment = 1 : i64} 
+  cir.global constant external @_ZTI6Father = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv117__class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS6Father> : !cir.ptr<!u8i>}> : !rec_anon_struct {alignment = 8 : i64} 
+  cir.global constant external @_ZTI5Child = #cir.typeinfo<{#cir.global_view<@_ZTVN10__cxxabiv121__vmi_class_type_infoE, [2 : i32]> : !cir.ptr<!u8i>, #cir.global_view<@_ZTS5Child> : !cir.ptr<!u8i>, #cir.int<0> : !u32i, #cir.int<2> : !u32i, #cir.global_view<@_ZTI6Mother> : !cir.ptr<!u8i>, #cir.int<2> : !s64i, #cir.global_view<@_ZTI6Father> : !cir.ptr<!u8i>, #cir.int<2050> : !s64i}> : !rec_anon_struct1 {alignment = 8 : i64} 
+  cir.func linkonce_odr @_ZN5Child9MotherFooEv(%arg0: !cir.ptr<!rec_Child> ) { cir.return }
+  cir.func linkonce_odr @_ZN6Father9FatherFooEv(%arg0: !cir.ptr<!rec_Father> ) { cir.return }
+} 
+
+// MLIR:  llvm.mlir.global linkonce_odr @_ZTV5Child() {addr_space = 0 : i32, alignment = 8 : i64} : !llvm.struct<(array<4 x ptr>, array<3 x ptr>)> {
+// MLIR:    %{{[0-9]+}} = llvm.mlir.undef : !llvm.struct<(array<4 x ptr>, array<3 x ptr>)>
+// MLIR:    %{{[0-9]+}} = llvm.mlir.undef : !llvm.array<4 x ptr>
+// MLIR:    %{{[0-9]+}} = llvm.mlir.zero : !llvm.ptr
+// MLIR:    %{{[0-9]+}} = llvm.insertvalue %{{[0-9]+}}, %{{[0-9]+}}[0] : !llvm.array<4 x ptr> 
+// MLIR:    %{{[0-9]+}} = llvm.mlir.addressof @_ZTI5Child : !llvm.ptr
+
+// MLIR:    %{{[0-9]+}} = llvm.insertvalue %{{[0-9]+}}, %{{[0-9]+}}[1] : !llvm.array<4 x ptr> 
+// MLIR:    %{{[0-9]+}} = llvm.mlir.addressof @_ZN5Child9MotherFooEv : !llvm.ptr
+
+// MLIR:    %{{[0-9]+}} = llvm.insertvalue %{{[0-9]+}}, %{{[0-9]+}}[2] : !llvm.array<4 x ptr> 
+// MLIR:    %{{[0-9]+}} = llvm.mlir.addressof @_ZN6Mother10MotherFoo2Ev : !llvm.ptr
+
+// MLIR:    %{{[0-9]+}} = llvm.insertvalue %{{[0-9]+}}, %{{[0-9]+}}[3] : !llvm.array<4 x ptr> 
+// MLIR:    %{{[0-9]+}} = llvm.insertvalue %{{[0-9]+}}, %{{[0-9]+}}[0] : !llvm.struct<(array<4 x ptr>, array<3 x ptr>)> 
+// MLIR:    %{{[0-9]+}} = llvm.mlir.undef : !llvm.array<3 x ptr>
+// MLIR:    %{{[0-9]+}} = llvm.mlir.constant(-8 : i64) : i64
+// MLIR:    %{{[0-9]+}} = llvm.inttoptr %{{[0-9]+}} : i64 to !llvm.ptr
+// MLIR:    %{{[0-9]+}} = llvm.insertvalue %{{[0-9]+}}, %{{[0-9]+}}[0] : !llvm.array<3 x ptr> 
+// MLIR:    %{{[0-9]+}} = llvm.mlir.addressof @_ZTI5Child : !llvm.ptr
+
+// MLIR:    %{{[0-9]+}} = llvm.insertvalue %{{[0-9]+}}, %{{[0-9]+}}[1] : !llvm.array<3 x ptr> 
+// MLIR:    %{{[0-9]+}} = llvm.mlir.addressof @_ZN6Father9FatherFooEv : !llvm.ptr
+
+// MLIR:    %{{[0-9]+}} = llvm.insertvalue %{{[0-9]+}}, %{{[0-9]+}}[2] : !llvm.array<3 x ptr> 
+// MLIR:    %{{[0-9]+}} = llvm.insertvalue %{{[0-9]+}}, %{{[0-9]+}}[1] : !llvm.struct<(array<4 x ptr>, array<3 x ptr>)> 
+// MLIR:    llvm.return %{{[0-9]+}} : !llvm.struct<(array<4 x ptr>, array<3 x ptr>)>
+// MLIR:  }
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/while-with-continue.cpp b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/while-with-continue.cpp
new file mode 100644
index 0000000000000..541814506581c
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/while-with-continue.cpp
@@ -0,0 +1,106 @@
+// XFAIL: *
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+ 
+void while_continue() {
+  int i = 0;
+  while (i < 100) {
+    i++;
+    continue;
+    i++;
+  }
+  // Only the first `i++` will be emitted.
+
+  // CHECK: scf.while : () -> () {
+  // CHECK:   %[[TMP0:.+]] = memref.load %alloca[{{%c0(_[0-9]+)?}}]
+  // CHECK:   %[[HUNDRED:.+]] = arith.constant 100
+  // CHECK:   %[[TMP1:.+]] = arith.cmpi slt, %[[TMP0]], %[[HUNDRED]]
+  // CHECK:   scf.condition(%[[TMP1]])
+  // CHECK: } do {
+  // CHECK:   memref.alloca_scope  {
+  // CHECK:     %[[TMP2:.+]] = memref.load %alloca[{{%c0(_[0-9]+)?}}]
+  // CHECK:     %[[ONE:.+]] = arith.constant 1
+  // CHECK:     %[[TMP3:.+]] = arith.addi %[[TMP2]], %[[ONE]]
+  // CHECK:     memref.store %[[TMP3]], %alloca[{{%c0(_[0-9]+)?}}]
+  // CHECK:   }
+  // CHECK:   scf.yield
+  // CHECK: }
+}
+
+void while_continue_2() {
+  int i = 0;
+  while (i < 10) {
+    if (i == 5) {
+      i += 3;
+      continue;
+    }
+  
+    i++;
+  }
+  // The final i++ is guarded by an implicit `if (!(i == 5))` check.
+
+  // CHECK: do {
+  // CHECK:   %[[NOTALLOCA:.+]] = memref.alloca
+  // CHECK:   memref.alloca_scope  {
+  // CHECK:     memref.alloca_scope  {
+  // CHECK:       %[[IV:.+]] = memref.load %[[IVADDR:.+]][{{%c0(_[0-9]+)?}}]
+  // CHECK:       %[[FIVE:.+]] = arith.constant 5
+  // CHECK:       %[[COND:.+]] = arith.cmpi eq, %[[IV]], %[[FIVE]]
+  // CHECK:       %true = arith.constant true
+  // CHECK:       %[[NOT:.+]] = arith.xori %true, %[[COND]]
+  // CHECK:       %[[EXT:.+]] = arith.extui %[[NOT]] : i1 to i8
+  // CHECK:       memref.store %[[EXT]], %[[NOTALLOCA]]
+  // CHECK:       scf.if %[[COND]] {
+  // CHECK:         %[[THREE:.+]] = arith.constant 3
+  // CHECK:         %[[IV2:.+]] = memref.load %[[IVADDR]]
+  // CHECK:         %[[TMP:.+]] = arith.addi %[[IV2]], %[[THREE]]
+  // CHECK:         memref.store %[[TMP]], %[[IVADDR]]
+  // CHECK:       }
+  // CHECK:     }
+  // CHECK:     %[[NOTCOND:.+]] = memref.load %[[NOTALLOCA]]
+  // CHECK:     %[[TRUNC:.+]] = arith.trunci %[[NOTCOND]] : i8 to i1
+  // CHECK:     scf.if %[[TRUNC]] {
+  // CHECK:       %[[IV3:.+]] = memref.load %[[IVADDR]]
+  // CHECK:       %[[ONE:.+]] = arith.constant 1
+  // CHECK:       %[[TMP2:.+]] = arith.addi %[[IV3]], %[[ONE]]
+  // CHECK:       memref.store %[[TMP2]], %[[IVADDR]]
+  // CHECK:     }
+  // CHECK:   }
+  // CHECK:   scf.yield
+  // CHECK: }
+}
+
+void while_continue_nested() {
+  int i = 0;
+  while (i < 10) {
+    while (true) {
+      continue;
+      i--;
+    }
+    i++;
+  }
+  // The continue will only work on the inner while.
+
+  // CHECK: scf.while : () -> () {
+  // CHECK:   %[[IV:.+]] = memref.load %alloca[{{%c0(_[0-9]+)?}}]
+  // CHECK:   %[[TEN:.+]] = arith.constant 10
+  // CHECK:   %[[LT:.+]] = arith.cmpi slt, %[[IV]], %[[TEN]]
+  // CHECK:   scf.condition(%[[LT]])
+  // CHECK: } do {
+  // CHECK:   memref.alloca_scope  {
+  // CHECK:     memref.alloca_scope  {
+  // CHECK:       scf.while : () -> () {
+  // CHECK:         %[[TRUE:.+]] = arith.constant true
+  // CHECK:         scf.condition(%[[TRUE]])
+  // CHECK:       } do {
+  // CHECK:         scf.yield
+  // CHECK:       }
+  // CHECK:     }
+  // CHECK:     %[[IV2:.+]] = memref.load %alloca[{{%c0(_[0-9]+)?}}]
+  // CHECK:     %[[ONE:.+]] = arith.constant 1
+  // CHECK:     %[[ADD:.+]] = arith.addi %[[IV2]], %[[ONE]]
+  // CHECK:     memref.store %[[ADD]], %alloca[{{%c0(_[0-9]+)?}}]
+  // CHECK:   }
+  // CHECK:   scf.yield
+  // CHECK: }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/ThroughMLIR/while.c b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/while.c
new file mode 100644
index 0000000000000..463ace944c9c1
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ThroughMLIR/while.c
@@ -0,0 +1,87 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+void singleWhile() {
+  int a = 0;
+  while(a < 2) {
+    a++;
+  }
+}
+
+void nestedWhile() {
+  int a = 0;
+  while(a < 2) {
+    int b = 0;
+    while(b < 2) {
+      b++;
+    }
+    a++;
+  }
+}
+
+//CHECK: func.func @singleWhile() {
+//CHECK:   %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+//CHECK:   %[[C0_I32:.+]] = arith.constant 0 : i32
+//CHECK:   %[[C0_IDX:.+]] = arith.constant 0 : index
+//CHECK:   memref.store %[[C0_I32]], %[[alloca]][%[[C0_IDX]]] : memref<1xi32>
+//CHECK:   memref.alloca_scope  {
+//CHECK:     scf.while : () -> () {
+//CHECK:       %[[C0_IDX2:.+]] = arith.constant 0 : index
+//CHECK:       %[[ZERO:.+]] = memref.load %[[alloca]][%[[C0_IDX2]]] : memref<1xi32>
+//CHECK:       %[[C2_I32:.+]] = arith.constant 2 : i32
+//CHECK:       %[[ONE:.+]] = arith.cmpi slt, %[[ZERO]], %[[C2_I32]] : i32
+//CHECK:       scf.condition(%[[ONE]])
+//CHECK:     } do {
+//CHECK:       memref.alloca_scope {
+//CHECK:         %[[C0_IDX3:.+]] = arith.constant 0 : index
+//CHECK:         %[[ZERO:.+]] = memref.load %[[alloca]][%[[C0_IDX3]]] : memref<1xi32>
+//CHECK:         %[[C1_I32:.+]] = arith.constant 1 : i32
+//CHECK:         %[[ONE:.+]] = arith.addi %[[ZERO]], %[[C1_I32]] : i32
+//CHECK:         %[[C0_IDX4:.+]] = arith.constant 0 : index
+//CHECK:         memref.store %[[ONE]], %[[alloca]][%[[C0_IDX4]]] : memref<1xi32>
+//CHECK:       }
+//CHECK:       scf.yield
+//CHECK:     }
+//CHECK:  }
+//CHECK:   return
+//CHECK: }
+
+//CHECK: func.func @nestedWhile() {
+//CHECK:   %[[alloca:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+//CHECK:   %[[C0_I32:.+]] = arith.constant 0 : i32
+//CHECK:   memref.store %[[C0_I32]], %[[alloca]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:   memref.alloca_scope  {
+//CHECK:     scf.while : () -> () {
+//CHECK:       %[[ZERO:.+]] = memref.load %alloca[{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:       %[[C2_I32:.+]] = arith.constant 2 : i32
+//CHECK:       %[[ONE:.+]] = arith.cmpi slt, %[[ZERO]], %[[C2_I32]] : i32
+//CHECK:       scf.condition(%[[ONE]])
+//CHECK:     } do {
+//CHECK:       memref.alloca_scope {
+//CHECK:         %[[alloca_0:.+]] = memref.alloca() {alignment = 4 : i64} : memref<1xi32>
+//CHECK:         %[[C0_I32_1:.+]] = arith.constant 0 : i32
+//CHECK:         memref.store %[[C0_I32_1]], %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:         memref.alloca_scope  {
+//CHECK:           scf.while : () -> () {
+//CHECK:             %{{.*}} = memref.load %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:             %[[C2_INNER:.+]] = arith.constant 2 : i32
+//CHECK:             %[[SEVEN:.*]] = arith.cmpi slt, %{{.*}}, %[[C2_INNER]] : i32
+//CHECK:             scf.condition(%[[SEVEN]])
+//CHECK:           } do {
+//CHECK:             %{{.*}} = memref.load %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:             %[[C1_I32_2:.+]] = arith.constant 1 : i32
+//CHECK:             %{{.*}} = arith.addi %{{.*}}, %[[C1_I32_2]] : i32
+//CHECK:             memref.store %{{.*}}, %[[alloca_0]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:             scf.yield
+//CHECK:           }
+//CHECK:         }
+//CHECK:         %[[OUTER_LOAD:.+]] = memref.load %[[alloca]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:         %[[C1_I32:.+]] = arith.constant 1 : i32
+//CHECK:         %[[OUTER_ADD:.+]] = arith.addi %[[OUTER_LOAD]], %[[C1_I32]] : i32
+//CHECK:         memref.store %[[OUTER_ADD]], %[[alloca]][{{%c0(_[0-9]+)?}}] : memref<1xi32>
+//CHECK:       }
+//CHECK:       scf.yield
+//CHECK:     }
+//CHECK:   }
+//CHECK:   return
+//CHECK: }
diff --git a/clang/test/CIR/Incubator/Lowering/address-space.cir b/clang/test/CIR/Incubator/Lowering/address-space.cir
new file mode 100644
index 0000000000000..b28e44266d189
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/address-space.cir
@@ -0,0 +1,59 @@
+// RUN: cir-translate %s -cir-to-llvmir --target spirv64-unknown-unknown --disable-cc-lowering -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.global external lang_address_space(offload_global) @addrspace1 = #cir.int<1> : !s32i
+  // LLVM: @addrspace1 = addrspace(1) global i32
+
+  cir.global "private" internal lang_address_space(offload_local) @addrspace2 : !s32i
+  // LLVM: @addrspace2 = internal addrspace(3) global i32 undef
+
+  cir.global external target_address_space(7) @addrspace3 = #cir.int<3> : !s32i
+  // LLVM: @addrspace3 = addrspace(7) global i32
+
+  // Test GlobalViewAttr with address space cast.
+  cir.global external target_address_space(1) @global_in_as1 = #cir.int<42> : !s32i
+  // LLVM: @global_in_as1 = addrspace(1) global i32 42
+
+  // Reference to @global_in_as1 with a pointer in default address space (0)
+  cir.global external @ref_with_addrspacecast = #cir.const_array<[#cir.global_view<@global_in_as1> : !cir.ptr<!cir.void>]> : !cir.array<!cir.ptr<!cir.void> x 1>
+  // LLVM: @ref_with_addrspacecast = global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @global_in_as1 to ptr)]
+
+  // LLVM: define void @foo(ptr %0)
+  cir.func @foo(%arg0: !cir.ptr<!s32i>) {
+    // LLVM-NEXT: alloca ptr,
+    %0 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["arg", init] {alignment = 8 : i64}
+    cir.return
+  }
+
+  // LLVM: define void @bar(ptr addrspace(1) %0)
+  cir.func @bar(%arg0: !cir.ptr<!s32i, target_address_space(1)>) {
+    // LLVM-NEXT: alloca ptr addrspace(1)
+    %0 = cir.alloca !cir.ptr<!s32i, target_address_space(1)>, !cir.ptr<!cir.ptr<!s32i, target_address_space(1)>>, ["arg", init] {alignment = 8 : i64}
+    cir.return
+  }
+
+  // LLVM: define void @baz(ptr %0)
+  cir.func @baz(%arg0: !cir.ptr<!s32i, target_address_space(0)>) {
+    // LLVM-NEXT: alloca ptr,
+    %0 = cir.alloca !cir.ptr<!s32i, target_address_space(0)>, !cir.ptr<!cir.ptr<!s32i, target_address_space(0)>>, ["arg", init] {alignment = 8 : i64}
+    cir.return
+  }
+
+  // LLVM: define void @test_lower_offload_as()
+  cir.func @test_lower_offload_as() {
+    %0 = cir.alloca !cir.ptr<!s32i, lang_address_space(offload_private)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_private)>>, ["arg0", init] {alignment = 8 : i64}
+    // LLVM-NEXT: alloca ptr,
+    %1 = cir.alloca !cir.ptr<!s32i, lang_address_space(offload_global)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_global)>>, ["arg1", init] {alignment = 8 : i64}
+    // LLVM-NEXT: alloca ptr addrspace(1),
+    %2 = cir.alloca !cir.ptr<!s32i, lang_address_space(offload_constant)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_constant)>>, ["arg2", init] {alignment = 8 : i64}
+    // LLVM-NEXT: alloca ptr addrspace(2),
+    %3 = cir.alloca !cir.ptr<!s32i, lang_address_space(offload_local)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_local)>>, ["arg3", init] {alignment = 8 : i64}
+    // LLVM-NEXT: alloca ptr addrspace(3),
+    %4 = cir.alloca !cir.ptr<!s32i, lang_address_space(offload_generic)>, !cir.ptr<!cir.ptr<!s32i, lang_address_space(offload_generic)>>, ["arg4", init] {alignment = 8 : i64}
+    // LLVM-NEXT: alloca ptr addrspace(4),
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/alloca.cir b/clang/test/CIR/Incubator/Lowering/alloca.cir
new file mode 100644
index 0000000000000..62b8c1c601112
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/alloca.cir
@@ -0,0 +1,17 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+
+!s32i = !cir.int<s, 32>
+
+module  {
+  cir.func @foo(%arg0: !s32i) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, %arg0 : !s32i, ["tmp"] {alignment = 16 : i64}
+    cir.return
+  }
+}
+
+//      MLIR: module {
+// MLIR-NEXT:  llvm.func @foo(%arg0: i32) attributes {cir.extra_attrs = #fn_attr, global_visibility = #cir<visibility default>} {
+// MLIR-NEXT:    %0 = llvm.alloca %arg0 x i32 {alignment = 16 : i64} : (i32) -> !llvm.ptr
+// MLIR-NEXT:    llvm.return
+// MLIR-NEXT:  }
+// MLIR-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/applearm64-new.cpp b/clang/test/CIR/Incubator/Lowering/applearm64-new.cpp
new file mode 100644
index 0000000000000..b72995d02477e
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/applearm64-new.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -triple=arm64e-apple-darwin -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+class C {
+  public:
+    ~C();
+};
+
+void t_constant_size_nontrivial() {
+  auto p = new C[3];
+}
+
+// Note: The below differs from the IR emitted by clang without -fclangir in
+//       several respects. (1) The alloca here has an extra "i64 1"
+//       (2) The operator new call is missing "noalias noundef nonnull" on
+//       the call and "noundef" on the argument, (3) The getelementptr is
+//       missing "inbounds"
+
+// LLVM: @_Z26t_constant_size_nontrivialv()
+// LLVM:   %[[COOKIE_PTR:.*]] = call ptr @_Znam(i64 19)
+// LLVM:   store i64 1, ptr %[[COOKIE_PTR]], align 8
+// LLVM:   %[[NUM_ELEMENTS_PTR:.*]] = getelementptr i64, ptr %[[COOKIE_PTR]], i64 1
+// LLVM:   store i64 3, ptr %[[NUM_ELEMENTS_PTR]], align 8
+// LLVM:   %[[ALLOCATED_PTR:.*]] = getelementptr i8, ptr %[[COOKIE_PTR]], i64 16
+
+class D {
+  public:
+    int x;
+    ~D();
+};
+
+void t_constant_size_nontrivial2() {
+  auto p = new D[3];
+}
+
+// LLVM: @_Z27t_constant_size_nontrivial2v()
+// LLVM:   %[[COOKIE_PTR:.*]] = call ptr @_Znam(i64 28)
+// LLVM:   store i64 4, ptr %[[COOKIE_PTR]], align 8
+// LLVM:   %[[NUM_ELEMENTS_PTR:.*]] = getelementptr i64, ptr %[[COOKIE_PTR]], i64 1
+// LLVM:   store i64 3, ptr %[[NUM_ELEMENTS_PTR]], align 8
+// LLVM:   %[[ALLOCATED_PTR:.*]] = getelementptr i8, ptr %[[COOKIE_PTR]], i64 16
diff --git a/clang/test/CIR/Incubator/Lowering/array-init.c b/clang/test/CIR/Incubator/Lowering/array-init.c
new file mode 100644
index 0000000000000..9e16c80420957
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/array-init.c
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+// LLVM-DAG: @__const.charInit3.arr
+// LLVM-DAG: @__const.charInit2.arr
+// LLVM-DAG: @charInit1.ar = internal global [4 x [4 x i8]] {{.*}}4 x i8] c"aa\00\00", [4 x i8] c"aa\00\00", [4 x i8] c"aa\00\00", [4 x i8] c"aa\00\00"], align 16
+char charInit1() {
+  static char ar[][4] = {"aa", "aa", "aa", "aa"};
+  return ar[0][0];
+}
+
+// LLVM: define dso_local void @zeroInit
+// LLVM: [[RES:%.*]] = alloca [3 x i32], i64 1
+// LLVM: store [3 x i32] zeroinitializer, ptr [[RES]]
+void zeroInit() {
+  int a[3] = {0, 0, 0};
+}
+
+// LLVM: %[[PTR:.*]] = alloca [4 x [1 x i8]], i64 1, align 1
+// FIXME: OG uses @llvm.memcpy.p0.p0.i64
+// LLVM: void @llvm.memcpy.p0.p0.i32(ptr %[[PTR]], ptr @__const.charInit2.arr, i32 4, i1 false)
+void charInit2() {
+  char arr[4][1] = {"a", "b", "c", "d"};
+}
+
+// LLVM: %[[PTR:.*]] = alloca [4 x [2 x i8]], i64 1, align 1
+// FIXME: OG uses @llvm.memcpy.p0.p0.i64
+// LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[PTR]], ptr @__const.charInit3.arr, i32 8, i1 false)
+void charInit3() {
+  char arr[4][2] = {"ab", "cd", "ef", "gh"};
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Lowering/array.cir b/clang/test/CIR/Incubator/Lowering/array.cir
new file mode 100644
index 0000000000000..b6c6938ad4159
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/array.cir
@@ -0,0 +1,35 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering -o -  | FileCheck %s -check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+!rec_S = !cir.record<struct "S" {!s32i} #cir.record.decl.ast>
+
+module {
+  cir.func @foo() {
+    %0 = cir.alloca !cir.array<i32 x 10>, !cir.ptr<!cir.array<i32 x 10>>, ["a"] {alignment = 16 : i64}
+    cir.return
+  }
+
+//      MLIR: module {
+// MLIR-NEXT: func @foo()
+// MLIR-NEXT:  %0 = llvm.mlir.constant(1 : index) : i64
+// MLIR-NEXT:  %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 16 : i64} : (i64) -> !llvm.ptr
+// MLIR-NEXT:    llvm.return
+// MLIR-NEXT:  }
+// MLIR-NEXT: }
+
+//      LLVM: %1 = alloca [10 x i32], i64 1, align 16
+// LLVM-NEXT: ret void
+
+  cir.global external @arr = #cir.const_array<[#cir.const_record<{#cir.int<1> : !s32i}> : !rec_S, #cir.zero : !rec_S]> : !cir.array<!rec_S x 2>
+  // MLIR: llvm.mlir.global external @arr() {addr_space = 0 : i32} : !llvm.array<2 x struct<"struct.S", (i32)>> {
+  // MLIR:   %0 = llvm.mlir.undef : !llvm.array<2 x struct<"struct.S", (i32)>>
+  // MLIR:   %1 = llvm.mlir.undef : !llvm.struct<"struct.S", (i32)>
+  // MLIR:   %2 = llvm.mlir.constant(1 : i32) : i32
+  // MLIR:   %3 = llvm.insertvalue %2, %1[0] : !llvm.struct<"struct.S", (i32)>
+  // MLIR:   %4 = llvm.insertvalue %3, %0[0] : !llvm.array<2 x struct<"struct.S", (i32)>>
+  // MLIR:   %5 = cir.llvmir.zeroinit : !llvm.struct<"struct.S", (i32)>
+  // MLIR:   %6 = llvm.insertvalue %5, %4[1] : !llvm.array<2 x struct<"struct.S", (i32)>>
+  // MLIR:   llvm.return %6 : !llvm.array<2 x struct<"struct.S", (i32)>>
+  // MLIR: }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/asm.cir b/clang/test/CIR/Incubator/Lowering/asm.cir
new file mode 100644
index 0000000000000..3ba57ac17b2c0
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/asm.cir
@@ -0,0 +1,55 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+
+  cir.func @simple(%arg0: !s32i) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+
+    cir.asm(x86_att, 
+      out = [],
+      in = [],
+      in_out = [],
+      {"" "~{dirflag},~{fpsr},~{flags}"}) -> !s32i
+    // CHECK: llvm.inline_asm asm_dialect = att operand_attrs = [] "", "~{dirflag},~{fpsr},~{flags}"  : () -> i32
+        
+    cir.asm(x86_att, 
+      out = [],
+      in = [],
+      in_out = [],
+      {"xyz" "~{dirflag},~{fpsr},~{flags}"}) side_effects -> !s32i
+    // CHECK: llvm.inline_asm has_side_effects asm_dialect = att operand_attrs = [] "xyz", "~{dirflag},~{fpsr},~{flags}" : () -> i32
+
+    cir.asm(x86_att, 
+      out = [%0 : !cir.ptr<!s32i> (maybe_memory)],
+      in = [],
+      in_out = [%0 : !cir.ptr<!s32i> (maybe_memory)],
+      {"" "=*m,*m,~{dirflag},~{fpsr},~{flags}"}) side_effects -> !s32i
+    // CHECK: llvm.inline_asm has_side_effects asm_dialect = att operand_attrs = [{elementtype = i32}, {elementtype = i32}] "", "=*m,*m,~{dirflag},~{fpsr},~{flags}" %1, %1 : (!llvm.ptr, !llvm.ptr) -> i32
+
+    cir.asm(x86_att, 
+      out = [],
+      in = [%0 : !cir.ptr<!s32i> (maybe_memory)],
+      in_out = [],
+      {"" "*m,~{dirflag},~{fpsr},~{flags}"}) side_effects -> !s32i      
+    // CHECK: llvm.inline_asm has_side_effects asm_dialect = att operand_attrs = [{elementtype = i32}] "", "*m,~{dirflag},~{fpsr},~{flags}" %1 : (!llvm.ptr) -> i32
+
+    cir.asm(x86_att, 
+      out = [%0 : !cir.ptr<!s32i> (maybe_memory)],
+      in = [],
+      in_out = [],
+      {"" "=*m,~{dirflag},~{fpsr},~{flags}"}) side_effects -> !s32i
+    // CHECK: llvm.inline_asm has_side_effects asm_dialect = att operand_attrs = [{elementtype = i32}] "", "=*m,~{dirflag},~{fpsr},~{flags}" %1 : (!llvm.ptr) -> i32
+   
+    cir.asm(x86_att, 
+      out = [],
+      in = [],
+      in_out = [],
+      {"" "=&r,=&r,1,~{dirflag},~{fpsr},~{flags}"}) side_effects -> !s32i
+    // CHECK: llvm.inline_asm has_side_effects asm_dialect = att operand_attrs = [] "", "=&r,=&r,1,~{dirflag},~{fpsr},~{flags}"  : () -> i32
+    cir.return
+  }
+
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Lowering/atomic-runtime.cpp b/clang/test/CIR/Incubator/Lowering/atomic-runtime.cpp
new file mode 100644
index 0000000000000..411a08dc5af2a
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/atomic-runtime.cpp
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s
+
+// Test __atomic_* built-ins that have a memory order parameter with a runtime
+// value.  This requires generating a switch statement, so the amount of
+// generated code is surprisingly large.
+//
+// This is just a quick smoke test.  Only atomic_load_n is tested.
+
+int runtime_load(int *ptr, int order) {
+  return __atomic_load_n(ptr, order);
+}
+
+// CHECK:   %[[T7:[0-9]+]] = load ptr, ptr %[[T3:[0-9]+]], align 8
+// CHECK:   %[[T8:[0-9]+]] = load i32, ptr %[[T4:[0-9]+]], align 4
+// CHECK:   switch i32 %[[T8]], label %[[L9:[0-9]+]] [
+// CHECK:     i32 1, label %[[L11:[0-9]+]]
+// CHECK:     i32 2, label %[[L11]]
+// CHECK:     i32 5, label %[[L13:[0-9]+]]
+// CHECK:   ]
+// CHECK: [[L9]]:
+// CHECK:   %[[T10:[0-9]+]] = load atomic i32, ptr %[[T7]] monotonic, align 4
+// CHECK:   store i32 %[[T10]], ptr %[[T6:[0-9]+]], align 4
+// CHECK:   br label %[[L15:[0-9]+]]
+// CHECK: [[L11]]:
+// CHECK:   %[[T12:[0-9]+]] = load atomic i32, ptr %[[T7]] acquire, align 4
+// CHECK:   store i32 %[[T12]], ptr %[[T6]], align 4
+// CHECK:   br label %[[L15]]
+// CHECK: [[L13]]:
+// CHECK:   %[[T14:[0-9]+]] = load atomic i32, ptr %[[T7]] seq_cst, align 4
+// CHECK:   store i32 %[[T14]], ptr %[[T6]], align 4
+// CHECK:   br label %[[L15]]
+// CHECK: [[L15]]:
+// CHECK:   %[[T16:[0-9]+]] = load i32, ptr %[[T6]], align 4
+// CHECK:   store i32 %[[T16]], ptr %[[T5:[0-9]+]], align 4
+// CHECK:   %[[T17:[0-9]+]] = load i32, ptr %[[T5]], align 4
+// CHECK:   ret i32 %[[T17]]
diff --git a/clang/test/CIR/Incubator/Lowering/attribute-lowering.cir b/clang/test/CIR/Incubator/Lowering/attribute-lowering.cir
new file mode 100644
index 0000000000000..180ce6367aa70
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/attribute-lowering.cir
@@ -0,0 +1,23 @@
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering -o -  | FileCheck %s -check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+!s8i = !cir.int<s, 8>
+!u8i = !cir.int<u, 8>
+
+module {
+  cir.global "private" internal @const_array = #cir.const_array<[#cir.int<1> : !s32i]> : !cir.array<!s32i x 1> {section = ".abc"}
+  // LLVM: @const_array = internal global [1 x i32] [i32 1], section ".abc"
+
+  cir.global "private" internal @const_struct = #cir.const_record<{#cir.int<1> : !s32i}> : !cir.record<struct {!s32i}> {section = ".abc"}
+  // LLVM: @const_struct = internal global { i32 } { i32 1 }, section ".abc"
+
+  cir.func @func_zeroext(%arg0: !u8i {cir.zeroext}) -> (!u8i {cir.zeroext}) {
+    cir.return %arg0 : !u8i
+  }
+  // LLVM: define zeroext i8 @func_zeroext(i8 zeroext %0) 
+
+  cir.func @func_signext(%arg0: !s8i {cir.signext}) -> (!s8i {cir.signext}) {
+    cir.return %arg0 : !s8i
+  }
+  // LLVM: define signext i8 @func_signext(i8 signext %0) 
+}
diff --git a/clang/test/CIR/Incubator/Lowering/binop-bool.cir b/clang/test/CIR/Incubator/Lowering/binop-bool.cir
new file mode 100644
index 0000000000000..7267c407cc0a7
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/binop-bool.cir
@@ -0,0 +1,18 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+module {
+  cir.func @foo() {
+    %0 = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["a", init] {alignment = 4 : i64}
+    %1 = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["b", init] {alignment = 4 : i64}
+    %2 = cir.load %0 : !cir.ptr<!cir.bool>, !cir.bool
+    %3 = cir.load %1 : !cir.ptr<!cir.bool>, !cir.bool
+    %4 = cir.binop(or, %2, %3) : !cir.bool
+    // CHECK: = llvm.or {{.*}}, {{.*}} : i1
+    %5 = cir.binop(xor, %2, %3) : !cir.bool
+    // CHECK: = llvm.xor {{.*}}, {{.*}} : i1
+    %6 = cir.binop(and, %2, %3) : !cir.bool
+    // CHECK: = llvm.and {{.*}}, {{.*}} : i1
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/binop-fp.cir b/clang/test/CIR/Incubator/Lowering/binop-fp.cir
new file mode 100644
index 0000000000000..a2800a847c853
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/binop-fp.cir
@@ -0,0 +1,68 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering  | FileCheck %s -check-prefix=LLVM
+
+module {
+  cir.func @foo() {
+    %0 = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["c"] {alignment = 4 : i64}
+    %1 = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["d"] {alignment = 4 : i64}
+    %2 = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["y", init] {alignment = 4 : i64}
+    %3 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["e"] {alignment = 8 : i64}
+    %4 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["f"] {alignment = 8 : i64}
+    %5 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["g", init] {alignment = 8 : i64}
+    %6 = cir.load %0 : !cir.ptr<!cir.float>, !cir.float
+    %7 = cir.load %1 : !cir.ptr<!cir.float>, !cir.float
+    %8 = cir.binop(mul, %6, %7) : !cir.float
+    cir.store %8, %2 : !cir.float, !cir.ptr<!cir.float>
+    %9 = cir.load %2 : !cir.ptr<!cir.float>, !cir.float
+    %10 = cir.load %1 : !cir.ptr<!cir.float>, !cir.float
+    %11 = cir.binop(div, %9, %10) : !cir.float
+    cir.store %11, %2 : !cir.float, !cir.ptr<!cir.float>
+    %12 = cir.load %2 : !cir.ptr<!cir.float>, !cir.float
+    %13 = cir.load %1 : !cir.ptr<!cir.float>, !cir.float
+    %14 = cir.binop(add, %12, %13) : !cir.float
+    cir.store %14, %2 : !cir.float, !cir.ptr<!cir.float>
+    %15 = cir.load %2 : !cir.ptr<!cir.float>, !cir.float
+    %16 = cir.load %1 : !cir.ptr<!cir.float>, !cir.float
+    %17 = cir.binop(sub, %15, %16) : !cir.float
+    cir.store %17, %2 : !cir.float, !cir.ptr<!cir.float>
+    %18 = cir.load %3 : !cir.ptr<!cir.double>, !cir.double
+    %19 = cir.load %4 : !cir.ptr<!cir.double>, !cir.double
+    %20 = cir.binop(add, %18, %19) : !cir.double
+    cir.store %20, %5 : !cir.double, !cir.ptr<!cir.double>
+    %21 = cir.load %3 : !cir.ptr<!cir.double>, !cir.double
+    %22 = cir.load %4 : !cir.ptr<!cir.double>, !cir.double
+    %23 = cir.binop(sub, %21, %22) : !cir.double
+    cir.store %23, %5 : !cir.double, !cir.ptr<!cir.double>
+    %24 = cir.load %3 : !cir.ptr<!cir.double>, !cir.double
+    %25 = cir.load %4 : !cir.ptr<!cir.double>, !cir.double
+    %26 = cir.binop(mul, %24, %25) : !cir.double
+    cir.store %26, %5 : !cir.double, !cir.ptr<!cir.double>
+    %27 = cir.load %3 : !cir.ptr<!cir.double>, !cir.double
+    %28 = cir.load %4 : !cir.ptr<!cir.double>, !cir.double
+    %29 = cir.binop(div, %27, %28) : !cir.double
+    cir.store %29, %5 : !cir.double, !cir.ptr<!cir.double>
+    cir.return
+  }
+}
+
+// MLIR: = llvm.alloca {{.*}} f32 {alignment = 4 : i64} : (i64) -> !llvm.ptr
+// MLIR: = llvm.alloca {{.*}} f64 {alignment = 8 : i64} : (i64) -> !llvm.ptr
+// MLIR: = llvm.fmul {{.*}} : f32
+// MLIR: = llvm.fdiv
+// MLIR: = llvm.fadd
+// MLIR: = llvm.fsub
+// MLIR: = llvm.fadd {{.*}} : f64
+// MLIR: = llvm.fsub
+// MLIR: = llvm.fmul
+// MLIR: = llvm.fdiv
+
+// LLVM: = alloca float, i64
+// LLVM: = alloca double, i64
+// LLVM: = fmul float
+// LLVM: = fdiv float
+// LLVM: = fadd float
+// LLVM: = fsub float
+// LLVM: = fadd double
+// LLVM: = fsub double
+// LLVM: = fmul double
+// LLVM: = fdiv double
diff --git a/clang/test/CIR/Incubator/Lowering/binop-overflow.cir b/clang/test/CIR/Incubator/Lowering/binop-overflow.cir
new file mode 100644
index 0000000000000..6a2ef54c15013
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/binop-overflow.cir
@@ -0,0 +1,63 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering -o -  | FileCheck %s -check-prefix=LLVM
+
+!u32i = !cir.int<u, 32>
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.func @test_add_u32_u32_u32(%lhs: !u32i, %rhs: !u32i, %res: !cir.ptr<!u32i>) -> !cir.bool {
+    %result, %overflow = cir.binop.overflow(add, %lhs, %rhs) : !u32i, (!u32i, !cir.bool)
+    cir.store %result, %res : !u32i, !cir.ptr<!u32i>
+    cir.return %overflow : !cir.bool
+  }
+
+  //      MLIR: llvm.func @test_add_u32_u32_u32(%[[LHS:.+]]: i32, %[[RHS:.+]]: i32, %[[RES_PTR:.+]]: !llvm.ptr) -> i1
+  // MLIR-NEXT:   %[[#INTRIN_RET:]] = llvm.call_intrinsic "llvm.uadd.with.overflow.i32"(%[[LHS]], %[[RHS]]) : (i32, i32) -> !llvm.struct<(i32, i1)>
+  // MLIR-NEXT:   %[[#RES:]] = llvm.extractvalue %[[#INTRIN_RET]][0] : !llvm.struct<(i32, i1)>
+  // MLIR-NEXT:   %[[#OVFL:]] = llvm.extractvalue %[[#INTRIN_RET]][1] : !llvm.struct<(i32, i1)>
+  // MLIR-NEXT:   llvm.store %[[#RES]], %[[RES_PTR]] {{.*}} : i32, !llvm.ptr
+  // MLIR-NEXT:   llvm.return %[[#OVFL]] : i1
+  // MLIR-NEXT: }
+
+  //      LLVM: define i1 @test_add_u32_u32_u32(i32 %[[#LHS:]], i32 %[[#RHS:]], ptr %[[#RES_PTR:]])
+  // LLVM-NEXT:   %[[#INTRIN_RET:]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %[[#LHS]], i32 %[[#RHS]])
+  // LLVM-NEXT:   %[[#RES:]] = extractvalue { i32, i1 } %[[#INTRIN_RET]], 0
+  // LLVM-NEXT:   %[[#OVFL:]] = extractvalue { i32, i1 } %[[#INTRIN_RET]], 1
+  // LLVM-NEXT:   store i32 %[[#RES]], ptr %[[#RES_PTR]], align 4
+  // LLVM-NEXT:   ret i1 %[[#OVFL]]
+  // LLVM-NEXT: }
+
+  cir.func @test_add_u32_u32_i32(%lhs: !u32i, %rhs: !u32i, %res: !cir.ptr<!s32i>) -> !cir.bool {
+    %result, %overflow = cir.binop.overflow(add, %lhs, %rhs) : !u32i, (!s32i, !cir.bool)
+    cir.store %result, %res : !s32i, !cir.ptr<!s32i>
+    cir.return %overflow : !cir.bool
+  }
+
+  //      MLIR: llvm.func @test_add_u32_u32_i32(%[[LHS:.+]]: i32, %[[RHS:.+]]: i32, %[[RES_PTR:.+]]: !llvm.ptr) -> i1
+  // MLIR-NEXT:   %[[#LHS_EXT:]] = llvm.zext %[[LHS]] : i32 to i33
+  // MLIR-NEXT:   %[[#RHS_EXT:]] = llvm.zext %[[RHS]] : i32 to i33
+  // MLIR-NEXT:   %[[#INTRIN_RET:]] = llvm.call_intrinsic "llvm.sadd.with.overflow.i33"(%[[#LHS_EXT]], %[[#RHS_EXT]]) : (i33, i33) -> !llvm.struct<(i33, i1)>
+  // MLIR-NEXT:   %[[#RES_EXT:]] = llvm.extractvalue %[[#INTRIN_RET]][0] : !llvm.struct<(i33, i1)>
+  // MLIR-NEXT:   %[[#ARITH_OVFL:]] = llvm.extractvalue %[[#INTRIN_RET]][1] : !llvm.struct<(i33, i1)>
+  // MLIR-NEXT:   %[[#RES:]] = llvm.trunc %[[#RES_EXT]] : i33 to i32
+  // MLIR-NEXT:   %[[#RES_EXT_2:]] = llvm.sext %[[#RES]] : i32 to i33
+  // MLIR-NEXT:   %[[#TRUNC_OVFL:]] = llvm.icmp "ne" %[[#RES_EXT_2]], %[[#RES_EXT]] : i33
+  // MLIR-NEXT:   %[[#OVFL:]] = llvm.or %[[#ARITH_OVFL]], %[[#TRUNC_OVFL]]  : i1
+  // MLIR-NEXT:   llvm.store %[[#RES]], %[[RES_PTR]] {{.*}} : i32, !llvm.ptr
+  // MLIR-NEXT:   llvm.return %[[#OVFL]] : i1
+  // MLIR-NEXT: }
+
+  //      LLVM: define i1 @test_add_u32_u32_i32(i32 %[[#LHS:]], i32 %[[#RHS:]], ptr %[[#RES_PTR:]])
+  // LLVM-NEXT:   %[[#LHS_EXT:]] = zext i32 %[[#LHS]] to i33
+  // LLVM-NEXT:   %[[#RHS_EXT:]] = zext i32 %[[#RHS]] to i33
+  // LLVM-NEXT:   %[[#INTRIN_RET:]] = call { i33, i1 } @llvm.sadd.with.overflow.i33(i33 %[[#LHS_EXT]], i33 %[[#RHS_EXT]])
+  // LLVM-NEXT:   %[[#RES_EXT:]] = extractvalue { i33, i1 } %[[#INTRIN_RET]], 0
+  // LLVM-NEXT:   %[[#ARITH_OVFL:]] = extractvalue { i33, i1 } %[[#INTRIN_RET]], 1
+  // LLVM-NEXT:   %[[#RES:]] = trunc i33 %[[#RES_EXT]] to i32
+  // LLVM-NEXT:   %[[#RES_EXT_2:]] = sext i32 %[[#RES]] to i33
+  // LLVM-NEXT:   %[[#TRUNC_OVFL:]] = icmp ne i33 %[[#RES_EXT_2]], %[[#RES_EXT]]
+  // LLVM-NEXT:   %[[#OVFL:]] = or i1 %[[#ARITH_OVFL]], %[[#TRUNC_OVFL]]
+  // LLVM-NEXT:   store i32 %[[#RES]], ptr %[[#RES_PTR]], align 4
+  // LLVM-NEXT:   ret i1 %[[#OVFL]]
+  // LLVM-NEXT: }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/binop-signed-int.cir b/clang/test/CIR/Incubator/Lowering/binop-signed-int.cir
new file mode 100644
index 0000000000000..0aa698098e2e0
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/binop-signed-int.cir
@@ -0,0 +1,76 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+!s32i = !cir.int<s, 32>
+module {
+  cir.func @foo() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] {alignment = 4 : i64}
+    %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+    %100 = cir.alloca !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>, ["vec1", init] {alignment = 8 : i64}
+    %101 = cir.alloca !cir.vector<!s32i x 2>, !cir.ptr<!cir.vector<!s32i x 2>>, ["vec2", init] {alignment = 8 : i64}
+    %3 = cir.const #cir.int<2> : !s32i    cir.store %3, %0 : !s32i, !cir.ptr<!s32i>
+    %4 = cir.const #cir.int<1> : !s32i    cir.store %4, %1 : !s32i, !cir.ptr<!s32i>
+    %5 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    %6 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %7 = cir.binop(mul, %5, %6) : !s32i
+    // CHECK: = llvm.mul
+    cir.store %7, %2 : !s32i, !cir.ptr<!s32i>
+    %8 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+    %9 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %10 = cir.binop(div, %8, %9) : !s32i
+      // CHECK: = llvm.sdiv
+    cir.store %10, %2 : !s32i, !cir.ptr<!s32i>
+    %11 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+    %12 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %13 = cir.binop(rem, %11, %12) : !s32i
+    // CHECK: = llvm.srem
+    cir.store %13, %2 : !s32i, !cir.ptr<!s32i>
+    %14 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+    %15 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %16 = cir.binop(add, %14, %15) : !s32i
+    // CHECK: = llvm.add
+    cir.store %16, %2 : !s32i, !cir.ptr<!s32i>
+    %17 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+    %18 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %19 = cir.binop(sub, %17, %18) : !s32i
+    // CHECK: = llvm.sub
+    cir.store %19, %2 : !s32i, !cir.ptr<!s32i>
+    %20 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+    %21 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %22 = cir.shift(right, %20 : !s32i, %21 : !s32i) -> !s32i
+    // CHECK: = llvm.ashr
+    cir.store %22, %2 : !s32i, !cir.ptr<!s32i>
+    %23 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+    %24 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %25 = cir.shift(left, %23 : !s32i, %24 : !s32i) -> !s32i
+    // CHECK: = llvm.shl
+    cir.store %25, %2 : !s32i, !cir.ptr<!s32i>
+    %26 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+    %27 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %28 = cir.binop(and, %26, %27) : !s32i
+    // CHECK: = llvm.and
+    cir.store %28, %2 : !s32i, !cir.ptr<!s32i>
+    %29 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+    %30 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %31 = cir.binop(xor, %29, %30) : !s32i
+    // CHECK: = llvm.xor
+    cir.store %31, %2 : !s32i, !cir.ptr<!s32i>
+    %32 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+    %33 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %34 = cir.binop(or, %32, %33) : !s32i
+    // CHECK: = llvm.or
+    %35 = cir.binop(add, %32, %33) sat: !s32i
+    // CHECK: = llvm.intr.sadd.sat{{.*}}(i32, i32) -> i32
+    %36 = cir.binop(sub, %32, %33) sat: !s32i
+    // CHECK: = llvm.intr.ssub.sat{{.*}}(i32, i32) -> i32 
+    cir.store %34, %2 : !s32i, !cir.ptr<!s32i>
+    %37 = cir.binop(max, %32, %33) : !s32i
+    // CHECK: = llvm.intr.smax
+    %38 = cir.load %100 : !cir.ptr<!cir.vector<!s32i x 2>>, !cir.vector<!s32i x 2>
+    %39 = cir.load %101 : !cir.ptr<!cir.vector<!s32i x 2>>, !cir.vector<!s32i x 2>
+    %40 = cir.binop(max, %38, %39) : !cir.vector<!s32i x 2>
+    // CHECK: = llvm.intr.smax({{%.*}}, {{%.*}}) : (vector<2xi32>, vector<2xi32>) -> vector<2xi32>
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/binop-unsigned-int.cir b/clang/test/CIR/Incubator/Lowering/binop-unsigned-int.cir
new file mode 100644
index 0000000000000..b783509d06ed0
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/binop-unsigned-int.cir
@@ -0,0 +1,92 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering  | FileCheck %s -check-prefix=LLVM
+!u32i = !cir.int<u, 32>
+
+module {
+  cir.func @foo() {
+    %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["a", init] {alignment = 4 : i64}
+    %1 = cir.alloca !u32i, !cir.ptr<!u32i>, ["b", init] {alignment = 4 : i64}
+    %2 = cir.alloca !u32i, !cir.ptr<!u32i>, ["x", init] {alignment = 4 : i64}
+    %100 = cir.alloca !cir.vector<!u32i x 2>, !cir.ptr<!cir.vector<!u32i x 2>>, ["vec1", init] {alignment = 8 : i64}
+    %101 = cir.alloca !cir.vector<!u32i x 2>, !cir.ptr<!cir.vector<!u32i x 2>>, ["vec2", init] {alignment = 8 : i64}
+    %3 = cir.const #cir.int<2> : !u32i    cir.store %3, %0 : !u32i, !cir.ptr<!u32i>
+    %4 = cir.const #cir.int<1> : !u32i    cir.store %4, %1 : !u32i, !cir.ptr<!u32i>
+    %5 = cir.load %0 : !cir.ptr<!u32i>, !u32i
+    %6 = cir.load %1 : !cir.ptr<!u32i>, !u32i
+    %7 = cir.binop(mul, %5, %6) : !u32i
+    cir.store %7, %2 : !u32i, !cir.ptr<!u32i>
+    %8 = cir.load %2 : !cir.ptr<!u32i>, !u32i
+    %9 = cir.load %1 : !cir.ptr<!u32i>, !u32i
+    %10 = cir.binop(div, %8, %9) : !u32i
+    cir.store %10, %2 : !u32i, !cir.ptr<!u32i>
+    %11 = cir.load %2 : !cir.ptr<!u32i>, !u32i
+    %12 = cir.load %1 : !cir.ptr<!u32i>, !u32i
+    %13 = cir.binop(rem, %11, %12) : !u32i
+    cir.store %13, %2 : !u32i, !cir.ptr<!u32i>
+    %14 = cir.load %2 : !cir.ptr<!u32i>, !u32i
+    %15 = cir.load %1 : !cir.ptr<!u32i>, !u32i
+    %16 = cir.binop(add, %14, %15) : !u32i
+    cir.store %16, %2 : !u32i, !cir.ptr<!u32i>
+    %17 = cir.load %2 : !cir.ptr<!u32i>, !u32i
+    %18 = cir.load %1 : !cir.ptr<!u32i>, !u32i
+    %19 = cir.binop(sub, %17, %18) : !u32i
+    cir.store %19, %2 : !u32i, !cir.ptr<!u32i>
+    %20 = cir.load %2 : !cir.ptr<!u32i>, !u32i
+    %21 = cir.load %1 : !cir.ptr<!u32i>, !u32i
+    %22 = cir.shift(right, %20 : !u32i, %21 : !u32i) -> !u32i
+    cir.store %22, %2 : !u32i, !cir.ptr<!u32i>
+    %23 = cir.load %2 : !cir.ptr<!u32i>, !u32i
+    %24 = cir.load %1 : !cir.ptr<!u32i>, !u32i
+    %25 = cir.shift(left, %23 : !u32i, %24 : !u32i) -> !u32i
+    cir.store %25, %2 : !u32i, !cir.ptr<!u32i>
+    %26 = cir.load %2 : !cir.ptr<!u32i>, !u32i
+    %27 = cir.load %1 : !cir.ptr<!u32i>, !u32i
+    %28 = cir.binop(and, %26, %27) : !u32i
+    cir.store %28, %2 : !u32i, !cir.ptr<!u32i>
+    %29 = cir.load %2 : !cir.ptr<!u32i>, !u32i
+    %30 = cir.load %1 : !cir.ptr<!u32i>, !u32i
+    %31 = cir.binop(xor, %29, %30) : !u32i
+    cir.store %31, %2 : !u32i, !cir.ptr<!u32i>
+    %32 = cir.load %2 : !cir.ptr<!u32i>, !u32i
+    %33 = cir.load %1 : !cir.ptr<!u32i>, !u32i
+    %34 = cir.binop(or, %32, %33) : !u32i
+    cir.store %34, %2 : !u32i, !cir.ptr<!u32i>
+    %35 = cir.binop(add, %32, %33) sat: !u32i
+    %36 = cir.binop(sub, %32, %33) sat: !u32i
+    %37 = cir.binop(max, %32, %33) : !u32i
+    %38 = cir.load %100 : !cir.ptr<!cir.vector<!u32i x 2>>, !cir.vector<!u32i x 2>
+    %39 = cir.load %101 : !cir.ptr<!cir.vector<!u32i x 2>>, !cir.vector<!u32i x 2>
+    %40 = cir.binop(max, %38, %39) : !cir.vector<!u32i x 2>
+    cir.return
+  }
+}
+
+// MLIR: = llvm.mul
+// MLIR: = llvm.udiv
+// MLIR: = llvm.urem
+// MLIR: = llvm.add
+// MLIR: = llvm.sub
+// MLIR: = llvm.lshr
+// MLIR: = llvm.shl
+// MLIR: = llvm.and
+// MLIR: = llvm.xor
+// MLIR: = llvm.or
+// MLIR: = llvm.intr.uadd.sat{{.*}}(i32, i32) -> i32
+// MLIR: = llvm.intr.usub.sat{{.*}}(i32, i32) -> i32
+// MLIR: = llvm.intr.umax
+// MLIR: = llvm.intr.umax
+
+// LLVM: = mul i32
+// LLVM: = udiv i32
+// LLVM: = urem i32
+// LLVM: = add i32
+// LLVM: = sub i32
+// LLVM: = lshr i32
+// LLVM: = shl i32
+// LLVM: = and i32
+// LLVM: = xor i32
+// LLVM: = or i32
+// LLVM: = call i32 @llvm.uadd.sat.i32
+// LLVM: = call i32 @llvm.usub.sat.i32
+// LLVM: = call i32 @llvm.umax.i32
+// LLVM: = call <2 x i32> @llvm.umax.v2i32
diff --git a/clang/test/CIR/Incubator/Lowering/bit.cir b/clang/test/CIR/Incubator/Lowering/bit.cir
new file mode 100644
index 0000000000000..f8e34505f85d4
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/bit.cir
@@ -0,0 +1,189 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.mlir
+// RUN: FileCheck %s --input-file=%t.mlir
+
+!s16i = !cir.int<s, 16>
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+!u16i = !cir.int<u, 16>
+!u32i = !cir.int<u, 32>
+!u64i = !cir.int<u, 64>
+
+cir.func @clrsb_s32(%arg : !s32i) {
+  %0 = cir.clrsb %arg : !s32i
+  cir.return
+}
+
+//      CHECK: llvm.func @clrsb_s32(%arg0: i32)
+// CHECK-NEXT:   %0 = llvm.mlir.constant(0 : i32) : i32
+// CHECK-NEXT:   %1 = llvm.icmp "slt" %arg0, %0 : i32
+// CHECK-NEXT:   %2 = llvm.mlir.constant(-1 : i32) : i32
+// CHECK-NEXT:   %3 = llvm.xor %arg0, %2  : i32
+// CHECK-NEXT:   %4 = llvm.select %1, %3, %arg0 : i1, i32
+// CHECK-NEXT:   %5 = "llvm.intr.ctlz"(%4) <{is_zero_poison = false}> : (i32) -> i32
+// CHECK-NEXT:   %6 = llvm.mlir.constant(1 : i32) : i32
+// CHECK-NEXT:   %7 = llvm.sub %5, %6 : i32
+// CHECK-NEXT:   llvm.return
+// CHECK-NEXT: }
+
+cir.func @clrsb_s64(%arg : !s64i) {
+  %0 = cir.clrsb %arg : !s64i
+  cir.return
+}
+
+//      CHECK: llvm.func @clrsb_s64(%arg0: i64)
+// CHECK-NEXT:   %0 = llvm.mlir.constant(0 : i64) : i64
+// CHECK-NEXT:   %1 = llvm.icmp "slt" %arg0, %0 : i64
+// CHECK-NEXT:   %2 = llvm.mlir.constant(-1 : i64) : i64
+// CHECK-NEXT:   %3 = llvm.xor %arg0, %2  : i64
+// CHECK-NEXT:   %4 = llvm.select %1, %3, %arg0 : i1, i64
+// CHECK-NEXT:   %5 = "llvm.intr.ctlz"(%4) <{is_zero_poison = false}> : (i64) -> i64
+// CHECK-NEXT:   %6 = llvm.mlir.constant(1 : i64) : i64
+// CHECK-NEXT:   %7 = llvm.sub %5, %6 : i64
+// CHECK-NEXT:   llvm.return
+// CHECK-NEXT: }
+
+cir.func @clz_u16(%arg : !u16i) {
+  %0 = cir.clz %arg zero_poison : !u16i
+  cir.return
+}
+
+//      CHECK: llvm.func @clz_u16(%arg0: i16)
+// CHECK-NEXT:   %0 = "llvm.intr.ctlz"(%arg0) <{is_zero_poison = true}> : (i16) -> i16
+// CHECK-NEXT:   llvm.return
+// CHECK-NEXT: }
+
+cir.func @clz_u32(%arg : !u32i) {
+  %0 = cir.clz %arg : !u32i
+  cir.return
+}
+
+//      CHECK: llvm.func @clz_u32(%arg0: i32)
+// CHECK-NEXT:   %0 = "llvm.intr.ctlz"(%arg0) <{is_zero_poison = false}> : (i32) -> i32
+// CHECK-NEXT:   llvm.return
+// CHECK-NEXT: }
+
+cir.func @clz_u64(%arg : !u64i) {
+  %0 = cir.clz %arg zero_poison : !u64i
+  cir.return
+}
+
+//      CHECK: llvm.func @clz_u64(%arg0: i64)
+// CHECK-NEXT:   %0 = "llvm.intr.ctlz"(%arg0) <{is_zero_poison = true}> : (i64) -> i64
+// CHECK-NEXT:   llvm.return
+// CHECK-NEXT: }
+
+cir.func @ctz_u16(%arg : !u16i) {
+  %0 = cir.ctz %arg : !u16i
+  cir.return
+}
+
+//      CHECK: llvm.func @ctz_u16(%arg0: i16)
+// CHECK-NEXT:   %0 = "llvm.intr.cttz"(%arg0) <{is_zero_poison = false}> : (i16) -> i16
+// CHECK-NEXT:   llvm.return
+// CHECK-NEXT: }
+
+cir.func @ctz_u32(%arg : !u32i) {
+  %0 = cir.ctz %arg zero_poison : !u32i
+  cir.return
+}
+
+//      CHECK: llvm.func @ctz_u32(%arg0: i32)
+// CHECK-NEXT:   %0 = "llvm.intr.cttz"(%arg0) <{is_zero_poison = true}> : (i32) -> i32
+// CHECK-NEXT:   llvm.return
+// CHECK-NEXT: }
+
+cir.func @ctz_u64(%arg : !u64i) {
+  %0 = cir.ctz %arg : !u64i
+  cir.return
+}
+
+//      CHECK: llvm.func @ctz_u64(%arg0: i64)
+// CHECK-NEXT:   %0 = "llvm.intr.cttz"(%arg0) <{is_zero_poison = false}> : (i64) -> i64
+// CHECK-NEXT:   llvm.return
+// CHECK-NEXT: }
+
+cir.func @ffs_s32(%arg : !s32i) {
+  %0 = cir.ffs %arg : !s32i
+  cir.return
+}
+
+//      CHECK: llvm.func @ffs_s32(%arg0: i32)
+// CHECK-NEXT:   %0 = "llvm.intr.cttz"(%arg0) <{is_zero_poison = false}> : (i32) -> i32
+// CHECK-NEXT:   %1 = llvm.mlir.constant(1 : i32) : i32
+// CHECK-NEXT:   %2 = llvm.add %0, %1  : i32
+// CHECK-NEXT:   %3 = llvm.mlir.constant(0 : i32) : i32
+// CHECK-NEXT:   %4 = llvm.icmp "eq" %arg0, %3 : i32
+// CHECK-NEXT:   %5 = llvm.mlir.constant(0 : i32) : i32
+// CHECK-NEXT:   %6 = llvm.select %4, %5, %2 : i1, i32
+// CHECK-NEXT:   llvm.return
+// CHECK-NEXT: }
+
+cir.func @ffs_s64(%arg : !s64i) {
+  %0 = cir.ffs %arg : !s64i
+  cir.return
+}
+
+//      CHECK: llvm.func @ffs_s64(%arg0: i64)
+// CHECK-NEXT:   %0 = "llvm.intr.cttz"(%arg0) <{is_zero_poison = false}> : (i64) -> i64
+// CHECK-NEXT:   %1 = llvm.mlir.constant(1 : i64) : i64
+// CHECK-NEXT:   %2 = llvm.add %0, %1  : i64
+// CHECK-NEXT:   %3 = llvm.mlir.constant(0 : i64) : i64
+// CHECK-NEXT:   %4 = llvm.icmp "eq" %arg0, %3 : i64
+// CHECK-NEXT:   %5 = llvm.mlir.constant(0 : i64) : i64
+// CHECK-NEXT:   %6 = llvm.select %4, %5, %2 : i1, i64
+// CHECK-NEXT:   llvm.return
+// CHECK-NEXT: }
+
+cir.func @parity_s32(%arg : !u32i) {
+  %0 = cir.parity %arg : !u32i
+  cir.return
+}
+
+//      CHECK: llvm.func @parity_s32(%arg0: i32)
+// CHECK-NEXT:   %0 = llvm.intr.ctpop(%arg0) : (i32) -> i32
+// CHECK-NEXT:   %1 = llvm.mlir.constant(1 : i32) : i32
+// CHECK-NEXT:   %2 = llvm.and %0, %1  : i32
+// CHECK-NEXT:   llvm.return
+// CHECK-NEXT: }
+
+cir.func @parity_s64(%arg : !u64i) {
+  %0 = cir.parity %arg : !u64i
+  cir.return
+}
+
+//      CHECK: llvm.func @parity_s64(%arg0: i64)
+// CHECK-NEXT:   %0 = llvm.intr.ctpop(%arg0) : (i64) -> i64
+// CHECK-NEXT:   %1 = llvm.mlir.constant(1 : i64) : i64
+// CHECK-NEXT:   %2 = llvm.and %0, %1 : i64
+// CHECK-NEXT:   llvm.return
+// CHECK-NEXT: }
+
+cir.func @popcount_u16(%arg : !u16i) {
+  %0 = cir.popcount %arg : !u16i
+  cir.return
+}
+
+//      CHECK: llvm.func @popcount_u16(%arg0: i16)
+// CHECK-NEXT:   %0 = llvm.intr.ctpop(%arg0) : (i16) -> i16
+// CHECK-NEXT:   llvm.return
+// CHECK-NEXT: }
+
+cir.func @popcount_u32(%arg : !u32i) {
+  %0 = cir.popcount %arg : !u32i
+  cir.return
+}
+
+//      CHECK: llvm.func @popcount_u32(%arg0: i32)
+// CHECK-NEXT:   %0 = llvm.intr.ctpop(%arg0) : (i32) -> i32
+// CHECK-NEXT:   llvm.return
+// CHECK-NEXT: }
+
+cir.func @popcount_u64(%arg : !u64i) {
+  %0 = cir.popcount %arg : !u64i
+  cir.return
+}
+
+//      CHECK: llvm.func @popcount_u64(%arg0: i64)
+// CHECK-NEXT:   %0 = llvm.intr.ctpop(%arg0) : (i64) -> i64
+// CHECK-NEXT:   llvm.return
+// CHECK-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/bitfields.c b/clang/test/CIR/Incubator/Lowering/bitfields.c
new file mode 100644
index 0000000000000..19de2ab194d41
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/bitfields.c
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+typedef struct {
+  int a : 4;
+} B;
+
+// LLVM: define dso_local void @set_signed
+// LLVM:   [[TMP0:%.*]] = load ptr
+// LLVM:   [[TMP1:%.*]] = getelementptr %struct.B, ptr [[TMP0]], i32 0, i32 0
+// LLVM:   [[TMP2:%.*]] = load i8, ptr [[TMP1]]
+// LLVM:   [[TMP3:%.*]] = and i8 [[TMP2]], -16
+// LLVM:   [[TMP4:%.*]] = or i8 [[TMP3]], 14
+// LLVM:   store i8 [[TMP4]], ptr [[TMP1]]
+void set_signed(B* b) {
+  b->a = -2;
+}
+
+// LLVM: define dso_local i32 @get_signed
+// LLVM:   [[TMP0:%.*]] = alloca i32
+// LLVM:   [[TMP1:%.*]] = load ptr
+// LLVM:   [[TMP2:%.*]] = getelementptr %struct.B, ptr [[TMP1]], i32 0, i32 0
+// LLVM:   [[TMP3:%.*]] = load i8, ptr [[TMP2]]
+// LLVM:   [[TMP4:%.*]] = shl i8 [[TMP3]], 4
+// LLVM:   [[TMP5:%.*]] = ashr i8 [[TMP4]], 4
+// LLVM:   [[TMP6:%.*]] = sext i8 [[TMP5]] to i32
+// LLVM:   store i32 [[TMP6]], ptr [[TMP0]]
+// LLVM:   [[TMP7:%.*]] = load i32, ptr [[TMP0]]
+// LLVM:   ret i32 [[TMP7]]
+int get_signed(B* b) {
+  return b->a;
+}
diff --git a/clang/test/CIR/Incubator/Lowering/bitint.cir b/clang/test/CIR/Incubator/Lowering/bitint.cir
new file mode 100644
index 0000000000000..2303b92a6f68a
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/bitint.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering  | FileCheck %s -check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.func @ParamPassing(%arg0: !cir.int<s, 15>, %arg1: !cir.int<s, 31>) -> !cir.int<s, 2> {
+    %0 = cir.cast integral %arg0 : !cir.int<s, 15> -> !s32i
+    %1 = cir.cast integral %arg1 : !cir.int<s, 31> -> !s32i
+    %2 = cir.binop(add, %0, %1) : !s32i
+    %3 = cir.cast integral %2 : !s32i -> !cir.int<s, 2>
+    cir.return %3 : !cir.int<s, 2>
+  }
+}
+
+//      MLIR: llvm.func @ParamPassing(%arg0: i15, %arg1: i31) -> i2
+// MLIR-NEXT:   %0 = llvm.sext %arg0 : i15 to i32
+// MLIR-NEXT:   %1 = llvm.sext %arg1 : i31 to i32
+// MLIR-NEXT:   %2 = llvm.add %0, %1  : i32
+// MLIR-NEXT:   %3 = llvm.trunc %2 : i32 to i2
+// MLIR-NEXT:   llvm.return %3 : i2
+// MLIR-NEXT: }
+
+//      LLVM: define i2 @ParamPassing(i15 %0, i31 %1)
+// LLVM-NEXT:   %3 = sext i15 %0 to i32
+// LLVM-NEXT:   %4 = sext i31 %1 to i32
+// LLVM-NEXT:   %5 = add i32 %3, %4
+// LLVM-NEXT:   %6 = trunc i32 %5 to i2
+// LLVM-NEXT:   ret i2 %6
+// LLVM-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/bool-to-int.cir b/clang/test/CIR/Incubator/Lowering/bool-to-int.cir
new file mode 100644
index 0000000000000..c9b46715422e8
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/bool-to-int.cir
@@ -0,0 +1,21 @@
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering  | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+#false = #cir.bool<false> : !cir.bool
+#true = #cir.bool<true> : !cir.bool
+
+module {
+  cir.func @foo(%arg0: !s32i, %arg1: !s32i) -> !s32i {
+    %1 = cir.const #true
+    %2 = cir.cast bool_to_int %1 : !cir.bool -> !s32i
+    cir.return %2 : !s32i
+  }
+  cir.func @bar(%arg0: !s32i, %arg1: !s32i) -> !s32i {
+    %1 = cir.const #false
+    %2 = cir.cast bool_to_int %1 : !cir.bool -> !s32i
+    cir.return %2 : !s32i
+  }
+}
+
+// CHECK: ret i32 1
+// CHECK: ret i32 0
diff --git a/clang/test/CIR/Incubator/Lowering/bool.cir b/clang/test/CIR/Incubator/Lowering/bool.cir
new file mode 100644
index 0000000000000..848b552f897ab
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/bool.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering  | FileCheck %s -check-prefix=LLVM
+
+#false = #cir.bool<false> : !cir.bool
+#true = #cir.bool<true> : !cir.bool
+
+module {
+  cir.global external @g_bl = #false
+// MLIR: llvm.mlir.global external @g_bl(false) {addr_space = 0 : i32} : i8
+// LLVM: @g_bl = global i8 0
+
+  cir.func @foo() {
+    %1 = cir.const #true
+    %0 = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["a", init] {alignment = 1 : i64}
+    cir.store %1, %0 : !cir.bool, !cir.ptr<!cir.bool>
+    cir.return
+  }
+//      MLIR: llvm.func @foo()
+//  MLIR-DAG: %[[TRUE:.*]] = llvm.mlir.constant(true) : i1
+//  MLIR-DAG: %[[VALUE:.*]] = llvm.mlir.constant(1 : index) : i64
+//  MLIR-DAG: %[[ADDR:.*]] = llvm.alloca %[[VALUE]] x i8 {alignment = 1 : i64} : (i64) -> !llvm.ptr
+//  MLIR-DAG: %[[TRUE_EXT:.*]] = llvm.zext %[[TRUE]] : i1 to i8
+//  MLIR-DAG: llvm.store %[[TRUE_EXT]], %[[ADDR]] {{.*}} : i8, !llvm.ptr
+// MLIR-NEXT: llvm.return
+
+//      LLVM: define void @foo()
+// LLVM-NEXT:   %1 = alloca i8, i64 1, align 1
+// LLVM-NEXT:   store i8 1, ptr %1, align 1
+// LLVM-NEXT:   ret void
+}
diff --git a/clang/test/CIR/Incubator/Lowering/branch.cir b/clang/test/CIR/Incubator/Lowering/branch.cir
new file mode 100644
index 0000000000000..0daea329f4b8f
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/branch.cir
@@ -0,0 +1,35 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering  | FileCheck %s -check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+cir.func @foo(%arg0: !cir.bool) -> !s32i {
+  cir.brcond %arg0 ^bb1, ^bb2
+  ^bb1:
+    %0 = cir.const #cir.int<1>: !s32i
+    cir.return %0 : !s32i
+  ^bb2:
+    %1 = cir.const #cir.int<0>: !s32i
+    cir.return %1 : !s32i
+}
+
+//      MLIR: module {
+// MLIR-NEXT:   llvm.func @foo(%arg0: i1) -> i32
+// MLIR-NEXT:     llvm.cond_br %arg0, ^bb1, ^bb2
+// MLIR-NEXT:   ^bb1:  // pred: ^bb0
+// MLIR-NEXT:     %0 = llvm.mlir.constant(1 : i32) : i32
+// MLIR-NEXT:     llvm.return %0 : i32
+// MLIR-NEXT:   ^bb2:  // pred: ^bb0
+// MLIR-NEXT:     %1 = llvm.mlir.constant(0 : i32) : i32
+// MLIR-NEXT:     llvm.return %1 : i32
+// MLIR-NEXT:   }
+// MLIR-NEXT: }
+
+//       LLVM: define i32 @foo(i1 %0)
+//  LLVM-NEXT:   br i1 %0, label %2, label %3
+// LLVM-EMPTY:
+//  LLVM-NEXT: 2:                                                ; preds = %1
+//  LLVM-NEXT:   ret i32 1
+// LLVM-EMPTY:
+//  LLVM-NEXT: 3:                                                ; preds = %1
+//  LLVM-NEXT:   ret i32 0
+//  LLVM-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/brcond.cir b/clang/test/CIR/Incubator/Lowering/brcond.cir
new file mode 100644
index 0000000000000..23294baed11b6
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/brcond.cir
@@ -0,0 +1,43 @@
+// RUN: cir-opt %s -cir-to-llvm | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering | FileCheck %s -check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+#fn_attr = #cir<extra({nothrow = #cir.nothrow})>
+module {
+  cir.func no_inline no_proto optnone @test() -> !cir.bool extra(#fn_attr) {
+    %0 = cir.const #cir.int<0> : !s32i
+    %1 = cir.cast int_to_bool %0 : !s32i -> !cir.bool
+    cir.br ^bb1
+  ^bb1:
+    cir.brcond %1 ^bb2, ^bb3
+  ^bb2:
+    cir.return %1 : !cir.bool
+  ^bb3:
+    cir.br ^bb4
+  ^bb4:
+    cir.return %1 : !cir.bool
+  }
+}
+
+// MLIR:         {{.*}} = llvm.mlir.constant(0 : i32) : i32
+// MLIR-NEXT:    {{.*}} = llvm.mlir.constant(0 : i32) : i32
+// MLIR-NEXT:    {{.*}} = llvm.icmp "ne" {{.*}}, {{.*}} : i32
+// MLIR-NEXT:    llvm.br ^bb1
+// MLIR-NEXT:  ^bb1:
+// MLIR-NEXT:    llvm.cond_br {{.*}}, ^bb2, ^bb3
+// MLIR-NEXT:  ^bb2:
+// MLIR-NEXT:    llvm.return {{.*}} : i1
+// MLIR-NEXT:  ^bb3:
+// MLIR-NEXT:    llvm.br ^bb4
+// MLIR-NEXT:  ^bb4:
+// MLIR-NEXT:    llvm.return {{.*}} : i1
+
+// LLVM: br label {{.*}}
+// LLVM: 1:
+// LLVM: br i1 false, label {{.*}}, label {{.*}}
+// LLVM: 2:
+// LLVM:  ret i1 false
+// LLVM: 3:
+// LLVM:  br label {{.*}}
+// LLVM: 4:
+// LLVM:  ret i1 false
diff --git a/clang/test/CIR/Incubator/Lowering/bswap.cir b/clang/test/CIR/Incubator/Lowering/bswap.cir
new file mode 100644
index 0000000000000..b3081ea273827
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/bswap.cir
@@ -0,0 +1,19 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering  | FileCheck %s -check-prefix=LLVM
+
+!u32i = !cir.int<u, 32>
+
+cir.func @test(%arg0: !u32i) -> !u32i {
+  %0 = cir.byte_swap %arg0 : !u32i
+  cir.return %0 : !u32i
+}
+
+//      MLIR: llvm.func @test(%arg0: i32) -> i32
+// MLIR-NEXT:   %0 = llvm.intr.bswap(%arg0) : (i32) -> i32
+// MLIR-NEXT:   llvm.return %0 : i32
+// MLIR-NEXT: }
+
+//      LLVM: define i32 @test(i32 %0)
+// LLVM-NEXT:   %2 = call i32 @llvm.bswap.i32(i32 %0)
+// LLVM-NEXT:   ret i32 %2
+// LLVM-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/builtin-binary-fp2fp.c b/clang/test/CIR/Incubator/Lowering/builtin-binary-fp2fp.c
new file mode 100644
index 0000000000000..2877aa5cef30c
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/builtin-binary-fp2fp.c
@@ -0,0 +1,194 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fmath-errno -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -ffast-math -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM-FASTMATH
+
+// copysign
+
+float my_copysignf(float x, float y) {
+  return __builtin_copysignf(x, y);
+}
+
+// LLVM: define dso_local float @my_copysignf
+// LLVM:   %{{.+}} = call float @llvm.copysign.f32(float %{{.+}}, float %{{.+}})
+// LLVM: }
+
+// LLVM-FASTMATH: define dso_local float @my_copysignf
+// LLVM-FASTMATH:   %{{.+}} = call float @llvm.copysign.f32(float %{{.+}}, float %{{.+}})
+// LLVM-FASTMATH: }
+
+double my_copysign(double x, double y) {
+  return __builtin_copysign(x, y);
+}
+
+// LLVM: define dso_local double @my_copysign
+// LLVM:   %{{.+}} = call double @llvm.copysign.f64(double %{{.+}}, double %{{.+}})
+// LLVM: }
+
+// LLVM-FASTMATH: define dso_local double @my_copysign
+// LLVM-FASTMATH:   %{{.+}} = call double @llvm.copysign.f64(double %{{.+}}, double %{{.+}})
+// LLVM-FASTMATH: }
+
+long double my_copysignl(long double x, long double y) {
+  return __builtin_copysignl(x, y);
+}
+
+// LLVM: define dso_local x86_fp80 @my_copysignl
+// LLVM:   %{{.+}} = call x86_fp80 @llvm.copysign.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+// LLVM: }
+
+// LLVM-FASTMATH: define dso_local x86_fp80 @my_copysignl
+// LLVM-FASTMATH:   %{{.+}} = call x86_fp80 @llvm.copysign.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+// LLVM-FASTMATH: }
+
+// fmax
+
+float my_fmaxf(float x, float y) {
+  return __builtin_fmaxf(x, y);
+}
+
+// LLVM: define dso_local float @my_fmaxf
+// LLVM:   %{{.+}} = call float @llvm.maxnum.f32(float %{{.+}}, float %{{.+}})
+// LLVM: }
+
+// LLVM-FASTMATH: define dso_local float @my_fmaxf
+// LLVM-FASTMATH:   %{{.+}} = call float @llvm.maxnum.f32(float %{{.+}}, float %{{.+}})
+// LLVM-FASTMATH: }
+
+double my_fmax(double x, double y) {
+  return __builtin_fmax(x, y);
+}
+
+// LLVM: define dso_local double @my_fmax
+// LLVM:   %{{.+}} = call double @llvm.maxnum.f64(double %{{.+}}, double %{{.+}})
+// LLVM: }
+
+// LLVM-FASTMATH: define dso_local double @my_fmax
+// LLVM-FASTMATH:   %{{.+}} = call double @llvm.maxnum.f64(double %{{.+}}, double %{{.+}})
+// LLVM-FASTMATH: }
+
+long double my_fmaxl(long double x, long double y) {
+  return __builtin_fmaxl(x, y);
+}
+
+// LLVM: define dso_local x86_fp80 @my_fmaxl
+// LLVM:   %{{.+}} = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+// LLVM: }
+
+// LLVM-FASTMATH: define dso_local x86_fp80 @my_fmaxl
+// LLVM-FASTMATH:   %{{.+}} = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+// LLVM-FASTMATH: }
+
+// fmin
+
+float my_fminf(float x, float y) {
+  return __builtin_fminf(x, y);
+}
+
+// LLVM: define dso_local float @my_fminf
+// LLVM:   %{{.+}} = call float @llvm.minnum.f32(float %{{.+}}, float %{{.+}})
+// LLVM: }
+
+// LLVM-FASTMATH: define dso_local float @my_fminf
+// LLVM-FASTMATH:   %{{.+}} = call float @llvm.minnum.f32(float %{{.+}}, float %{{.+}})
+// LLVM-FASTMATH: }
+
+double my_fmin(double x, double y) {
+  return __builtin_fmin(x, y);
+}
+
+// LLVM: define dso_local double @my_fmin
+// LLVM:   %{{.+}} = call double @llvm.minnum.f64(double %{{.+}}, double %{{.+}})
+// LLVM: }
+
+// LLVM-FASTMATH: define dso_local double @my_fmin
+// LLVM-FASTMATH:   %{{.+}} = call double @llvm.minnum.f64(double %{{.+}}, double %{{.+}})
+// LLVM-FASTMATH: }
+
+long double my_fminl(long double x, long double y) {
+  return __builtin_fminl(x, y);
+}
+
+// LLVM: define dso_local x86_fp80 @my_fminl
+// LLVM:   %{{.+}} = call x86_fp80 @llvm.minnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+// LLVM: }
+
+// LLVM-FASTMATH: define dso_local x86_fp80 @my_fminl
+// LLVM-FASTMATH:   %{{.+}} = call x86_fp80 @llvm.minnum.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+// LLVM-FASTMATH: }
+
+// fmod
+
+float my_fmodf(float x, float y) {
+  return __builtin_fmodf(x, y);
+}
+
+// LLVM: define dso_local float @my_fmodf
+// LLVM:   %{{.+}} = call float @fmodf(float %{{.+}}, float %{{.+}})
+// LLVM: }
+
+// LLVM-FASTMATH: define dso_local float @my_fmodf
+// LLVM-FASTMATH:   %{{.+}} = frem float %{{.+}}, %{{.+}}
+// LLVM-FASTMATH: }
+
+double my_fmod(double x, double y) {
+  return __builtin_fmod(x, y);
+}
+
+// LLVM: define dso_local double @my_fmod
+// LLVM:   %{{.+}} = call double @fmod(double %{{.+}}, double %{{.+}})
+// LLVM: }
+
+// LLVM-FASTMATH: define dso_local double @my_fmod
+// LLVM-FASTMATH:   %{{.+}} = frem double %{{.+}}, %{{.+}}
+// LLVM-FASTMATH: }
+
+long double my_fmodl(long double x, long double y) {
+  return __builtin_fmodl(x, y);
+}
+
+// LLVM: define dso_local x86_fp80 @my_fmodl
+// LLVM:   %{{.+}} = call x86_fp80 @fmodl(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+// LLVM: }
+
+// LLVM-FASTMATH: define dso_local x86_fp80 @my_fmodl
+// LLVM-FASTMATH:   %{{.+}} = frem x86_fp80 %{{.+}}, %{{.+}}
+// LLVM-FASTMATH: }
+
+// pow
+
+float my_powf(float x, float y) {
+  return __builtin_powf(x, y);
+}
+
+// LLVM: define dso_local float @my_powf
+// LLVM:   %{{.+}} = call float @powf(float %{{.+}}, float %{{.+}})
+// LLVM: }
+
+// LLVM-FASTMATH: define dso_local float @my_powf
+// LLVM-FASTMATH:   %{{.+}} = call float @llvm.pow.f32(float %{{.+}}, float %{{.+}})
+// LLVM-FASTMATH: }
+
+double my_pow(double x, double y) {
+  return __builtin_pow(x, y);
+}
+
+// LLVM: define dso_local double @my_pow
+// LLVM:   %{{.+}} = call double @pow(double %{{.+}}, double %{{.+}})
+// LLVM: }
+
+// LLVM-FASTMATH: define dso_local double @my_pow
+// LLVM-FASTMATH:   %{{.+}} = call double @llvm.pow.f64(double %{{.+}}, double %{{.+}})
+// LLVM-FASTMATH: }
+
+long double my_powl(long double x, long double y) {
+  return __builtin_powl(x, y);
+}
+
+// LLVM: define dso_local x86_fp80 @my_powl
+// LLVM:   %{{.+}} = call x86_fp80 @powl(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+// LLVM: }
+
+// LLVM-FASTMATH: define dso_local x86_fp80 @my_powl
+// LLVM-FASTMATH:   %{{.+}} = call x86_fp80 @llvm.pow.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}})
+// LLVM-FASTMATH: }
diff --git a/clang/test/CIR/Incubator/Lowering/builtin-floating-point.cir b/clang/test/CIR/Incubator/Lowering/builtin-floating-point.cir
new file mode 100644
index 0000000000000..211a6f2a1790e
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/builtin-floating-point.cir
@@ -0,0 +1,197 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s
+
+module {
+  cir.func @test(%arg0 : !cir.float, %arg1 : !cir.vector<!cir.double x 2>, %arg2 : !cir.vector<!cir.float x 4>) {
+    %1 = cir.cos %arg0 : !cir.float
+    // CHECK: llvm.intr.cos(%arg0) : (f32) -> f32
+    
+    %101 = cir.cos %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.cos(%arg1) : (vector<2xf64>) -> vector<2xf64>
+
+    %201 = cir.cos %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.cos(%arg2)  : (vector<4xf32>) -> vector<4xf32>
+    
+    %2 = cir.ceil %arg0 : !cir.float
+    // CHECK: llvm.intr.ceil(%arg0) : (f32) -> f32
+
+    %102 = cir.ceil %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.ceil(%arg1) : (vector<2xf64>) -> vector<2xf64>
+
+    %202 = cir.ceil %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.ceil(%arg2)  : (vector<4xf32>) -> vector<4xf32>
+
+    %3 = cir.exp %arg0 : !cir.float
+    // CHECK: llvm.intr.exp(%arg0) : (f32) -> f32
+
+    %103 = cir.exp %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.exp(%arg1) : (vector<2xf64>) -> vector<2xf64>
+
+    %203 = cir.exp %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.exp(%arg2)  : (vector<4xf32>) -> vector<4xf32>
+
+    %4 = cir.exp2 %arg0 : !cir.float
+    // CHECK: llvm.intr.exp2(%arg0) : (f32) -> f32
+
+    %104 = cir.exp2 %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.exp2(%arg1) : (vector<2xf64>) -> vector<2xf64>
+
+    %204 = cir.exp2 %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.exp2(%arg2)  : (vector<4xf32>) -> vector<4xf32>
+
+    %5 = cir.fabs %arg0 : !cir.float
+    // CHECK: llvm.intr.fabs(%arg0) : (f32) -> f32
+
+    %105 = cir.fabs %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.fabs(%arg1) : (vector<2xf64>) -> vector<2xf64>
+
+    %205 = cir.fabs %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.fabs(%arg2)  : (vector<4xf32>) -> vector<4xf32>
+
+    %6 = cir.floor %arg0 : !cir.float
+    // CHECK: llvm.intr.floor(%arg0) : (f32) -> f32
+
+    %106 = cir.floor %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.floor(%arg1) : (vector<2xf64>) -> vector<2xf64>
+
+    %206 = cir.floor %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.floor(%arg2)  : (vector<4xf32>) -> vector<4xf32>
+
+    %7 = cir.log %arg0 : !cir.float
+    // CHECK: llvm.intr.log(%arg0) : (f32) -> f32
+
+    %107 = cir.log %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.log(%arg1) : (vector<2xf64>) -> vector<2xf64>
+
+    %207 = cir.log %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.log(%arg2)  : (vector<4xf32>) -> vector<4xf32>
+
+    %8 = cir.log10 %arg0 : !cir.float
+    // CHECK: llvm.intr.log10(%arg0) : (f32) -> f32
+
+    %108 = cir.log10 %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.log10(%arg1) : (vector<2xf64>) -> vector<2xf64>
+
+    %208 = cir.log10 %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.log10(%arg2)  : (vector<4xf32>) -> vector<4xf32>
+
+    %9 = cir.log2 %arg0 : !cir.float
+    // CHECK: llvm.intr.log2(%arg0) : (f32) -> f32
+
+    %109 = cir.log2 %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.log2(%arg1) : (vector<2xf64>) -> vector<2xf64>
+
+    %209 = cir.log2 %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.log2(%arg2)  : (vector<4xf32>) -> vector<4xf32>
+
+    %10 = cir.nearbyint %arg0 : !cir.float
+    // CHECK: llvm.intr.nearbyint(%arg0) : (f32) -> f32
+
+    %110 = cir.nearbyint %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.nearbyint(%arg1) : (vector<2xf64>) -> vector<2xf64>
+
+    %210 = cir.nearbyint %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.nearbyint(%arg2)  : (vector<4xf32>) -> vector<4xf32>
+
+    %11 = cir.rint %arg0 : !cir.float
+    // CHECK: llvm.intr.rint(%arg0) : (f32) -> f32
+
+    %111 = cir.rint %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.rint(%arg1) : (vector<2xf64>) -> vector<2xf64>
+
+    %211 = cir.rint %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.rint(%arg2)  : (vector<4xf32>) -> vector<4xf32>
+
+    %12 = cir.round %arg0 : !cir.float
+    // CHECK: llvm.intr.round(%arg0) : (f32) -> f32
+
+    %112 = cir.round %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.round(%arg1) : (vector<2xf64>) -> vector<2xf64>
+
+    %212 = cir.round %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.round(%arg2)  : (vector<4xf32>) -> vector<4xf32>
+
+    %13 = cir.sin %arg0 : !cir.float
+    // CHECK: llvm.intr.sin(%arg0) : (f32) -> f32
+
+    %113 = cir.sin %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.sin(%arg1) : (vector<2xf64>) -> vector<2xf64>
+
+    %213 = cir.sin %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.sin(%arg2)  : (vector<4xf32>) -> vector<4xf32>
+
+    %14 = cir.sqrt %arg0 : !cir.float
+    // CHECK: llvm.intr.sqrt(%arg0) : (f32) -> f32
+
+    %114 = cir.sqrt %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.sqrt(%arg1) : (vector<2xf64>) -> vector<2xf64>
+
+    %214 = cir.sqrt %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.sqrt(%arg2)  : (vector<4xf32>) -> vector<4xf32>
+
+    %15 = cir.copysign %arg0, %arg0 : !cir.float
+    // CHECK: llvm.intr.copysign(%arg0, %arg0) : (f32, f32) -> f32
+
+    %115 = cir.copysign %arg1, %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.copysign(%arg1, %arg1) : (vector<2xf64>, vector<2xf64>) -> vector<2xf64>
+
+    %215 = cir.copysign %arg2, %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.copysign(%arg2, %arg2) : (vector<4xf32>, vector<4xf32>) -> vector<4xf32>
+
+    %16 = cir.fmaxnum %arg0, %arg0 : !cir.float
+    // CHECK: llvm.intr.maxnum(%arg0, %arg0) : (f32, f32) -> f32
+
+    %116 = cir.fmaxnum %arg1, %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.maxnum(%arg1, %arg1) : (vector<2xf64>, vector<2xf64>) -> vector<2xf64>
+
+    %216 = cir.fmaxnum %arg2, %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.maxnum(%arg2, %arg2) : (vector<4xf32>, vector<4xf32>) -> vector<4xf32>
+
+    %17 = cir.fminnum %arg0, %arg0 : !cir.float
+    // CHECK: llvm.intr.minnum(%arg0, %arg0) : (f32, f32) -> f32
+
+    %117 = cir.fminnum %arg1, %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.minnum(%arg1, %arg1) : (vector<2xf64>, vector<2xf64>) -> vector<2xf64>
+
+    %217 = cir.fminnum %arg2, %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.minnum(%arg2, %arg2) : (vector<4xf32>, vector<4xf32>) -> vector<4xf32>
+
+    %18 = cir.fmod %arg0, %arg0 : !cir.float
+    // CHECK: llvm.frem %arg0, %arg0 : f32
+
+    %118 = cir.fmod %arg1, %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.frem %arg1, %arg1 : vector<2xf64>
+
+    %218 = cir.fmod %arg2, %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.frem %arg2, %arg2 : vector<4xf32>
+
+    %19 = cir.pow %arg0, %arg0 : !cir.float
+    // CHECK: llvm.intr.pow(%arg0, %arg0) : (f32, f32) -> f32
+
+    %119 = cir.pow %arg1, %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.pow(%arg1, %arg1) : (vector<2xf64>, vector<2xf64>) -> vector<2xf64>
+
+    %219 = cir.pow %arg2, %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.pow(%arg2, %arg2) : (vector<4xf32>, vector<4xf32>) -> vector<4xf32>
+
+    %20 = cir.fmaximum %arg0, %arg0 : !cir.float
+    // CHECK: llvm.intr.maximum(%arg0, %arg0) : (f32, f32) -> f32
+
+    %120 = cir.fmaximum %arg1, %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.maximum(%arg1, %arg1) : (vector<2xf64>, vector<2xf64>) -> vector<2xf64>
+
+    %220 = cir.fmaximum %arg2, %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.maximum(%arg2, %arg2) : (vector<4xf32>, vector<4xf32>) -> vector<4xf32>
+
+    %21 = cir.fminimum %arg0, %arg0 : !cir.float
+    // CHECK: llvm.intr.minimum(%arg0, %arg0) : (f32, f32) -> f32
+
+    %121 = cir.fminimum %arg1, %arg1 : !cir.vector<!cir.double x 2>
+    // CHECK: llvm.intr.minimum(%arg1, %arg1) : (vector<2xf64>, vector<2xf64>) -> vector<2xf64>
+
+    %221 = cir.fminimum %arg2, %arg2 : !cir.vector<!cir.float x 4>
+    // CHECK: llvm.intr.minimum(%arg2, %arg2) : (vector<4xf32>, vector<4xf32>) -> vector<4xf32>
+
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/builtin-isfpclass.c b/clang/test/CIR/Incubator/Lowering/builtin-isfpclass.c
new file mode 100644
index 0000000000000..630ded117ab35
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/builtin-isfpclass.c
@@ -0,0 +1,125 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s
+
+int finite(double);
+
+// CHECK: define {{.*}}@test_is_finite
+void test_is_finite(__fp16 *H, float F, double D, long double LD) {
+    volatile int res;
+    res = __builtin_isinf(*H);
+    // CHECK: call i1 @llvm.is.fpclass.f16(half %{{.*}}, i32 516)
+    res = __builtin_isinf(F);
+    // CHECK: call i1 @llvm.is.fpclass.f32(float %{{.*}}, i32 516)
+    res = __builtin_isinf(D);
+    // CHECK: call i1 @llvm.is.fpclass.f64(double %{{.*}}, i32 516)
+    res = __builtin_isinf(LD);
+    // CHECK: call i1 @llvm.is.fpclass.f80(x86_fp80 %{{.*}}, i32 516)
+
+    res = __builtin_isfinite(*H);
+    // CHECK: call i1 @llvm.is.fpclass.f16(half %{{.*}}, i32 504)
+    res = __builtin_isfinite(F);
+    // CHECK: call i1 @llvm.is.fpclass.f32(float %{{.*}}, i32 504)
+    res = finite(D);
+    // CHECK: call i1 @llvm.is.fpclass.f64(double %{{.*}}, i32 504)
+
+    res = __builtin_isnormal(*H);
+    // CHECK: call i1 @llvm.is.fpclass.f16(half %{{.*}}, i32 264)
+    res = __builtin_isnormal(F);
+    // CHECK: call i1 @llvm.is.fpclass.f32(float %{{.*}}, i32 264)
+
+    res = __builtin_issubnormal(F);
+    // CHECK: call i1 @llvm.is.fpclass.f32(float %{{.*}}, i32 144)
+    res = __builtin_iszero(F);
+    // CHECK: call i1 @llvm.is.fpclass.f32(float %{{.*}}, i32 96)
+    res = __builtin_issignaling(F);
+    // CHECK: call i1 @llvm.is.fpclass.f32(float %{{.*}}, i32 1)
+}
+
+_Bool check_isfpclass_finite(float x) {
+  return __builtin_isfpclass(x, 504 /*Finite*/);
+}
+
+// CHECK: define {{.*}}@check_isfpclass_finite
+// CHECK: call i1 @llvm.is.fpclass.f32(float %{{.*}}, i32 504)
+
+_Bool check_isfpclass_nan_f32(float x) {
+  return __builtin_isfpclass(x, 3 /*NaN*/);
+}
+
+// CHECK: define {{.*}}@check_isfpclass_nan_f32
+// CHECK: call i1 @llvm.is.fpclass.f32(float %{{.*}}, i32 3)
+
+_Bool check_isfpclass_snan_f64(double x) {
+  return __builtin_isfpclass(x, 1 /*SNaN*/);
+}
+
+// CHECK: define {{.*}}@check_isfpclass_snan_f64
+// CHECK: call i1 @llvm.is.fpclass.f64(double %{{.*}}, i32 1)
+
+
+_Bool check_isfpclass_zero_f16(_Float16 x) {
+  return __builtin_isfpclass(x, 96 /*Zero*/);
+}
+
+// CHECK: define {{.*}}@check_isfpclass_zero_f16
+// CHECK: call i1 @llvm.is.fpclass.f16(half %{{.*}}, i32 96)
+
+// TODO: Re-enable the commented-out tests below once the STDC FENV_ACCESS pragma is supported inside functions.
+
+// _Bool check_isfpclass_finite_strict(float x) {
+// #pragma STDC FENV_ACCESS ON
+//   return __builtin_isfpclass(x, 504 /*Finite*/);
+// }
+// 
+// _Bool check_isfpclass_nan_f32_strict(float x) {
+// #pragma STDC FENV_ACCESS ON
+//   return __builtin_isfpclass(x, 3 /*NaN*/);
+// }
+// 
+// _Bool check_isfpclass_snan_f64_strict(double x) {
+// #pragma STDC FENV_ACCESS ON
+//   return __builtin_isfpclass(x, 1 /*SNaN*/);
+// }
+// 
+// _Bool check_isfpclass_zero_f16_strict(_Float16 x) {
+// #pragma STDC FENV_ACCESS ON
+//   return __builtin_isfpclass(x, 96 /*Zero*/);
+// }
+// 
+// _Bool check_isnan(float x) {
+// #pragma STDC FENV_ACCESS ON
+//   return __builtin_isnan(x);
+// }
+// 
+// _Bool check_isinf(float x) {
+// #pragma STDC FENV_ACCESS ON
+//   return __builtin_isinf(x);
+// }
+// 
+// _Bool check_isfinite(float x) {
+// #pragma STDC FENV_ACCESS ON
+//   return __builtin_isfinite(x);
+// }
+// 
+// _Bool check_isnormal(float x) {
+// #pragma STDC FENV_ACCESS ON
+//   return __builtin_isnormal(x);
+// }
+// 
+// typedef float __attribute__((ext_vector_type(4))) float4;
+// typedef double __attribute__((ext_vector_type(4))) double4;
+// typedef int __attribute__((ext_vector_type(4))) int4;
+// typedef long __attribute__((ext_vector_type(4))) long4;
+// 
+// int4 check_isfpclass_nan_v4f32(float4 x) {
+//   return __builtin_isfpclass(x, 3 /*NaN*/);
+// }
+// 
+// int4 check_isfpclass_nan_strict_v4f32(float4 x) {
+// #pragma STDC FENV_ACCESS ON
+//   return __builtin_isfpclass(x, 3 /*NaN*/);
+// }
+// 
+// long4 check_isfpclass_nan_v4f64(double4 x) {
+//   return __builtin_isfpclass(x, 3 /*NaN*/);
+// }
diff --git a/clang/test/CIR/Incubator/Lowering/call-op-call-conv.cir b/clang/test/CIR/Incubator/Lowering/call-op-call-conv.cir
new file mode 100644
index 0000000000000..9bd6e0a1be2a3
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/call-op-call-conv.cir
@@ -0,0 +1,19 @@
+// RUN: cir-translate -cir-to-llvmir --disable-cc-lowering %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+!fnptr = !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+
+module {
+  cir.func private @my_add(%a: !s32i, %b: !s32i) -> !s32i cc(spir_function)
+
+  cir.func @ind(%fnptr: !fnptr, %a : !s32i) {
+    %1 = cir.call %fnptr(%a) : (!fnptr, !s32i) -> !s32i cc(spir_function)
+    // LLVM: %{{[0-9]+}} = call spir_func i32 %{{[0-9]+}}(i32 %{{[0-9]+}})
+
+    %2 = cir.call @my_add(%1, %1) : (!s32i, !s32i) -> !s32i cc(spir_function)
+    // LLVM: %{{[0-9]+}} = call spir_func i32 @my_add(i32 %{{[0-9]+}}, i32 %{{[0-9]+}})
+
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/call.cir b/clang/test/CIR/Incubator/Lowering/call.cir
new file mode 100644
index 0000000000000..bcd8a37aabfed
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/call.cir
@@ -0,0 +1,121 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering  | FileCheck %s -check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+module {
+  cir.func @a() {
+    cir.return
+  }
+  cir.func @d() {
+    cir.call @a() : () -> ()
+    cir.return
+  }
+
+//      MLIR: llvm.func @a()
+// MLIR-NEXT:   llvm.return
+// MLIR-NEXT: }
+// MLIR-NEXT: llvm.func @d()
+// MLIR-NEXT:   llvm.call @a() : () -> ()
+// MLIR-NEXT:   llvm.return
+// MLIR-NEXT: }
+
+//      LLVM: define void @a()
+// LLVM-NEXT:   ret void
+// LLVM-NEXT: }
+//      LLVM: define void @d()
+// LLVM-NEXT:   call void @a()
+// LLVM-NEXT:   ret void
+// LLVM-NEXT: }
+
+  // check operands and results type lowering
+  cir.func @callee(!cir.ptr<i32>) -> !cir.ptr<i32> attributes {sym_visibility = "private"}
+  // MLIR: llvm.func @callee(!llvm.ptr) -> !llvm.ptr
+  cir.func @caller(%arg0: !cir.ptr<i32>) -> !cir.ptr<i32> {
+  // MLIR: llvm.func @caller(%arg0: !llvm.ptr) -> !llvm.ptr
+    %0 = cir.call @callee(%arg0) : (!cir.ptr<i32>) -> !cir.ptr<i32>
+    // MLIR: %{{[0-9]+}} = llvm.call @callee(%arg0) : (!llvm.ptr) -> !llvm.ptr
+    cir.return %0 : !cir.ptr<i32>
+  }
+
+  // check indirect call lowering
+  cir.global "private" external @fp : !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+  cir.func @callIndirect(%arg: !s32i) -> !s32i {
+    %fpp = cir.get_global @fp : !cir.ptr<!cir.ptr<!cir.func<(!s32i) -> !s32i>>>
+    %fp = cir.load %fpp : !cir.ptr<!cir.ptr<!cir.func<(!s32i) -> !s32i>>>, !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+    %retval = cir.call %fp(%arg) : (!cir.ptr<!cir.func<(!s32i) -> !s32i>>, !s32i) -> !s32i
+    cir.return %retval : !s32i
+  }
+
+  // MLIR:      llvm.mlir.global external @fp() {addr_space = 0 : i32} : !llvm.ptr
+  // MLIR:      llvm.func @callIndirect(%arg0: i32) -> i32
+  // MLIR-NEXT:   %0 = llvm.mlir.addressof @fp : !llvm.ptr
+  // MLIR-NEXT:   %1 = llvm.load %0 {{.*}} : !llvm.ptr -> !llvm.ptr
+  // MLIR-NEXT:   %2 = llvm.call %1(%arg0) : !llvm.ptr, (i32) -> i32
+  // MLIR-NEXT:   llvm.return %2 : i32
+
+  // LLVM:      define i32 @callIndirect(i32 %0)
+  // LLVM-NEXT:   %2 = load ptr, ptr @fp
+  // LLVM-NEXT:   %3 = call i32 %2(i32 %0)
+  // LLVM-NEXT:   ret i32 %3
+
+  // check direct vararg call lowering
+  cir.func private @varargCallee(!s32i, ...) -> !s32i
+  cir.func @varargCaller() -> !s32i {
+    %zero = cir.const #cir.int<0> : !s32i
+    %retval = cir.call @varargCallee(%zero, %zero) : (!s32i, !s32i) -> !s32i
+    cir.return %retval : !s32i
+  }
+
+  // MLIR:      llvm.func @varargCallee(i32, ...) -> i32
+  // MLIR:      llvm.func @varargCaller() -> i32
+  // MLIR-NEXT:   %0 = llvm.mlir.constant(0 : i32) : i32
+  // MLIR-NEXT:   %1 = llvm.call @varargCallee(%0, %0) vararg(!llvm.func<i32 (i32, ...)>) : (i32, i32) -> i32
+  // MLIR-NEXT:   llvm.return %1 : i32
+
+  // LLVM:      define i32 @varargCaller()
+  // LLVM-NEXT:   %1 = call i32 (i32, ...) @varargCallee(i32 0, i32 0)
+  // LLVM-NEXT:   ret i32 %1
+
+  // check indirect vararg call lowering
+  cir.global "private" external @varargfp : !cir.ptr<!cir.func<(!s32i, ...) -> !s32i>>
+  cir.func @varargCallIndirect() -> !s32i {
+    %fpp = cir.get_global @varargfp : !cir.ptr<!cir.ptr<!cir.func<(!s32i, ...) -> !s32i>>>
+    %fp = cir.load %fpp : !cir.ptr<!cir.ptr<!cir.func<(!s32i, ...) -> !s32i>>>, !cir.ptr<!cir.func<(!s32i, ...) -> !s32i>>
+    %zero = cir.const #cir.int<0> : !s32i
+    %retval = cir.call %fp(%zero, %zero) : (!cir.ptr<!cir.func<(!s32i, ...) -> !s32i>>, !s32i, !s32i) -> !s32i
+    cir.return %retval : !s32i
+  }
+
+  // MLIR:      llvm.mlir.global external @varargfp() {addr_space = 0 : i32} : !llvm.ptr
+  // MLIR:      llvm.func @varargCallIndirect() -> i32
+  // MLIR-NEXT:   %0 = llvm.mlir.addressof @varargfp : !llvm.ptr
+  // MLIR-NEXT:   %1 = llvm.load %0 {{.*}} : !llvm.ptr -> !llvm.ptr
+  // MLIR-NEXT:   %2 = llvm.mlir.constant(0 : i32) : i32
+  // MLIR-NEXT:   %3 = llvm.call %1(%2, %2) vararg(!llvm.func<i32 (i32, ...)>) : !llvm.ptr, (i32, i32) -> i32
+  // MLIR-NEXT:   llvm.return %3 : i32
+
+  // LLVM:      define i32 @varargCallIndirect()
+  // LLVM-NEXT:   %1 = load ptr, ptr @varargfp
+  // LLVM-NEXT:   %2 = call i32 (i32, ...) %1(i32 0, i32 0)
+  // LLVM-NEXT:   ret i32 %2
+
+  cir.func private @add(%arg0: !s32i, %arg1: !s32i) -> !s32i
+
+  cir.func @call_with_side_effect() {
+    %0 = cir.const #cir.int<0> : !s32i
+    %1 = cir.const #cir.int<1> : !s32i
+    %2 = cir.call @add(%0, %1) : (!s32i, !s32i) -> !s32i side_effect(all)
+    %3 = cir.call @add(%0, %1) : (!s32i, !s32i) -> !s32i side_effect(pure)
+    %4 = cir.call @add(%0, %1) : (!s32i, !s32i) -> !s32i side_effect(const)
+    cir.return
+  }
+
+  // LLVM: @call_with_side_effect
+  // LLVM:   %{{.+}} = call i32 @add(i32 0, i32 1)
+  // LLVM:   %{{.+}} = call i32 @add(i32 0, i32 1) #[[#pure:]]
+  // LLVM:   %{{.+}} = call i32 @add(i32 0, i32 1) #[[#const:]]
+  // LLVM: }
+  // LLVM: attributes #[[#pure]] = { nounwind willreturn memory(read, errnomem: none) }
+  // LLVM: attributes #[[#const]] = { nounwind willreturn memory(none) }
+
+} // end module
diff --git a/clang/test/CIR/Incubator/Lowering/cast.cir b/clang/test/CIR/Incubator/Lowering/cast.cir
new file mode 100644
index 0000000000000..68f5a6d7180c0
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/cast.cir
@@ -0,0 +1,111 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir
+
+!s16i = !cir.int<s, 16>
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+!s8i = !cir.int<s, 8>
+!u32i = !cir.int<u, 32>
+!u8i = !cir.int<u, 8>
+!u64i = !cir.int<u, 64>
+
+module {
+  cir.func @cStyleCasts(%arg0: !u32i, %arg1: !s32i, %arg2: !cir.float, %arg3: !cir.double) -> !s32i {
+  // CHECK: llvm.func @cStyleCasts
+    %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["x1", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x2", init] {alignment = 4 : i64}
+    %20 = cir.alloca !s16i, !cir.ptr<!s16i>, ["x4", init] {alignment = 2 : i64}
+    %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+    %3 = cir.alloca !s8i, !cir.ptr<!s8i>, ["a", init] {alignment = 1 : i64}
+    %4 = cir.alloca !s16i, !cir.ptr<!s16i>, ["b", init] {alignment = 2 : i64}
+    %5 = cir.alloca !s64i, !cir.ptr<!s64i>, ["c", init] {alignment = 8 : i64}
+    %6 = cir.alloca !s64i, !cir.ptr<!s64i>, ["d", init] {alignment = 8 : i64}
+    %7 = cir.alloca !cir.array<!s32i x 3>, !cir.ptr<!cir.array<!s32i x 3>>, ["arr"] {alignment = 4 : i64}
+    %8 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["e", init] {alignment = 8 : i64}
+    cir.store %arg0, %0 : !u32i, !cir.ptr<!u32i>
+    cir.store %arg1, %1 : !s32i, !cir.ptr<!s32i>
+
+    // Integer casts.
+    %9 = cir.load %0 : !cir.ptr<!u32i>, !u32i
+    %10 = cir.cast integral %9 : !u32i -> !s8i
+    // CHECK: %{{[0-9]+}} = llvm.trunc %{{[0-9]+}} : i32 to i8
+    cir.store %10, %3 : !s8i, !cir.ptr<!s8i>
+    %11 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %12 = cir.cast integral %11 : !s32i -> !s16i
+    // CHECK: %{{[0-9]+}} = llvm.trunc %{{[0-9]+}} : i32 to i16
+    cir.store %12, %4 : !s16i, !cir.ptr<!s16i>
+    %13 = cir.load %0 : !cir.ptr<!u32i>, !u32i
+    %14 = cir.cast integral %13 : !u32i -> !s64i
+    // CHECK: %{{[0-9]+}} = llvm.zext %{{[0-9]+}} : i32 to i64
+    cir.store %14, %5 : !s64i, !cir.ptr<!s64i>
+    %15 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %16 = cir.cast integral %15 : !s32i -> !s64i
+    // CHECK: %{{[0-9]+}} = llvm.sext %{{[0-9]+}} : i32 to i64
+    %30 = cir.cast integral %arg1 : !s32i -> !u32i
+    // Should not produce a cast.
+    %32 = cir.cast integral %arg0 : !u32i -> !s32i
+    // Should not produce a cast.
+    %21 = cir.load %20 : !cir.ptr<!s16i>, !s16i
+    %22 = cir.cast integral %21 : !s16i -> !u64i
+    // CHECK: %[[TMP:[0-9]+]] = llvm.sext %{{[0-9]+}} : i16 to i64
+    %33 = cir.cast int_to_bool %arg1 : !s32i -> !cir.bool
+    // CHECK: %[[#ZERO:]] = llvm.mlir.constant(0 : i32) : i32
+    // CHECK: %[[#CMP:]] = llvm.icmp "ne" %arg1, %[[#ZERO]] : i32
+
+    // Pointer casts.
+    cir.store %16, %6 : !s64i, !cir.ptr<!s64i>
+    %17 = cir.cast array_to_ptrdecay %7 : !cir.ptr<!cir.array<!s32i x 3>> -> !cir.ptr<!s32i>
+    cir.store %17, %8 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+    // CHECK: %{{[0-9]+}} = llvm.getelementptr %{{[0-9]+}}[0] : (!llvm.ptr) -> !llvm.ptr, i32
+    %23 = cir.cast int_to_ptr %22 : !u64i -> !cir.ptr<!u8i>
+    // CHECK: %[[TMP2:[0-9]+]] = llvm.inttoptr %[[TMP]] : i64 to !llvm.ptr
+    %24 = cir.cast ptr_to_int %23 : !cir.ptr<!u8i> -> !s32i
+    // CHECK: %{{[0-9]+}} = llvm.ptrtoint %[[TMP2]] : !llvm.ptr to i32
+    %29 = cir.cast ptr_to_bool %23 : !cir.ptr<!u8i> -> !cir.bool
+
+    // Floating point casts.
+    %25 = cir.cast int_to_float %arg1 : !s32i -> !cir.float
+    // CHECK: %{{.+}} = llvm.sitofp %{{.+}} : i32 to f32
+    %26 = cir.cast int_to_float %arg0 : !u32i -> !cir.float
+    // CHECK: %{{.+}} = llvm.uitofp %{{.+}} : i32 to f32
+    %27 = cir.cast float_to_int %arg2 : !cir.float -> !s32i
+    // CHECK: %{{.+}} = llvm.fptosi %{{.+}} : f32 to i32
+    %28 = cir.cast float_to_int %arg2 : !cir.float -> !u32i
+    // CHECK: %{{.+}} = llvm.fptoui %{{.+}} : f32 to i32
+    %18 = cir.const #cir.int<0> : !s32i
+    // CHECK: %{{.+}} = llvm.fptrunc %{{.+}} : f64 to f32
+    %34 = cir.cast floating %arg3 : !cir.double -> !cir.float
+
+    cir.store %18, %2 : !s32i, !cir.ptr<!s32i>
+    %19 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+    cir.return %19 : !s32i
+  }
+
+  cir.func @testBoolToIntCast(%arg0: !cir.bool)  {
+  // CHECK: llvm.func @testBoolToIntCast
+    %0 = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["bl", init] {alignment = 1 : i64}
+    %1 = cir.alloca !u8i, !cir.ptr<!u8i>, ["y", init] {alignment = 1 : i64}
+    cir.store %arg0, %0 : !cir.bool, !cir.ptr<!cir.bool>
+
+    %2 = cir.load %0 : !cir.ptr<!cir.bool>, !cir.bool
+    %3 = cir.cast bool_to_int %2 : !cir.bool -> !u8i
+    // CHECK: %[[LOAD_BOOL:.*]] = llvm.load %{{.*}} : !llvm.ptr -> i8
+    // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[LOAD_BOOL]] : i8 to i1
+    // CHECK: %[[EXT:.*]] = llvm.zext %[[TRUNC]] : i1 to i8
+
+    cir.store %3, %1 : !u8i, !cir.ptr<!u8i>
+    cir.return
+  }
+
+  // Test cases where the memory type is not the same as the source type.
+  cir.func @testArrayToPtrDecay() {
+    // CHECK-LABEL: llvm.func @testArrayToPtrDecay()
+    %null_bool_array = cir.const #cir.ptr<null> : !cir.ptr<!cir.array<!cir.bool x 3>>
+    %bool_array_decay = cir.cast array_to_ptrdecay %null_bool_array : !cir.ptr<!cir.array<!cir.bool x 3>> -> !cir.ptr<!cir.bool>
+    // CHECK: = llvm.getelementptr %{{.*}}[0] : (!llvm.ptr) -> !llvm.ptr, i8
+    %res = cir.load %bool_array_decay : !cir.ptr<!cir.bool>, !cir.bool
+    // CHECK-NEXT: %[[BOOL_LOAD:.+]] = llvm.load %{{.*}} {{.*}} : !llvm.ptr -> i8
+    // CHECK-NEXT: = llvm.trunc %[[BOOL_LOAD]] : i8 to i1
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/class.cir b/clang/test/CIR/Incubator/Lowering/class.cir
new file mode 100644
index 0000000000000..ee136ccc8dafb
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/class.cir
@@ -0,0 +1,96 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+!s32i = !cir.int<s, 32>
+!u8i = !cir.int<u, 8>
+!u32i = !cir.int<u, 32>
+!rec_S = !cir.record<class "S" {!u8i, !s32i}>
+!rec_S2A = !cir.record<class "S2A" {!s32i} #cir.record.decl.ast>
+!rec_S1_ = !cir.record<class "S1" {!s32i, !cir.float, !cir.ptr<!s32i>} #cir.record.decl.ast>
+!rec_S2_ = !cir.record<class "S2" {!rec_S2A} #cir.record.decl.ast>
+!rec_S3_ = !cir.record<class "S3" {!s32i} #cir.record.decl.ast>
+
+module {
+  cir.func @test() {
+    %1 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["x"] {alignment = 4 : i64}
+    // CHECK: %[[#ARRSIZE:]] = llvm.mlir.constant(1 : index) : i64
+    // CHECK: %[[#CLASS:]] = llvm.alloca %[[#ARRSIZE]] x !llvm.struct<"class.S", (i8, i32)>
+    %3 = cir.get_member %1[0] {name = "c"} : !cir.ptr<!rec_S> -> !cir.ptr<!u8i>
+    // CHECK: = llvm.getelementptr %[[#CLASS]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"class.S", (i8, i32)>
+    %5 = cir.get_member %1[1] {name = "i"} : !cir.ptr<!rec_S> -> !cir.ptr<!s32i>
+    // CHECK: = llvm.getelementptr %[[#CLASS]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"class.S", (i8, i32)>
+    cir.return
+  }
+
+  cir.func @shouldConstInitLocalClassesWithConstStructAttr() {
+    %0 = cir.alloca !rec_S2A, !cir.ptr<!rec_S2A>, ["s"] {alignment = 4 : i64}
+    %1 = cir.const #cir.const_record<{#cir.int<1> : !s32i}> : !rec_S2A
+    cir.store %1, %0 : !rec_S2A, !cir.ptr<!rec_S2A>
+    cir.return
+  }
+  // CHECK: llvm.func @shouldConstInitLocalClassesWithConstStructAttr()
+  // CHECK:   %0 = llvm.mlir.constant(1 : index) : i64
+  // CHECK:   %1 = llvm.alloca %0 x !llvm.struct<"class.S2A", (i32)> {alignment = 4 : i64} : (i64) -> !llvm.ptr
+  // CHECK:   %2 = llvm.mlir.undef : !llvm.struct<"class.S2A", (i32)>
+  // CHECK:   %3 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK:   %4 = llvm.insertvalue %3, %2[0] : !llvm.struct<"class.S2A", (i32)>
+  // CHECK:   llvm.store %4, %1 {{.*}}: !llvm.struct<"class.S2A", (i32)>, !llvm.ptr
+  // CHECK:   llvm.return
+  // CHECK: }
+
+  // Should lower basic #cir.const_record initializer.
+  cir.global external @s1 = #cir.const_record<{#cir.int<1> : !s32i, #cir.fp<1.000000e-01> : !cir.float, #cir.ptr<null> : !cir.ptr<!s32i>}> : !rec_S1_
+  // CHECK: llvm.mlir.global external @s1() {addr_space = 0 : i32} : !llvm.struct<"class.S1", (i32, f32, ptr)> {
+  // CHECK:   %0 = llvm.mlir.undef : !llvm.struct<"class.S1", (i32, f32, ptr)>
+  // CHECK:   %1 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK:   %2 = llvm.insertvalue %1, %0[0] : !llvm.struct<"class.S1", (i32, f32, ptr)>
+  // CHECK:   %3 = llvm.mlir.constant(1.000000e-01 : f32) : f32
+  // CHECK:   %4 = llvm.insertvalue %3, %2[1] : !llvm.struct<"class.S1", (i32, f32, ptr)>
+  // CHECK:   %5 = llvm.mlir.zero : !llvm.ptr
+  // CHECK:   %6 = llvm.insertvalue %5, %4[2] : !llvm.struct<"class.S1", (i32, f32, ptr)>
+  // CHECK:   llvm.return %6 : !llvm.struct<"class.S1", (i32, f32, ptr)>
+  // CHECK: }
+
+  // Should lower nested #cir.const_record initializer.
+  cir.global external @s2 = #cir.const_record<{#cir.const_record<{#cir.int<1> : !s32i}> : !rec_S2A}> : !rec_S2_
+  // CHECK: llvm.mlir.global external @s2() {addr_space = 0 : i32} : !llvm.struct<"class.S2", (struct<"class.S2A", (i32)>)> {
+  // CHECK:   %0 = llvm.mlir.undef : !llvm.struct<"class.S2", (struct<"class.S2A", (i32)>)>
+  // CHECK:   %1 = llvm.mlir.undef : !llvm.struct<"class.S2A", (i32)>
+  // CHECK:   %2 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK:   %3 = llvm.insertvalue %2, %1[0] : !llvm.struct<"class.S2A", (i32)>
+  // CHECK:   %4 = llvm.insertvalue %3, %0[0] : !llvm.struct<"class.S2", (struct<"class.S2A", (i32)>)>
+  // CHECK:   llvm.return %4 : !llvm.struct<"class.S2", (struct<"class.S2A", (i32)>)>
+  // CHECK: }
+
+  cir.global external @s3 = #cir.const_array<[#cir.const_record<{#cir.int<1> : !s32i}> : !rec_S3_, #cir.const_record<{#cir.int<2> : !s32i}> : !rec_S3_, #cir.const_record<{#cir.int<3> : !s32i}> : !rec_S3_]> : !cir.array<!rec_S3_ x 3>
+  // CHECK: llvm.mlir.global external @s3() {addr_space = 0 : i32} : !llvm.array<3 x struct<"class.S3", (i32)>> {
+  // CHECK:   %0 = llvm.mlir.undef : !llvm.array<3 x struct<"class.S3", (i32)>>
+  // CHECK:   %1 = llvm.mlir.undef : !llvm.struct<"class.S3", (i32)>
+  // CHECK:   %2 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK:   %3 = llvm.insertvalue %2, %1[0] : !llvm.struct<"class.S3", (i32)>
+  // CHECK:   %4 = llvm.insertvalue %3, %0[0] : !llvm.array<3 x struct<"class.S3", (i32)>>
+  // CHECK:   %5 = llvm.mlir.undef : !llvm.struct<"class.S3", (i32)>
+  // CHECK:   %6 = llvm.mlir.constant(2 : i32) : i32
+  // CHECK:   %7 = llvm.insertvalue %6, %5[0] : !llvm.struct<"class.S3", (i32)>
+  // CHECK:   %8 = llvm.insertvalue %7, %4[1] : !llvm.array<3 x struct<"class.S3", (i32)>>
+  // CHECK:   %9 = llvm.mlir.undef : !llvm.struct<"class.S3", (i32)>
+  // CHECK:   %10 = llvm.mlir.constant(3 : i32) : i32
+  // CHECK:   %11 = llvm.insertvalue %10, %9[0] : !llvm.struct<"class.S3", (i32)>
+  // CHECK:   %12 = llvm.insertvalue %11, %8[2] : !llvm.array<3 x struct<"class.S3", (i32)>>
+  // CHECK:   llvm.return %12 : !llvm.array<3 x struct<"class.S3", (i32)>>
+  // CHECK: }
+
+  cir.func @shouldLowerClassCopies() {
+  // CHECK: llvm.func @shouldLowerClassCopies()
+    %1 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["a"] {alignment = 4 : i64}
+    // CHECK: %[[#ONE:]] = llvm.mlir.constant(1 : index) : i64
+    // CHECK: %[[#SA:]] = llvm.alloca %[[#ONE]] x !llvm.struct<"class.S", (i8, i32)> {alignment = 4 : i64} : (i64) -> !llvm.ptr
+    %2 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["b", init] {alignment = 4 : i64}
+    // CHECK: %[[#ONE:]] = llvm.mlir.constant(1 : index) : i64
+    // CHECK: %[[#SB:]] = llvm.alloca %[[#ONE]] x !llvm.struct<"class.S", (i8, i32)> {alignment = 4 : i64} : (i64) -> !llvm.ptr
+    cir.copy %1 to %2 : !cir.ptr<!rec_S>
+    // CHECK: %[[#SIZE:]] = llvm.mlir.constant(8 : i32) : i32
+    // CHECK: "llvm.intr.memcpy"(%[[#SB]], %[[#SA]], %[[#SIZE]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/cmp.cir b/clang/test/CIR/Incubator/Lowering/cmp.cir
new file mode 100644
index 0000000000000..081a7e795a434
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/cmp.cir
@@ -0,0 +1,78 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+!s32i = !cir.int<s, 32>
+module {
+  cir.func @foo() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a"] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["b"] {alignment = 4 : i64}
+    %2 = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["c"] {alignment = 4 : i64}
+    %3 = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["d"] {alignment = 4 : i64}
+    %4 = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["e"] {alignment = 1 : i64}
+    %5 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    %6 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %7 = cir.cmp(gt, %5, %6) : !s32i, !cir.bool
+    // CHECK: llvm.icmp "sgt"
+    %8 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    %9 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %10 = cir.cmp(eq, %8, %9) : !s32i, !cir.bool
+    // CHECK: llvm.icmp "eq"
+    %11 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    %12 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %13 = cir.cmp(lt, %11, %12) : !s32i, !cir.bool
+    // CHECK: llvm.icmp "slt"
+    %14 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    %15 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %16 = cir.cmp(ge, %14, %15) : !s32i, !cir.bool
+    // CHECK: llvm.icmp "sge"
+    %17 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    %18 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %19 = cir.cmp(ne, %17, %18) : !s32i, !cir.bool
+    // CHECK: llvm.icmp "ne"
+    %20 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    %21 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %22 = cir.cmp(le, %20, %21) : !s32i, !cir.bool
+    // CHECK: llvm.icmp "sle"
+    %23 = cir.load %2 : !cir.ptr<!cir.float>, !cir.float
+    %24 = cir.load %3 : !cir.ptr<!cir.float>, !cir.float
+    %25 = cir.cmp(gt, %23, %24) : !cir.float, !cir.bool
+    // CHECK: llvm.fcmp "ogt"
+    %26 = cir.load %2 : !cir.ptr<!cir.float>, !cir.float
+    %27 = cir.load %3 : !cir.ptr<!cir.float>, !cir.float
+    %28 = cir.cmp(eq, %26, %27) : !cir.float, !cir.bool
+    // CHECK: llvm.fcmp "oeq"
+    %29 = cir.load %2 : !cir.ptr<!cir.float>, !cir.float
+    %30 = cir.load %3 : !cir.ptr<!cir.float>, !cir.float
+    %31 = cir.cmp(lt, %29, %30) : !cir.float, !cir.bool
+    // CHECK: llvm.fcmp "olt"
+    %32 = cir.load %2 : !cir.ptr<!cir.float>, !cir.float
+    %33 = cir.load %3 : !cir.ptr<!cir.float>, !cir.float
+    %34 = cir.cmp(ge, %32, %33) : !cir.float, !cir.bool
+    // CHECK: llvm.fcmp "oge"
+    %35 = cir.load %2 : !cir.ptr<!cir.float>, !cir.float
+    %36 = cir.load %3 : !cir.ptr<!cir.float>, !cir.float
+    %37 = cir.cmp(ne, %35, %36) : !cir.float, !cir.bool
+    // CHECK: llvm.fcmp "une"
+    %38 = cir.load %2 : !cir.ptr<!cir.float>, !cir.float
+    %39 = cir.load %3 : !cir.ptr<!cir.float>, !cir.float
+    %40 = cir.cmp(le, %38, %39) : !cir.float, !cir.bool
+    // CHECK: llvm.fcmp "ole"
+
+    // Float-only predicates
+    %43 = cir.load %2 : !cir.ptr<!cir.float>, !cir.float
+    %44 = cir.load %3 : !cir.ptr<!cir.float>, !cir.float
+    %45 = cir.cmp(fone, %43, %44) : !cir.float, !cir.bool
+    // CHECK: llvm.fcmp "one"
+    %46 = cir.load %2 : !cir.ptr<!cir.float>, !cir.float
+    %47 = cir.load %3 : !cir.ptr<!cir.float>, !cir.float
+    %48 = cir.cmp(funo, %46, %47) : !cir.float, !cir.bool
+    // CHECK: llvm.fcmp "uno"
+
+    // Pointer comparisons.
+    %41 = cir.cmp(ne, %0, %1) : !cir.ptr<!s32i>, !cir.bool
+    // CHECK: llvm.icmp "ne"
+    %42 = cir.cmp(lt, %0, %1) : !cir.ptr<!s32i>, !cir.bool
+    // CHECK: llvm.icmp "ult"
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/cmp3way.cir b/clang/test/CIR/Incubator/Lowering/cmp3way.cir
new file mode 100644
index 0000000000000..9c18dfce57693
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/cmp3way.cir
@@ -0,0 +1,40 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering  | FileCheck %s -check-prefix=LLVM
+
+!s8i = !cir.int<s, 8>
+!s32i = !cir.int<s, 32>
+!u32i = !cir.int<u, 32>
+
+#cmp3way_info = #cir.cmp3way_info<strong, lt = -1, eq = 0, gt = 1>
+
+module {
+  cir.func @test_scmp(%arg0 : !s32i, %arg1 : !s32i) -> !s8i {
+    %0 = cir.cmp3way(%arg0 : !s32i, %arg1, #cmp3way_info) : !s8i
+    cir.return %0 : !s8i
+  }
+
+  //      MLIR: llvm.func @test_scmp(%arg0: i32, %arg1: i32) -> i8
+  // MLIR-NEXT:   %0 = llvm.call_intrinsic "llvm.scmp.i8.i32"(%arg0, %arg1) : (i32, i32) -> i8
+  // MLIR-NEXT:   llvm.return %0 : i8
+  // MLIR-NEXT: }
+
+  //      LLVM: define i8 @test_scmp(i32 %0, i32 %1)
+  // LLVM-NEXT:   %[[#RET:]] = call i8 @llvm.scmp.i8.i32(i32 %0, i32 %1)
+  // LLVM-NEXT:   ret i8 %[[#RET]]
+  // LLVM-NEXT: }
+
+  cir.func @test_ucmp(%arg0 : !u32i, %arg1 : !u32i) -> !s8i {
+    %0 = cir.cmp3way(%arg0 : !u32i, %arg1, #cmp3way_info) : !s8i
+    cir.return %0 : !s8i
+  }
+
+  //      MLIR: llvm.func @test_ucmp(%arg0: i32, %arg1: i32) -> i8
+  // MLIR-NEXT:   %0 = llvm.call_intrinsic "llvm.ucmp.i8.i32"(%arg0, %arg1) : (i32, i32) -> i8
+  // MLIR-NEXT:   llvm.return %0 : i8
+  // MLIR-NEXT: }
+
+  //      LLVM: define i8 @test_ucmp(i32 %0, i32 %1)
+  // LLVM-NEXT:   %[[#RET:]] = call i8 @llvm.ucmp.i8.i32(i32 %0, i32 %1)
+  // LLVM-NEXT:   ret i8 %[[#RET]]
+  // LLVM-NEXT: }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/complex.cir b/clang/test/CIR/Incubator/Lowering/complex.cir
new file mode 100644
index 0000000000000..27180865e3776
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/complex.cir
@@ -0,0 +1,15 @@
+// RUN: cir-translate -cir-to-llvmir --disable-cc-lowering -o %t.ll %s
+// RUN: FileCheck --input-file %t.ll -check-prefix=LLVM %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.func @complex_const() -> !cir.complex<!s32i> {
+    %0 = cir.const #cir.complex<#cir.int<1> : !s32i, #cir.int<2> : !s32i> : !cir.complex<!s32i>
+    cir.return %0 : !cir.complex<!s32i>
+  }
+
+  // LLVM-LABEL: define { i32, i32 } @complex_const()
+  //  LLVM-NEXT:   ret { i32, i32 } { i32 1, i32 2 }
+  //  LLVM-NEXT: }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/const-array.cir b/clang/test/CIR/Incubator/Lowering/const-array.cir
new file mode 100644
index 0000000000000..84a21665bffde
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/const-array.cir
@@ -0,0 +1,20 @@
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering -o -  | FileCheck %s -check-prefix=LLVM
+
+!u8i = !cir.int<u, 8>
+#false = #cir.bool<false> : !cir.bool
+#true = #cir.bool<true> : !cir.bool
+
+module {
+  cir.global "private" internal @normal_url_char = #cir.const_array<[#cir.int<0> : !u8i, #cir.int<1> : !u8i], trailing_zeros> : !cir.array<!u8i x 4>
+  // LLVM: @normal_url_char = internal global [4 x i8] c"\00\01\00\00"
+
+  cir.global "private" internal @g_const_bool_arr = #cir.const_array<[#true, #false, #true, #false]> : !cir.array<!cir.bool x 4>
+  // LLVM: @g_const_bool_arr = internal global [4 x i8] c"\01\00\01\00"
+
+  cir.func @c0() -> !cir.ptr<!cir.array<!u8i x 4>> {
+    %0 = cir.get_global @normal_url_char : !cir.ptr<!cir.array<!u8i x 4>>
+    cir.return %0 : !cir.ptr<!cir.array<!u8i x 4>>
+  }
+  // LLVM: define ptr @c0()
+  // LLVM: ret ptr @normal_url_char
+}
diff --git a/clang/test/CIR/Incubator/Lowering/const.cir b/clang/test/CIR/Incubator/Lowering/const.cir
new file mode 100644
index 0000000000000..0fcad3ef400e8
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/const.cir
@@ -0,0 +1,86 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+!s8i = !cir.int<s, 8>
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+!rec_anon2E1_ = !cir.record<struct "anon.1" {!cir.int<s, 32>, !cir.int<s, 32>} #cir.record.decl.ast>
+module {
+  cir.func @testConstArrInit() {
+    %0 = cir.const #cir.const_array<"string\00" : !cir.array<!s8i x 7>> : !cir.array<!s8i x 7>
+    // CHECK: llvm.mlir.constant(dense<[115, 116, 114, 105, 110, 103, 0]> : tensor<7xi8>) : !llvm.array<7 x i8>
+    %1 = cir.const #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i]> : !cir.array<!s32i x 2>
+    // CHECK: llvm.mlir.constant(dense<[1, 2]> : tensor<2xi32>) : !llvm.array<2 x i32>
+    %3 = cir.const #cir.const_array<[#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00> : !cir.float]> : !cir.array<!cir.float x 2>
+    // CHECK: llvm.mlir.constant(dense<[1.000000e+00, 2.000000e+00]> : tensor<2xf32>) : !llvm.array<2 x f32>
+    %4 = cir.const #cir.zero : !cir.array<!s32i x 3>
+    // CHECK: llvm.mlir.zero : !llvm.array<3 x i32>
+    %5 = cir.const #cir.undef : !cir.array<!s32i x 3>
+    // CHECK: llvm.mlir.undef : !llvm.array<3 x i32>
+    %6 = cir.const #cir.poison : !s32i
+    // CHECK: llvm.mlir.poison : i32
+    cir.return
+  }
+
+  cir.func @testConvertConstArrayToDenseConst() {
+    %0 = cir.const #cir.const_array<[#cir.const_array<[#cir.int<1> : !s32i]> : !cir.array<!s32i x 1>, #cir.zero : !cir.array<!s32i x 1>]> : !cir.array<!cir.array<!s32i x 1> x 2>
+    %1 = cir.const #cir.const_array<[#cir.const_array<[#cir.int<1> : !s64i]> : !cir.array<!s64i x 1>, #cir.zero : !cir.array<!s64i x 1>]> : !cir.array<!cir.array<!s64i x 1> x 2>
+    %2 = cir.const #cir.const_array<[#cir.const_array<[#cir.fp<1.000000e+00> : !cir.float]> : !cir.array<!cir.float x 1>, #cir.zero : !cir.array<!cir.float x 1>]> : !cir.array<!cir.array<!cir.float x 1> x 2>
+    %3 = cir.const #cir.const_array<[#cir.const_array<[#cir.fp<1.000000e+00> : !cir.double]> : !cir.array<!cir.double x 1>, #cir.zero : !cir.array<!cir.double x 1>]> : !cir.array<!cir.array<!cir.double x 1> x 2>
+    %4 = cir.const #cir.const_array<[#cir.const_array<[#cir.const_array<[#cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<1> : !s32i]> : !cir.array<!s32i x 3>]> : !cir.array<!cir.array<!s32i x 3> x 1>, #cir.zero : !cir.array<!cir.array<!s32i x 3> x 1>]> : !cir.array<!cir.array<!cir.array<!s32i x 3> x 1> x 2>
+
+    cir.return
+  }
+  // CHECK:  llvm.func @testConvertConstArrayToDenseConst()
+  // CHECK:    {{%.*}} = llvm.mlir.constant(dense<{{\[\[}}1], [0{{\]\]}}> : tensor<2x1xi32>) : !llvm.array<2 x array<1 x i32>>
+  // CHECK:    {{%.*}} = llvm.mlir.constant(dense<{{\[\[}}1], [0{{\]\]}}> : tensor<2x1xi64>) : !llvm.array<2 x array<1 x i64>>
+  // CHECK:    {{%.*}} = llvm.mlir.constant(dense<{{\[\[}}1.000000e+00], [0.000000e+00{{\]\]}}> : tensor<2x1xf32>) : !llvm.array<2 x array<1 x f32>>
+  // CHECK:    {{%.*}} = llvm.mlir.constant(dense<{{\[\[}}1.000000e+00], [0.000000e+00{{\]\]}}> : tensor<2x1xf64>) : !llvm.array<2 x array<1 x f64>>
+  // CHECK:    {{%.*}} = llvm.mlir.constant(dense<{{\[\[\[}}1, 1, 1{{\]\]}}, {{\[\[}}0, 0, 0{{\]\]\]}}> : tensor<2x1x3xi32>) : !llvm.array<2 x array<1 x array<3 x i32>>>
+  // CHECK:    llvm.return
+
+  cir.func @testConstArrayOfStructs() {
+    %0 = cir.alloca !cir.array<!rec_anon2E1_ x 1>, !cir.ptr<!cir.array<!rec_anon2E1_ x 1>>, ["a"] {alignment = 4 : i64}
+    %1 = cir.const #cir.const_array<[#cir.const_record<{#cir.int<0> : !s32i, #cir.int<1> : !s32i}> : !rec_anon2E1_]> : !cir.array<!rec_anon2E1_ x 1>
+    cir.store %1, %0 : !cir.array<!rec_anon2E1_ x 1>, !cir.ptr<!cir.array<!rec_anon2E1_ x 1>>
+    cir.return
+  }
+  // CHECK:  llvm.func @testConstArrayOfStructs()
+  // CHECK:    %0 = llvm.mlir.constant(1 : index) : i64
+  // CHECK:    %1 = llvm.alloca %0 x !llvm.array<1 x struct<"struct.anon.1", (i32, i32)>> {alignment = 4 : i64} : (i64) -> !llvm.ptr
+  // CHECK:    %2 = llvm.mlir.undef : !llvm.array<1 x struct<"struct.anon.1", (i32, i32)>>
+  // CHECK:    %3 = llvm.mlir.undef : !llvm.struct<"struct.anon.1", (i32, i32)>
+  // CHECK:    %4 = llvm.mlir.constant(0 : i32) : i32
+  // CHECK:    %5 = llvm.insertvalue %4, %3[0] : !llvm.struct<"struct.anon.1", (i32, i32)>
+  // CHECK:    %6 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK:    %7 = llvm.insertvalue %6, %5[1] : !llvm.struct<"struct.anon.1", (i32, i32)>
+  // CHECK:    %8 = llvm.insertvalue %7, %2[0] : !llvm.array<1 x struct<"struct.anon.1", (i32, i32)>>
+  // CHECK:    llvm.store %8, %1 {{.*}}: !llvm.array<1 x struct<"struct.anon.1", (i32, i32)>>, !llvm.ptr
+  // CHECK:    llvm.return
+
+  cir.func @testArrWithTrailingZeros() {
+    %0 = cir.alloca !cir.array<!s32i x 10>, !cir.ptr<!cir.array<!s32i x 10>>, ["a"] {alignment = 16 : i64}
+    %1 = cir.const #cir.const_array<[#cir.int<1> : !s32i], trailing_zeros> : !cir.array<!s32i x 10>
+    cir.store %1, %0 : !cir.array<!s32i x 10>, !cir.ptr<!cir.array<!s32i x 10>>
+    cir.return
+  }
+  // CHECK: llvm.func @testArrWithTrailingZeros()
+  // CHECK:   %0 = llvm.mlir.constant(1 : index) : i64
+  // CHECK:   %1 = llvm.alloca %0 x !llvm.array<10 x i32> {alignment = 16 : i64} : (i64) -> !llvm.ptr
+  // CHECK:   %2 = llvm.mlir.zero : !llvm.array<10 x i32>
+  // CHECK:   %3 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK:   %4 = llvm.insertvalue %3, %2[0] : !llvm.array<10 x i32>
+
+  cir.func @testInitArrWithBool() {
+    %1 = cir.const #cir.const_array<[#cir.bool<true> : !cir.bool]> : !cir.array<!cir.bool x 1>
+    cir.return
+  }
+
+  // CHECK: llvm.func @testInitArrWithBool()
+  // CHECK:   [[ARR:%.*]] = llvm.mlir.undef : !llvm.array<1 x i8>
+  // CHECK:   [[TRUE:%.*]] = llvm.mlir.constant(true) : i1
+  // CHECK:   [[TRUE_EXT:%.*]] = llvm.zext [[TRUE]] : i1 to i8
+  // CHECK:   {{.*}} = llvm.insertvalue [[TRUE_EXT]], [[ARR]][0] : !llvm.array<1 x i8>
+  // CHECK: llvm.return
+
+}
diff --git a/clang/test/CIR/Incubator/Lowering/data-member.cir b/clang/test/CIR/Incubator/Lowering/data-member.cir
new file mode 100644
index 0000000000000..90f4d34dd0c52
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/data-member.cir
@@ -0,0 +1,57 @@
+// RUN: cir-opt -cir-to-llvm -o - %s | FileCheck -check-prefix=MLIR %s
+// RUN: cir-translate -cir-to-llvmir --target x86_64-unknown-linux-gnu --disable-cc-lowering -o - %s  | FileCheck -check-prefix=LLVM %s
+
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+!structT = !cir.record<struct "Point" {!cir.int<s, 32>, !cir.int<s, 32>, !cir.int<s, 32>}>
+
+module @test attributes {
+  cir.triple = "x86_64-unknown-linux-gnu",
+  dlti.dl_spec = #dlti.dl_spec<i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, "dlti.stack_alignment" = 128 : i64, "dlti.endianness" = "little">
+} {
+  cir.global external @pt_member = #cir.data_member<1> : !cir.data_member<!s32i in !structT>
+  // MLIR: llvm.mlir.global external @pt_member(4 : i64) {addr_space = 0 : i32} : i64
+  // LLVM: @pt_member = global i64 4
+
+  cir.func @constant() -> !cir.data_member<!s32i in !structT> {
+    %0 = cir.const #cir.data_member<1> : !cir.data_member<!s32i in !structT>
+    cir.return %0 : !cir.data_member<!s32i in !structT>
+  }
+  //      MLIR: llvm.func @constant() -> i64
+  // MLIR-NEXT:   %[[#VAL:]] = llvm.mlir.constant(4 : i64) : i64
+  // MLIR-NEXT:   llvm.return %[[#VAL]] : i64
+  // MLIR-NEXT: }
+
+  //      LLVM: define i64 @constant()
+  // LLVM-NEXT:   ret i64 4
+  // LLVM-NEXT: }
+
+  cir.func @null_constant() -> !cir.data_member<!s32i in !structT> {
+    %0 = cir.const #cir.data_member<null> : !cir.data_member<!s32i in !structT>
+    cir.return %0 : !cir.data_member<!s32i in !structT>
+  }
+  //      MLIR: llvm.func @null_constant() -> i64
+  // MLIR-NEXT:   %[[#VAL:]] = llvm.mlir.constant(-1 : i64) : i64
+  // MLIR-NEXT:   llvm.return %[[#VAL]] : i64
+  // MLIR-NEXT: }
+
+  //      LLVM: define i64 @null_constant() !dbg !7 {
+  // LLVM-NEXT:   ret i64 -1
+  // LLVM-NEXT: }
+
+  cir.func @get_runtime_member(%arg0: !cir.ptr<!structT>, %arg1: !cir.data_member<!s32i in !structT>) -> !cir.ptr<!s32i> {
+    %0 = cir.get_runtime_member %arg0[%arg1 : !cir.data_member<!s32i in !structT>] : !cir.ptr<!structT> -> !cir.ptr<!s32i>
+    cir.return %0 : !cir.ptr<!s32i>
+  }
+  //      MLIR: llvm.func @get_runtime_member(%[[ARG0:.+]]: !llvm.ptr, %[[ARG1:.+]]: i64) -> !llvm.ptr
+  // MLIR-NEXT:   %[[#PTR:]] = llvm.bitcast %[[ARG0]] : !llvm.ptr to !llvm.ptr
+  // MLIR-NEXT:   %[[#VAL:]] = llvm.getelementptr %[[#PTR]][%[[ARG1]]] : (!llvm.ptr, i64) -> !llvm.ptr, i8
+  // MLIR-NEXT:   %[[#RET:]] = llvm.bitcast %[[#VAL]] : !llvm.ptr to !llvm.ptr
+  // MLIR-NEXT:   llvm.return %[[#RET]] : !llvm.ptr
+  // MLIR-NEXT: }
+
+  //      LLVM: define ptr @get_runtime_member(ptr %[[ARG0:.+]], i64 %[[ARG1:.+]])
+  // LLVM-NEXT:   %[[#VAL:]] = getelementptr i8, ptr %[[ARG0]], i64 %[[ARG1]]
+  // LLVM-NEXT:   ret ptr %[[#VAL]]
+  // LLVM-NEXT: }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/debug-info.c b/clang/test/CIR/Incubator/Lowering/debug-info.c
new file mode 100644
index 0000000000000..42a8217c5cc7d
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/debug-info.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM_NO_DEBUG
+// RUN: %clang_cc1 -debug-info-kind=constructor -dwarf-version=4 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM_WITH_DEBUG
+int foo(int a, int b) {
+  // LLVM_NO_DEBUG-NOT: !dbg
+
+  // LLVM_WITH_DEBUG-LABEL: foo
+  // LLVM_WITH_DEBUG: %[[VAR_A:.*]] = load i32, ptr %{{.*}}, align 4, !dbg ![[DI_LOC1:.*]]
+  // LLVM_WITH_DEBUG: %[[VAR_B:.*]] = load i32, ptr %{{.*}}, align 4, !dbg ![[DI_LOC2:.*]]
+  // LLVM_WITH_DEBUG: %[[VAR_C:.*]] = add nsw i32 %[[VAR_A]], %[[VAR_B]], !dbg ![[DI_LOC1]]
+  // LLVM_WITH_DEBUG: store i32 %[[VAR_C]], ptr %{{.*}}, align 4, !dbg ![[DI_LOC3:.*]]
+
+  // LLVM_WITH_DEBUG: ![[DI_LOC3]] = !DILocation(line: [[LINE:.*]], scope: ![[SCOPE:.*]])
+  // LLVM_WITH_DEBUG: ![[DI_LOC1]] = !DILocation(line: [[LINE]], column: {{.*}}, scope: ![[SCOPE]])
+  // LLVM_WITH_DEBUG: ![[DI_LOC2]] = !DILocation(line: [[LINE]], column: {{.*}}, scope: ![[SCOPE]])
+  int c = a + b;
+  return c;
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Lowering/delete-array.cpp b/clang/test/CIR/Incubator/Lowering/delete-array.cpp
new file mode 100644
index 0000000000000..380f93392a00c
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/delete-array.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-llvm -O0 %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+
+void test_delete_array(int *ptr) {
+  delete[] ptr;
+}
+
+// LLVM: [[PTR:%[0-9]+]] = load ptr, ptr %{{[0-9]+}}, align 8
+// LLVM-NEXT: call void @_ZdaPv(ptr [[PTR]])
+
+
+int *newmem();
+struct cls {
+  ~cls();
+};
+cls::~cls() { delete[] newmem(); }
+
+// LLVM: [[NEWMEM:%[0-9]+]] = call ptr @_Z6newmemv()
+// LLVM-NEXT: call void @_ZdaPv(ptr [[NEWMEM]])
diff --git a/clang/test/CIR/Incubator/Lowering/derived-to-base.cpp b/clang/test/CIR/Incubator/Lowering/derived-to-base.cpp
new file mode 100644
index 0000000000000..6e29c16c66084
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/derived-to-base.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+struct Base1 { int a; };
+struct Base2 { int b; };
+struct Derived : Base1, Base2 { int c; };
+void test_multi_base() {
+  Derived d;
+
+  Base2& bref = d; // no null check needed
+  // LLVM: getelementptr i8, ptr %[[D:.*]], i32 4
+
+  Base2* bptr = &d; // has null pointer check
+  // LLVM: %[[CHECK:.*]] = icmp eq ptr %[[D]], null
+  // LLVM: %[[BPTR:.*]] = getelementptr i8, ptr %[[D]], i32 4
+  // LLVM: select i1 %[[CHECK]], ptr %[[D]], ptr %[[BPTR]]
+
+  int a = d.a;
+  // LLVM: getelementptr %struct.Base1, ptr %[[D]], i32 0, i32 0
+
+  int b = d.b;
+  // LLVM: %[[BASE2_OFFSET:.*]] = getelementptr i8, ptr %[[D]], i32 4
+  // LLVM: %[[BASE2:.*]] = getelementptr %struct.Base2, ptr %[[BASE2_OFFSET]], i32 0, i32 0
+
+  int c = d.c;
+  // LLVM: getelementptr %struct.Derived, ptr %[[D]], i32 0, i32 2
+}
diff --git a/clang/test/CIR/Incubator/Lowering/dot.cir b/clang/test/CIR/Incubator/Lowering/dot.cir
new file mode 100644
index 0000000000000..e442b0babf227
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/dot.cir
@@ -0,0 +1,111 @@
+// RUN: cir-opt %s -cir-to-llvm --reconcile-unrealized-casts -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s -check-prefix=MLIR
+
+!s32i = !cir.int<s, 32>
+module {
+  cir.func @dot(%arg0: !cir.ptr<!cir.double>, %arg1: !cir.ptr<!cir.double>, %arg2: !s32i) -> !cir.double {
+    %0 = cir.alloca !cir.ptr<!cir.double>, !cir.ptr<!cir.ptr<!cir.double>>, ["a", init] {alignment = 8 : i64}
+    %1 = cir.alloca !cir.ptr<!cir.double>, !cir.ptr<!cir.ptr<!cir.double>>, ["b", init] {alignment = 8 : i64}
+    %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["size", init] {alignment = 4 : i64}
+    %3 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["__retval"] {alignment = 8 : i64}
+    %4 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["q", init] {alignment = 8 : i64}
+    cir.store %arg0, %0 : !cir.ptr<!cir.double>, !cir.ptr<!cir.ptr<!cir.double>>
+    cir.store %arg1, %1 : !cir.ptr<!cir.double>, !cir.ptr<!cir.ptr<!cir.double>>
+    cir.store %arg2, %2 : !s32i, !cir.ptr<!s32i>
+    %5 = cir.const #cir.fp<0.000000e+00> : !cir.double
+    cir.store %5, %4 : !cir.double, !cir.ptr<!cir.double>
+    cir.scope {
+      %8 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+      %9 = cir.const #cir.int<0> : !s32i
+      cir.store %9, %8 : !s32i, !cir.ptr<!s32i>
+      cir.for : cond {
+        %10 = cir.load %8 : !cir.ptr<!s32i>, !s32i
+        %11 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+        %12 = cir.cmp(lt, %10, %11) : !s32i, !cir.bool
+        cir.condition(%12)
+      } body {
+        %10 = cir.load %0 : !cir.ptr<!cir.ptr<!cir.double>>, !cir.ptr<!cir.double>
+        %11 = cir.load %8 : !cir.ptr<!s32i>, !s32i
+        %12 = cir.ptr_stride %10, %11 : (!cir.ptr<!cir.double>, !s32i) -> !cir.ptr<!cir.double>
+        %13 = cir.load %12 : !cir.ptr<!cir.double>, !cir.double
+        %14 = cir.load %1 : !cir.ptr<!cir.ptr<!cir.double>>, !cir.ptr<!cir.double>
+        %15 = cir.load %8 : !cir.ptr<!s32i>, !s32i
+        %16 = cir.ptr_stride %14, %15 : (!cir.ptr<!cir.double>, !s32i) -> !cir.ptr<!cir.double>
+        %17 = cir.load %16 : !cir.ptr<!cir.double>, !cir.double
+        %18 = cir.binop(mul, %13, %17) : !cir.double
+        %19 = cir.load %4 : !cir.ptr<!cir.double>, !cir.double
+        %20 = cir.binop(add, %19, %18) : !cir.double
+        cir.store %20, %4 : !cir.double, !cir.ptr<!cir.double>
+        cir.yield
+      } step {
+        %10 = cir.load %8 : !cir.ptr<!s32i>, !s32i
+        %11 = cir.unary(inc, %10) : !s32i, !s32i
+        cir.store %11, %8 : !s32i, !cir.ptr<!s32i>
+        cir.yield
+      }
+    }
+    %6 = cir.load %4 : !cir.ptr<!cir.double>, !cir.double
+    cir.store %6, %3 : !cir.double, !cir.ptr<!cir.double>
+    %7 = cir.load %3 : !cir.ptr<!cir.double>, !cir.double
+    cir.return %7 : !cir.double
+  }
+}
+
+// MLIR-LABEL:   llvm.func @dot(
+// MLIR:           %[[VAL_1:.*]] = llvm.mlir.constant(1 : index) : i64
+// MLIR:           %[[VAL_2:.*]] = llvm.alloca %[[VAL_1]] x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr
+// MLIR:           %[[VAL_3:.*]] = llvm.mlir.constant(1 : index) : i64
+// MLIR:           %[[VAL_4:.*]] = llvm.alloca %[[VAL_3]] x !llvm.ptr {alignment = 8 : i64} : (i64) -> !llvm.ptr
+// MLIR:           %[[VAL_5:.*]] = llvm.mlir.constant(1 : index) : i64
+// MLIR:           %[[VAL_6:.*]] = llvm.alloca %[[VAL_5]] x !llvm.ptr {alignment = 8 : i64} : (i64) -> !llvm.ptr
+// MLIR:           %[[VAL_7:.*]] = llvm.mlir.constant(1 : index) : i64
+// MLIR:           %[[VAL_8:.*]] = llvm.alloca %[[VAL_7]] x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr
+// MLIR:           %[[VAL_9:.*]] = llvm.mlir.constant(1 : index) : i64
+// MLIR:           %[[VAL_10:.*]] = llvm.alloca %[[VAL_9]] x f64 {alignment = 8 : i64} : (i64) -> !llvm.ptr
+// MLIR:           %[[VAL_11:.*]] = llvm.mlir.constant(1 : index) : i64
+// MLIR:           %[[VAL_12:.*]] = llvm.alloca %[[VAL_11]] x f64 {alignment = 8 : i64} : (i64) -> !llvm.ptr
+// MLIR:           llvm.store {{.*}}, %[[VAL_4]] {{.*}}: !llvm.ptr, !llvm.ptr
+// MLIR:           llvm.store {{.*}}, %[[VAL_6]] {{.*}}: !llvm.ptr, !llvm.ptr
+// MLIR:           llvm.store {{.*}}, %[[VAL_8]] {{.*}}: i32, !llvm.ptr
+// MLIR:           %[[VAL_13:.*]] = llvm.mlir.constant(0.000000e+00 : f64) : f64
+// MLIR:           llvm.store %[[VAL_13]], %[[VAL_12]] {{.*}}: f64, !llvm.ptr
+// MLIR:           llvm.br ^bb1
+// MLIR:         ^bb1:
+// MLIR:           %[[VAL_16:.*]] = llvm.mlir.constant(0 : i32) : i32
+// MLIR:           llvm.store %[[VAL_16]], %[[VAL_2]] {{.*}}: i32, !llvm.ptr
+// MLIR:           llvm.br ^bb2
+// MLIR:         ^bb2:
+// MLIR:           %[[VAL_17:.*]] = llvm.load %[[VAL_2]] {alignment = 4 : i64} : !llvm.ptr -> i32
+// MLIR:           %[[VAL_18:.*]] = llvm.load %[[VAL_8]] {alignment = 4 : i64} : !llvm.ptr -> i32
+// MLIR:           %[[VAL_19:.*]] = llvm.icmp "slt" %[[VAL_17]], %[[VAL_18]] : i32
+// MLIR:           llvm.cond_br %[[VAL_19]], ^bb3, ^bb5
+// MLIR:         ^bb3:
+// MLIR:           %[[VAL_23:.*]] = llvm.load %[[VAL_4]] {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr
+// MLIR:           %[[VAL_24:.*]] = llvm.load %[[VAL_2]] {alignment = 4 : i64} : !llvm.ptr -> i32
+// MLIR:           %[[VAL_25:.*]] = llvm.sext %[[VAL_24]] : i32 to i64
+// MLIR:           %[[VAL_26:.*]] = llvm.getelementptr %[[VAL_23]]{{\[}}%[[VAL_25]]] : (!llvm.ptr, i64) -> !llvm.ptr, f64
+// MLIR:           %[[VAL_27:.*]] = llvm.load %[[VAL_26]] {alignment = 8 : i64} : !llvm.ptr -> f64
+// MLIR:           %[[VAL_28:.*]] = llvm.load %[[VAL_6]] {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr
+// MLIR:           %[[VAL_29:.*]] = llvm.load %[[VAL_2]] {alignment = 4 : i64} : !llvm.ptr -> i32
+// MLIR:           %[[VAL_30:.*]] = llvm.sext %[[VAL_29]] : i32 to i64
+// MLIR:           %[[VAL_31:.*]] = llvm.getelementptr %[[VAL_28]]{{\[}}%[[VAL_30]]] : (!llvm.ptr, i64) -> !llvm.ptr, f64
+// MLIR:           %[[VAL_32:.*]] = llvm.load %[[VAL_31]] {alignment = 8 : i64} : !llvm.ptr -> f64
+// MLIR:           %[[VAL_33:.*]] = llvm.fmul %[[VAL_27]], %[[VAL_32]]  : f64
+// MLIR:           %[[VAL_34:.*]] = llvm.load %[[VAL_12]] {alignment = 8 : i64} : !llvm.ptr -> f64
+// MLIR:           %[[VAL_35:.*]] = llvm.fadd %[[VAL_34]], %[[VAL_33]]  : f64
+// MLIR:           llvm.store %[[VAL_35]], %[[VAL_12]] {{.*}}: f64, !llvm.ptr
+// MLIR:           llvm.br ^bb4
+// MLIR:         ^bb4:
+// MLIR:           %[[VAL_36:.*]] = llvm.load %[[VAL_2]] {alignment = 4 : i64} : !llvm.ptr -> i32
+// MLIR:           %[[VAL_37:.*]] = llvm.mlir.constant(1 : i32) : i32
+// MLIR:           %[[VAL_38:.*]] = llvm.add %[[VAL_36]], %[[VAL_37]] : i32
+// MLIR:           llvm.store %[[VAL_38]], %[[VAL_2]] {{.*}}: i32, !llvm.ptr
+// MLIR:           llvm.br ^bb2
+// MLIR:         ^bb5:
+// MLIR:           llvm.br ^bb6
+// MLIR:         ^bb6:
+// MLIR:           %[[VAL_39:.*]] = llvm.load %[[VAL_12]] {alignment = 8 : i64} : !llvm.ptr -> f64
+// MLIR:           llvm.store %[[VAL_39]], %[[VAL_10]] {{.*}}: f64, !llvm.ptr
+// MLIR:           %[[VAL_40:.*]] = llvm.load %[[VAL_10]] {alignment = 8 : i64} : !llvm.ptr -> f64
+// MLIR:           llvm.return %[[VAL_40]] : f64
+// MLIR:         }
diff --git a/clang/test/CIR/Incubator/Lowering/exceptions.cir b/clang/test/CIR/Incubator/Lowering/exceptions.cir
new file mode 100644
index 0000000000000..2861a1de6489c
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/exceptions.cir
@@ -0,0 +1,108 @@
+// RUN: cir-translate %s -cir-to-llvmir --target x86_64-unknown-linux-gnu --disable-cc-lowering -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+!s8i = !cir.int<s, 8>
+!u32i = !cir.int<u, 32>
+!u64i = !cir.int<u, 64>
+!u8i = !cir.int<u, 8>
+!void = !cir.void
+
+module @"try-catch.cpp" attributes {cir.lang = #cir.lang<cxx>, cir.sob = #cir.signed_overflow_behavior<undefined>} {
+  cir.global "private" constant external @_ZTIi : !cir.ptr<!u8i>
+  cir.global "private" constant external @_ZTIPKc : !cir.ptr<!u8i>
+  cir.func private @_Z8divisionii(!s32i, !s32i) -> !cir.double
+  // LLVM: @_Z2tcv() personality ptr @__gxx_personality_v0
+  cir.func @_Z2tcv() -> !u64i {
+    %0 = cir.alloca !u64i, !cir.ptr<!u64i>, ["__retval"] {alignment = 8 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+    %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {alignment = 4 : i64}
+    %3 = cir.alloca !u64i, !cir.ptr<!u64i>, ["z"] {alignment = 8 : i64}
+    %4 = cir.const #cir.int<50> : !s32i
+    cir.store %4, %1 : !s32i, !cir.ptr<!s32i>
+    %5 = cir.const #cir.int<3> : !s32i
+    cir.store %5, %2 : !s32i, !cir.ptr<!s32i>
+    cir.br ^bb1
+  ^bb1:
+    %6 = cir.alloca !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>, ["msg"] {alignment = 8 : i64}
+    %7 = cir.alloca !s32i, !cir.ptr<!s32i>, ["idx"] {alignment = 4 : i64}
+    cir.br ^bb2
+  ^bb2:
+    %8 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+    %9 = cir.const #cir.int<4> : !s32i
+    cir.store %9, %8 : !s32i, !cir.ptr<!s32i>
+    %10 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %11 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+    %12 = cir.try_call @_Z8divisionii(%10, %11) ^bb3, ^bb4 : (!s32i, !s32i) -> !cir.double
+    // LLVM: invoke double @_Z8divisionii
+    // LLVM:     to label %[[CONT:.*]] unwind label %[[UNWIND:.*]],
+  ^bb3:
+    // LLVM: [[CONT]]:
+    %13 = cir.cast float_to_int %12 : !cir.double -> !u64i
+    cir.store %13, %3 : !u64i, !cir.ptr<!u64i>
+    %14 = cir.load %8 : !cir.ptr<!s32i>, !s32i
+    %15 = cir.unary(inc, %14) : !s32i, !s32i
+    cir.store %15, %8 : !s32i, !cir.ptr<!s32i>
+    cir.br ^bb10
+  ^bb4:
+    // LLVM: [[UNWIND]]:
+    // LLVM: %[[EHINFO:.*]] = landingpad { ptr, i32 }
+    // LLVM:     catch ptr @_ZTIi
+    // LLVM:     catch ptr @_ZTIPKc
+    %exception_ptr, %type_id = cir.eh.inflight_exception [@_ZTIi, @_ZTIPKc]
+    // LLVM: extractvalue { ptr, i32 } %[[EHINFO]], 0
+    // LLVM: extractvalue { ptr, i32 } %[[EHINFO]], 1
+    cir.br ^bb5(%exception_ptr, %type_id : !cir.ptr<!void>, !u32i)
+  ^bb5(%16: !cir.ptr<!void>, %17: !u32i):
+    %18 = cir.eh.typeid @_ZTIi
+    // LLVM: call i32 @llvm.eh.typeid.for.p0(ptr @_ZTIi)
+    %19 = cir.cmp(eq, %17, %18) : !u32i, !cir.bool
+    cir.brcond %19 ^bb6(%16 : !cir.ptr<!void>), ^bb7(%16, %17 : !cir.ptr<!void>, !u32i)
+  ^bb6(%20: !cir.ptr<!void>):
+    %21 = cir.catch_param begin %20 -> !cir.ptr<!s32i>
+    // LLVM: %[[EH_IDX:.*]] = phi ptr
+    // LLVM: call ptr @__cxa_begin_catch(ptr %[[EH_IDX]])
+    %22 = cir.load %21 : !cir.ptr<!s32i>, !s32i
+    cir.store %22, %7 : !s32i, !cir.ptr<!s32i>
+    %23 = cir.const #cir.int<98> : !s32i
+    %24 = cir.cast integral %23 : !s32i -> !u64i
+    cir.store %24, %3 : !u64i, !cir.ptr<!u64i>
+    %25 = cir.load %7 : !cir.ptr<!s32i>, !s32i
+    %26 = cir.unary(inc, %25) : !s32i, !s32i
+    cir.store %26, %7 : !s32i, !cir.ptr<!s32i>
+    cir.catch_param end
+    // LLVM: call void @__cxa_end_catch()
+    cir.br ^bb10
+  ^bb7(%27: !cir.ptr<!void>, %28: !u32i):
+    %29 = cir.eh.typeid @_ZTIPKc
+    // LLVM: call i32 @llvm.eh.typeid.for.p0(ptr @_ZTIPKc)
+    %30 = cir.cmp(eq, %28, %29) : !u32i, !cir.bool
+    cir.brcond %30 ^bb8(%27 : !cir.ptr<!void>), ^bb9(%27, %28 : !cir.ptr<!void>, !u32i)
+  ^bb8(%31: !cir.ptr<!void>):
+    %32 = cir.catch_param begin %31 -> !cir.ptr<!s8i>
+    // LLVM: %[[EH_MSG:.*]] = phi ptr
+    // LLVM: call ptr @__cxa_begin_catch(ptr %[[EH_MSG]])
+    cir.store %32, %6 : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+    %33 = cir.const #cir.int<99> : !s32i
+    %34 = cir.cast integral %33 : !s32i -> !u64i
+    cir.store %34, %3 : !u64i, !cir.ptr<!u64i>
+    %35 = cir.load %6 : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
+    %36 = cir.const #cir.int<0> : !s32i
+    %37 = cir.ptr_stride %35, %36 : (!cir.ptr<!s8i>, !s32i) -> !cir.ptr<!s8i>
+    cir.catch_param end
+    // LLVM: call void @__cxa_end_catch()
+    cir.br ^bb10
+  ^bb9(%38: !cir.ptr<!void>, %39: !u32i):
+    // LLVM: %[[RESUME_EH:.*]] = phi ptr
+    // LLVM: %[[RESUME_SEL:.*]] = phi i32
+    // LLVM: %[[RES0:.*]] = insertvalue { ptr, i32 } poison, ptr %[[RESUME_EH]], 0
+    // LLVM: %[[RES1:.*]] = insertvalue { ptr, i32 } %[[RES0]], i32 %[[RESUME_SEL]], 1
+    // LLVM: resume { ptr, i32 } %[[RES1]]
+    cir.resume.flat %38, %39
+  ^bb10:
+    %40 = cir.load %3 : !cir.ptr<!u64i>, !u64i
+    cir.store %40, %0 : !u64i, !cir.ptr<!u64i>
+    %41 = cir.load %0 : !cir.ptr<!u64i>, !u64i
+    cir.return %41 : !u64i
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/expect.cir b/clang/test/CIR/Incubator/Lowering/expect.cir
new file mode 100644
index 0000000000000..346b097982fe9
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/expect.cir
@@ -0,0 +1,54 @@
+// RUN: cir-opt %s -cir-to-llvm | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering  | FileCheck %s -check-prefix=LLVM
+
+!s64i = !cir.int<s, 64>
+module {
+  cir.func @foo(%arg0: !s64i) {
+    %0 = cir.const #cir.int<1> : !s64i
+    %1 = cir.expect(%arg0, %0) : !s64i
+    %2 = cir.cast int_to_bool %1 : !s64i -> !cir.bool
+    cir.if %2 {
+      cir.yield
+    }
+    %3 = cir.expect(%arg0, %0, 1.000000e-01) : !s64i
+    %4 = cir.cast int_to_bool %3 : !s64i -> !cir.bool
+    cir.if %4 {
+      cir.yield
+    }
+    cir.return
+  }
+}
+
+// MLIR:  llvm.func @foo(%arg0: i64)
+// MLIR:    [[ONE:%.*]] = llvm.mlir.constant(1 : i64) : i64
+// MLIR:    [[EXPECT:%.*]] = llvm.intr.expect %arg0, [[ONE]] : i64
+// MLIR:    [[ZERO:%.*]] = llvm.mlir.constant(0 : i64) : i64
+// MLIR:    [[CMP_NE:%.*]] = llvm.icmp "ne" [[EXPECT]], [[ZERO]] : i64
+// MLIR:    llvm.cond_br [[CMP_NE]], ^bb1, ^bb2
+// MLIR:  ^bb1:  // pred: ^bb0
+// MLIR:    llvm.br ^bb2
+// MLIR:  ^bb2:  // 2 preds: ^bb0, ^bb1
+// MLIR:    [[EXPECT_WITH_PROB:%.*]] = llvm.intr.expect.with.probability %arg0, [[ONE]], 1.000000e-01 : i64
+// MLIR:    [[ZERO:%.*]] = llvm.mlir.constant(0 : i64) : i64
+// MLIR:    [[CMP_NE:%.*]] = llvm.icmp "ne" [[EXPECT_WITH_PROB]], [[ZERO]] : i64
+// MLIR:    llvm.cond_br [[CMP_NE]], ^bb3, ^bb4
+// MLIR:  ^bb3:  // pred: ^bb2
+// MLIR:    llvm.br ^bb4
+// MLIR:  ^bb4:  // 2 preds: ^bb2, ^bb3
+// MLIR:    llvm.return
+
+// LLVM:  define void @foo(i64 %0)
+// LLVM:    [[EXPECT:%.*]] = call i64 @llvm.expect.i64(i64 %0, i64 1)
+// LLVM:    [[CMP_NE:%.*]] = icmp ne i64 [[EXPECT]], 0
+// LLVM:    br i1 [[CMP_NE]], label %4, label %5
+// LLVM:  4:
+// LLVM:    br label %5
+// LLVM:  5:
+// LLVM:    [[EXPECT_WITH_PROB:%.*]] = call i64 @llvm.expect.with.probability.i64(i64 %0, i64 1, double 1.000000e-01)
+// LLVM:    [[CMP_NE:%.*]] = icmp ne i64 [[EXPECT_WITH_PROB]], 0
+// LLVM:    br i1 [[CMP_NE]], label %8, label %9
+// LLVM:  8:
+// LLVM:    br label %9
+// LLVM:  9:
+// LLVM:    ret void
+
diff --git a/clang/test/CIR/Incubator/Lowering/float.cir b/clang/test/CIR/Incubator/Lowering/float.cir
new file mode 100644
index 0000000000000..aebc9b8050c36
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/float.cir
@@ -0,0 +1,18 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.mlir
+// RUN: FileCheck %s --input-file=%t.mlir
+
+module {
+  cir.func @test() {
+    // %0 = cir.const 1.0 : f16
+    // DISABLED-CHECK: %{{.+}} = llvm.mlir.constant(1.000000e+00 : f16) : f16
+    %1 = cir.const #cir.fp<1.0> : !cir.float
+    // CHECK: %{{.+}} = llvm.mlir.constant(1.000000e+00 : f32) : f32
+    %2 = cir.const #cir.fp<1.0> : !cir.double
+    // CHECK: %{{.+}} = llvm.mlir.constant(1.000000e+00 : f64) : f64
+    %3 = cir.const #cir.fp<1.0> : !cir.long_double<!cir.f80>
+    // CHECK: %{{.+}} = llvm.mlir.constant(1.000000e+00 : f80) : f80
+    // %5 = cir.const 1.0 : bf16
+    // DISABLED-CHECK: %{{.+}} = llvm.mlir.constant(1.000000e+00 : bf16) : bf16
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/func-call-conv.cir b/clang/test/CIR/Incubator/Lowering/func-call-conv.cir
new file mode 100644
index 0000000000000..577eb854d47b8
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/func-call-conv.cir
@@ -0,0 +1,20 @@
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering -o %t.ll
+// RUN: FileCheck %s --input-file=%t.ll --check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+module {
+    // LLVM: define void @foo()
+    cir.func @foo() cc(c) {
+        cir.return
+    }
+
+    // LLVM: define spir_kernel void @bar()
+    cir.func @bar() cc(spir_kernel) {
+        cir.return
+    }
+
+    // LLVM: define spir_func void @baz()
+    cir.func @baz() cc(spir_function) {
+        cir.return
+    }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/func.cir b/clang/test/CIR/Incubator/Lowering/func.cir
new file mode 100644
index 0000000000000..9eb70b7c43e1f
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/func.cir
@@ -0,0 +1,17 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.mlir
+// RUN: FileCheck %s -check-prefix=MLIR --input-file=%t.mlir
+
+!s32i = !cir.int<s, 32>
+module {
+  cir.func no_proto private @noProto3(...) -> !s32i
+  // MLIR: llvm.func @noProto3(...) -> i32
+  cir.func @test3(%arg0: !s32i) {
+    %3 = cir.get_global @noProto3 : !cir.ptr<!cir.func<(...) -> !s32i>>
+    // MLIR: %[[#FN_PTR:]] = llvm.mlir.addressof @noProto3 : !llvm.ptr
+    %4 = cir.cast bitcast %3 : !cir.ptr<!cir.func<(...) -> !s32i>> -> !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+    // MLIR: %[[#FUNC:]] = llvm.bitcast %[[#FN_PTR]] : !llvm.ptr to !llvm.ptr
+    %5 = cir.call %4(%arg0) : (!cir.ptr<!cir.func<(!s32i) -> !s32i>>, !s32i) -> !s32i
+    // MLIR: %{{.+}} = llvm.call %[[#FUNC]](%{{.+}}) : !llvm.ptr, (i32) -> i32
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/global-ptr.c b/clang/test/CIR/Incubator/Lowering/global-ptr.c
new file mode 100644
index 0000000000000..30ba05f6769fb
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/global-ptr.c
@@ -0,0 +1,55 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+// LLVM: %struct.S1 = type { [3200 x double], [3200 x double] }
+// LLVM: %struct.S2 = type { [10 x ptr] }
+// LLVM: %struct.S3 = type { [2000 x i32], [2000 x i32], [2000 x i32] }
+// LLVM: %struct.S4 = type { i32, i32, i32 }
+// LLVM: %union.U1 = type { [2000 x i32] }
+
+// Note: GEP emitted by cir might not be the same as LLVM, due to constant folding.
+// LLVM: @s1 = global %struct.S1 zeroinitializer, align 8
+// LLVM: @b1 = global ptr getelementptr inbounds nuw (i8, ptr @s1, i64 25600), align 8
+// LLVM: @s2 = global %struct.S2 zeroinitializer, align 8
+// LLVM: @b2 = global ptr @s2, align 8
+// LLVM: @s3 = global %struct.S3 zeroinitializer, align 4
+// LLVM: @b3 = global ptr getelementptr inbounds nuw (i8, ptr @s3, i64 16000), align 8
+// LLVM: @s4 = global %struct.S4 zeroinitializer, align 4
+// LLVM: @b4 = global ptr getelementptr inbounds nuw (i8, ptr @s4, i64 8), align 8
+// LLVM: @u1 = global %union.U1 zeroinitializer, align 4
+// LLVM: @b5 = global ptr @u1, align 8
+
+struct S1 {
+  double a[3200];
+  double b[3200];
+} s1;
+
+double *b1 = s1.b;
+
+struct S2 {
+  double* a[10];
+} s2;
+
+double **b2 = s2.a;
+
+struct S3 {
+  int a[2000];
+  int b[2000];
+  int c[2000];
+} s3;
+
+int *b3 = s3.c;
+
+struct S4 {
+    int a, b, c;
+} s4;
+
+int* b4 = &s4.c;
+
+union U1 {
+  int a[2000];
+  int b[2000];
+  int c[2000];
+} u1;
+
+int *b5 = u1.a;
diff --git a/clang/test/CIR/Incubator/Lowering/globals.cir b/clang/test/CIR/Incubator/Lowering/globals.cir
new file mode 100644
index 0000000000000..6c988b7dd635f
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/globals.cir
@@ -0,0 +1,218 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+!void = !cir.void
+!s16i = !cir.int<s, 16>
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+!s8i = !cir.int<s, 8>
+!u32i = !cir.int<u, 32>
+!u64i = !cir.int<u, 64>
+!u8i = !cir.int<u, 8>
+!rec_A = !cir.record<struct "A" {!s32i, !cir.array<!cir.array<!s32i x 2> x 2>} #cir.record.decl.ast>
+!rec_Bar = !cir.record<struct "Bar" {!s32i, !s8i} #cir.record.decl.ast>
+!rec_StringStruct = !cir.record<struct "StringStruct" {!cir.array<!s8i x 3>, !cir.array<!s8i x 3>, !cir.array<!s8i x 3>} #cir.record.decl.ast>
+!rec_StringStructPtr = !cir.record<struct "StringStructPtr" {!cir.ptr<!s8i>} #cir.record.decl.ast>
+!rec_anon2E1_ = !cir.record<struct "anon.1" {!cir.ptr<!cir.func<(!cir.int<s, 32>)>>} #cir.record.decl.ast>
+
+module {
+  cir.global external @a = #cir.int<3> : !s32i
+  cir.global external @c = #cir.int<2> : !u64i
+  cir.global external @y = #cir.fp<3.400000e+00> : !cir.float
+  cir.global external @w = #cir.fp<4.300000e+00> : !cir.double
+  cir.global external @x = #cir.int<51> : !s8i
+  cir.global external @rgb = #cir.const_array<[#cir.int<0> : !u8i, #cir.int<233> : !u8i, #cir.int<33> : !u8i]> : !cir.array<!u8i x 3>
+  cir.global external @alpha = #cir.const_array<[#cir.int<97> : !s8i, #cir.int<98> : !s8i, #cir.int<99> : !s8i, #cir.int<0> : !s8i]> : !cir.array<!s8i x 4>
+  cir.global "private" constant internal @".str" = #cir.const_array<"example\00" : !cir.array<!s8i x 8>> : !cir.array<!s8i x 8> {alignment = 1 : i64}
+  cir.global external @s = #cir.global_view<@".str"> : !cir.ptr<!s8i>
+  cir.global external @s_addr = #cir.global_view<@".str"> : !u64i
+  // MLIR: llvm.mlir.global internal constant @".str"("example\00")
+  // MLIR-SAME: {addr_space = 0 : i32, alignment = 1 : i64}
+  // MLIR: llvm.mlir.global external @s() {addr_space = 0 : i32} : !llvm.ptr {
+  // MLIR:   %0 = llvm.mlir.addressof @".str" : !llvm.ptr
+  // MLIR:   %1 = llvm.bitcast %0 : !llvm.ptr to !llvm.ptr
+  // MLIR:   llvm.return %1 : !llvm.ptr
+  // MLIR: }
+  // MLIR: llvm.mlir.global external @s_addr() {addr_space = 0 : i32} : i64 {
+  // MLIR:   %0 = llvm.mlir.addressof @".str" : !llvm.ptr
+  // MLIR:   %1 = llvm.ptrtoint %0 : !llvm.ptr to i64
+  // MLIR:   llvm.return %1 : i64
+  // MLIR: }
+  // LLVM: @.str = internal constant [8 x i8] c"example\00"
+  // LLVM: @s = global ptr @.str
+  // LLVM: @s_addr = global i64 ptrtoint (ptr @.str to i64)
+  cir.global external @aPtr = #cir.global_view<@a> : !cir.ptr<!s32i>
+  // MLIR: llvm.mlir.global external @aPtr() {addr_space = 0 : i32} : !llvm.ptr {
+  // MLIR:   %0 = llvm.mlir.addressof @a : !llvm.ptr
+  // MLIR:   llvm.return %0 : !llvm.ptr
+  // MLIR: }
+  cir.global "private" constant internal @".str.1" = #cir.const_array<"example1\00" : !cir.array<!s8i x 9>> : !cir.array<!s8i x 9> {alignment = 1 : i64}
+  cir.global external @s1 = #cir.global_view<@".str.1"> : !cir.ptr<!s8i>
+  cir.global external @s2 = #cir.global_view<@".str"> : !cir.ptr<!s8i>
+  cir.func @_Z10use_globalv() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["li", init] {alignment = 4 : i64}
+    %1 = cir.get_global @a : !cir.ptr<!s32i>
+    %2 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    cir.store %2, %0 : !s32i, !cir.ptr<!s32i>
+    cir.return
+  }
+  cir.func @_Z17use_global_stringv() {
+    %0 = cir.alloca !u8i, !cir.ptr<!u8i>, ["c", init] {alignment = 1 : i64}
+    %1 = cir.get_global @s2 : !cir.ptr<!cir.ptr<!s8i>>
+    %2 = cir.load %1 : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
+    %3 = cir.const #cir.int<0> : !s32i
+    %4 = cir.ptr_stride %2, %3 : (!cir.ptr<!s8i>, !s32i) -> !cir.ptr<!s8i>
+    %5 = cir.load %4 : !cir.ptr<!s8i>, !s8i
+    %6 = cir.cast integral %5 : !s8i -> !u8i
+    cir.store %6, %0 : !u8i, !cir.ptr<!u8i>
+    cir.return
+  }
+  cir.func linkonce_odr @_Z4funcIiET_v() -> !s32i {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+    %1 = cir.const #cir.int<0> : !s32i
+    cir.store %1, %0 : !s32i, !cir.ptr<!s32i>
+    %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    cir.return %2 : !s32i
+  }
+  cir.func @_Z8use_funcv() -> !s32i {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+    %1 = cir.call @_Z4funcIiET_v() : () -> !s32i
+    cir.store %1, %0 : !s32i, !cir.ptr<!s32i>
+    %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    cir.return %2 : !s32i
+  }
+  cir.global external @string = #cir.const_array<[#cir.int<119> : !s8i, #cir.int<104> : !s8i, #cir.int<97> : !s8i, #cir.int<116> : !s8i, #cir.int<110> : !s8i, #cir.int<111> : !s8i, #cir.int<119> : !s8i, #cir.int<0> : !s8i]> : !cir.array<!s8i x 8>
+  // MLIR: llvm.mlir.global external @string(dense<[119, 104, 97, 116, 110, 111, 119, 0]> : tensor<8xi8>) {addr_space = 0 : i32} : !llvm.array<8 x i8>
+  cir.global external @uint = #cir.const_array<[#cir.int<255> : !u32i]> : !cir.array<!u32i x 1>
+  // MLIR: llvm.mlir.global external @uint(dense<255> : tensor<1xi32>) {addr_space = 0 : i32} : !llvm.array<1 x i32>
+  cir.global external @sshort = #cir.const_array<[#cir.int<11111> : !s16i, #cir.int<22222> : !s16i]> : !cir.array<!s16i x 2>
+  // MLIR: llvm.mlir.global external @sshort(dense<[11111, 22222]> : tensor<2xi16>) {addr_space = 0 : i32} : !llvm.array<2 x i16>
+  cir.global external @sint = #cir.const_array<[#cir.int<123> : !s32i, #cir.int<456> : !s32i, #cir.int<789> : !s32i]> : !cir.array<!s32i x 3>
+  // MLIR: llvm.mlir.global external @sint(dense<[123, 456, 789]> : tensor<3xi32>) {addr_space = 0 : i32} : !llvm.array<3 x i32>
+  cir.global external @ll = #cir.const_array<[#cir.int<999999999> : !s64i, #cir.int<0> : !s64i, #cir.int<0> : !s64i, #cir.int<0> : !s64i]> : !cir.array<!s64i x 4>
+  // MLIR: llvm.mlir.global external @ll(dense<[999999999, 0, 0, 0]> : tensor<4xi64>) {addr_space = 0 : i32} : !llvm.array<4 x i64>
+  cir.global external @twoDim = #cir.const_array<[#cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i]> : !cir.array<!s32i x 2>, #cir.const_array<[#cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.array<!s32i x 2>]> : !cir.array<!cir.array<!s32i x 2> x 2>
+  // MLIR: llvm.mlir.global external @twoDim(dense<{{\[\[}}1, 2], [3, 4{{\]\]}}> : tensor<2x2xi32>) {addr_space = 0 : i32} : !llvm.array<2 x array<2 x i32>>
+
+  // The following tests directly check the resulting LLVM IR because the MLIR
+  // version is too long. Always prefer the MLIR prefix when possible.
+  cir.global external @nestedTwoDim = #cir.const_record<{#cir.int<1> : !s32i, #cir.const_array<[#cir.const_array<[#cir.int<2> : !s32i, #cir.int<3> : !s32i]> : !cir.array<!s32i x 2>, #cir.const_array<[#cir.int<4> : !s32i, #cir.int<5> : !s32i]> : !cir.array<!s32i x 2>]> : !cir.array<!cir.array<!s32i x 2> x 2>}> : !rec_A
+  // LLVM: @nestedTwoDim = global %struct.A { i32 1, [2 x [2 x i32{{\]\] \[\[}}2 x i32] [i32 2, i32 3], [2 x i32] [i32 4, i32 5{{\]\]}} }
+  cir.global external @nestedString = #cir.const_record<{#cir.const_array<"1\00\00" : !cir.array<!s8i x 3>> : !cir.array<!s8i x 3>, #cir.const_array<"\00\00\00" : !cir.array<!s8i x 3>> : !cir.array<!s8i x 3>, #cir.const_array<"\00\00\00" : !cir.array<!s8i x 3>> : !cir.array<!s8i x 3>}> : !rec_StringStruct
+  // LLVM: @nestedString = global %struct.StringStruct { [3 x i8] c"1\00\00", [3 x i8] zeroinitializer, [3 x i8] zeroinitializer }
+  cir.global external @nestedStringPtr = #cir.const_record<{#cir.global_view<@".str"> : !cir.ptr<!s8i>}> : !rec_StringStructPtr
+  // LLVM: @nestedStringPtr = global %struct.StringStructPtr { ptr @.str }
+
+  cir.func @_Z11get_globalsv() {
+    %0 = cir.alloca !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>, ["s", init] {alignment = 8 : i64}
+    %1 = cir.alloca !cir.ptr<!u32i>, !cir.ptr<!cir.ptr<!u32i>>, ["u", init] {alignment = 8 : i64}
+    %2 = cir.alloca !cir.ptr<!s16i>, !cir.ptr<!cir.ptr<!s16i>>, ["ss", init] {alignment = 8 : i64}
+    %3 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["si", init] {alignment = 8 : i64}
+    %4 = cir.alloca !cir.ptr<!s64i>, !cir.ptr<!cir.ptr<!s64i>>, ["l", init] {alignment = 8 : i64}
+    %5 = cir.get_global @string : !cir.ptr<!cir.array<!s8i x 8>>
+    %6 = cir.cast array_to_ptrdecay %5 : !cir.ptr<!cir.array<!s8i x 8>> -> !cir.ptr<!s8i>
+    // MLIR: %[[RES:[0-9]+]] = llvm.mlir.addressof @string : !llvm.ptr
+    // MLIR: %{{[0-9]+}} = llvm.getelementptr %[[RES]][0] : (!llvm.ptr) -> !llvm.ptr, i8
+    cir.store %6, %0 : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+    %7 = cir.get_global @uint : !cir.ptr<!cir.array<!u32i x 1>>
+    %8 = cir.cast array_to_ptrdecay %7 : !cir.ptr<!cir.array<!u32i x 1>> -> !cir.ptr<!u32i>
+    // MLIR: %[[RES:[0-9]+]] = llvm.mlir.addressof @uint : !llvm.ptr
+    // MLIR: %{{[0-9]+}} = llvm.getelementptr %[[RES]][0] : (!llvm.ptr) -> !llvm.ptr, i32
+    cir.store %8, %1 : !cir.ptr<!u32i>, !cir.ptr<!cir.ptr<!u32i>>
+    %9 = cir.get_global @sshort : !cir.ptr<!cir.array<!s16i x 2>>
+    %10 = cir.cast array_to_ptrdecay %9 : !cir.ptr<!cir.array<!s16i x 2>> -> !cir.ptr<!s16i>
+    // MLIR: %[[RES:[0-9]+]] = llvm.mlir.addressof @sshort : !llvm.ptr
+    // MLIR: %{{[0-9]+}} = llvm.getelementptr %[[RES]][0] : (!llvm.ptr) -> !llvm.ptr, i16
+    cir.store %10, %2 : !cir.ptr<!s16i>, !cir.ptr<!cir.ptr<!s16i>>
+    %11 = cir.get_global @sint : !cir.ptr<!cir.array<!s32i x 3>>
+    %12 = cir.cast array_to_ptrdecay %11 : !cir.ptr<!cir.array<!s32i x 3>> -> !cir.ptr<!s32i>
+    // MLIR: %[[RES:[0-9]+]] = llvm.mlir.addressof @sint : !llvm.ptr
+    // MLIR: %{{[0-9]+}} = llvm.getelementptr %[[RES]][0] : (!llvm.ptr) -> !llvm.ptr, i32
+    cir.store %12, %3 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+    %13 = cir.get_global @ll : !cir.ptr<!cir.array<!s64i x 4>>
+    %14 = cir.cast array_to_ptrdecay %13 : !cir.ptr<!cir.array<!s64i x 4>> -> !cir.ptr<!s64i>
+    // MLIR: %[[RES:[0-9]+]] = llvm.mlir.addressof @ll : !llvm.ptr
+    // MLIR: %{{[0-9]+}} = llvm.getelementptr %[[RES]][0] : (!llvm.ptr) -> !llvm.ptr, i64
+    cir.store %14, %4 : !cir.ptr<!s64i>, !cir.ptr<!cir.ptr<!s64i>>
+    cir.return
+  }
+  cir.global external @flt = #cir.const_array<[#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00> : !cir.float]> : !cir.array<!cir.float x 2>
+  cir.global external @zeroInitFlt = #cir.const_array<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.array<!cir.float x 2>
+  // MLIR: llvm.mlir.global external @flt(dense<[1.000000e+00, 2.000000e+00]> : tensor<2xf32>) {addr_space = 0 : i32} : !llvm.array<2 x f32>
+  // MLIR: llvm.mlir.global external @zeroInitFlt(dense<0.000000e+00> : tensor<2xf32>) {addr_space = 0 : i32} : !llvm.array<2 x f32>
+  cir.global "private" internal @staticVar = #cir.int<0> : !s32i
+  // MLIR: llvm.mlir.global internal @staticVar(0 : i32) {addr_space = 0 : i32} : i32
+  cir.global external @nullPtr = #cir.ptr<null> : !cir.ptr<!s32i>
+  // MLIR: llvm.mlir.global external @nullPtr()
+  // MLIR:   %0 = llvm.mlir.zero : !llvm.ptr
+  // MLIR:   llvm.return %0 : !llvm.ptr
+  // MLIR: }
+  cir.global external @zeroStruct = #cir.zero : !rec_Bar
+  // MLIR: llvm.mlir.global external @zeroStruct(#llvm.zero)
+  cir.global common @comm = #cir.int<0> : !s32i
+  // MLIR: llvm.mlir.global common @comm(0 : i32) {addr_space = 0 : i32} : i32
+
+  cir.global external @undefStruct = #cir.undef : !rec_Bar
+  // MLIR: llvm.mlir.global external @undefStruct()
+  // MLIR:   %0 = llvm.mlir.undef : !llvm.struct<"struct.Bar", (i32, i8)>
+  // MLIR:   llvm.return %0 : !llvm.struct<"struct.Bar", (i32, i8)>
+  // MLIR: }
+  // LLVM: @undefStruct = global %struct.Bar undef
+
+  cir.global "private" internal @Handlers = #cir.const_array<[#cir.const_record<{#cir.global_view<@myfun> : !cir.ptr<!cir.func<(!s32i)>>}> : !rec_anon2E1_]> : !cir.array<!rec_anon2E1_ x 1>
+  cir.func internal private dso_local @myfun(%arg0: !s32i) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    cir.return
+  }
+  cir.func @foo(%arg0: !s32i, %arg1: !s32i) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["flag", init] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    cir.store %arg1, %1 : !s32i, !cir.ptr<!s32i>
+    %2 = cir.get_global @Handlers : !cir.ptr<!cir.array<!rec_anon2E1_ x 1>>
+    %3 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    %4 = cir.cast array_to_ptrdecay %2 : !cir.ptr<!cir.array<!rec_anon2E1_ x 1>> -> !cir.ptr<!rec_anon2E1_>
+    %5 = cir.ptr_stride %4, %3 : (!cir.ptr<!rec_anon2E1_>, !s32i) -> !cir.ptr<!rec_anon2E1_>
+    %6 = cir.get_member %5[0] {name = "func"} : !cir.ptr<!rec_anon2E1_> -> !cir.ptr<!cir.ptr<!cir.func<(!s32i)>>>
+    %7 = cir.load %6 : !cir.ptr<!cir.ptr<!cir.func<(!s32i)>>>, !cir.ptr<!cir.func<(!s32i)>>
+    %8 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    cir.call %7(%8) : (!cir.ptr<!cir.func<(!s32i)>>, !s32i) -> ()
+    cir.return
+  }
+  //MLIR-LABEL: @foo
+  //MLIR:  %[[RES4:.*]] = llvm.mlir.addressof @Handlers : !llvm.ptr
+  //MLIR:  %[[LOAD:.*]] = llvm.load {{.*}} {alignment = 4 : i64} : !llvm.ptr -> i32
+  //MLIR:  %[[RES6:.*]] = llvm.getelementptr %[[RES4]][0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"struct.anon.1", (ptr)>
+  //MLIR:  %[[RES5:.*]] = llvm.sext %[[LOAD]] : i32 to i64
+  //MLIR:  %[[RES7:.*]] = llvm.getelementptr %[[RES6]][%[[RES5]]] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<"struct.anon.1", (ptr)>
+  //MLIR:  %[[RES8:.*]] = llvm.getelementptr %[[RES7]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"struct.anon.1", (ptr)>
+  //MLIR:  %[[RES9:.*]] = llvm.load %[[RES8]] {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr
+  //MLIR:  llvm.call %[[RES9]]({{.*}}) : !llvm.ptr, (i32) -> ()
+
+  cir.global external @zero_array = #cir.zero : !cir.array<!s32i x 16>
+  cir.func @use_zero_array() {
+    %0 = cir.const #cir.global_view<@zero_array> : !cir.ptr<!s32i>
+    %1 = cir.const #cir.int<0> : !s32i
+    %2 = cir.ptr_stride %0, %1 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+    %3 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+    cir.return
+  }
+  // MLIR:  %0 = llvm.mlir.addressof @zero_array
+
+  cir.func @global_view_as_integer() -> !u64i {
+    %0 = cir.const #cir.global_view<@".str"> : !u64i
+    cir.return %0 : !u64i
+  }
+  // MLIR-LABEL: @global_view_as_integer
+  // MLIR-NEXT:    %0 = llvm.mlir.addressof @".str" : !llvm.ptr
+  // MLIR-NEXT:    %1 = llvm.ptrtoint %0 : !llvm.ptr to i64
+  // MLIR-NEXT:    llvm.return %1 : i64
+  // MLIR-NEXT:  }
+  // LLVM-LABEL: @global_view_as_integer
+  // LLVM-NEXT:    ret i64 ptrtoint (ptr @.str to i64)
+  // LLVM-NEXT:  }
+
+}
diff --git a/clang/test/CIR/Incubator/Lowering/goto-interscope.c b/clang/test/CIR/Incubator/Lowering/goto-interscope.c
new file mode 100644
index 0000000000000..bcaf89d506904
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/goto-interscope.c
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s
+struct def;
+typedef struct def *decl;
+struct def {
+  int index;
+};
+struct def d;
+int foo(unsigned char cond, unsigned num)
+{
+  if (cond)
+    goto label;
+  {
+    decl b = &d;
+    label:
+      return b->index;
+  }
+
+  {
+    int a[num];
+    if (num > 0)
+      return a[0] + a[1];
+  }
+  return 0;
+}
+// It is fine enough to check that the LLVM IR is generated successfully.
+// CHECK: define {{.*}}i32 @foo
+// CHECK: alloca ptr
+// CHECK: alloca i8
+// Check that the dynamic alloca is not hoisted and lives in a separate block.
+// CHECK: :
+// Check we have a dynamic alloca
+// CHECK: alloca i32, i64 %{{.*}}
diff --git a/clang/test/CIR/Incubator/Lowering/goto.cir b/clang/test/CIR/Incubator/Lowering/goto.cir
new file mode 100644
index 0000000000000..cd3a57d2e7138
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/goto.cir
@@ -0,0 +1,52 @@
+// RUN: cir-opt %s --pass-pipeline='builtin.module(cir-to-llvm,canonicalize{region-simplify=disabled})' -o - | FileCheck %s -check-prefix=MLIR
+
+!s32i = !cir.int<s, 32>
+
+module {
+
+  cir.func @gotoFromIf(%arg0: !s32i) -> !s32i {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    cir.scope {
+      %6 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+      %7 = cir.const #cir.int<5> : !s32i
+      %8 = cir.cmp(gt, %6, %7) : !s32i, !cir.bool
+      cir.if %8 {
+        cir.goto "err"
+      }
+    }
+    %2 = cir.const #cir.int<0> : !s32i
+    cir.store %2, %1 : !s32i, !cir.ptr<!s32i>
+    cir.br ^bb1
+  ^bb1:
+    %3 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    cir.return %3 : !s32i
+  ^bb2:
+    cir.label "err"
+    %4 = cir.const #cir.int<1> : !s32i
+    %5 = cir.unary(minus, %4) : !s32i, !s32i
+    cir.store %5, %1 : !s32i, !cir.ptr<!s32i>
+    cir.br ^bb1
+  }
+
+// MLIR:  llvm.func @gotoFromIf
+// MLIR:    %[[#One:]] = llvm.mlir.constant(1 : i32) : i32
+// MLIR:    %[[#Zero:]] = llvm.mlir.constant(0 : i32) : i32
+// MLIR:    llvm.cond_br {{.*}}, ^bb[[#COND_YES:]], ^bb[[#COND_NO:]]
+// MLIR:  ^bb[[#COND_YES]]:
+// MLIR:    llvm.br ^bb[[#GOTO_BLK:]]
+// MLIR:   ^bb[[#COND_NO]]:
+// MLIR:    llvm.br ^bb[[#BLK:]]
+// MLIR:  ^bb[[#BLK]]:
+// MLIR:    llvm.store %[[#Zero]], %[[#Ret_val_addr:]] {{.*}}: i32, !llvm.ptr
+// MLIR:    llvm.br ^bb[[#RETURN:]]
+// MLIR:  ^bb[[#RETURN]]:
+// MLIR:    %[[#Ret_val:]] = llvm.load %[[#Ret_val_addr]] {alignment = 4 : i64} : !llvm.ptr -> i32
+// MLIR:    llvm.return %[[#Ret_val]] : i32
+// MLIR:  ^bb[[#GOTO_BLK]]:
+// MLIR:    %[[#Neg_one:]] = llvm.sub %[[#Zero]], %[[#One]]  : i32
+// MLIR:    llvm.store %[[#Neg_one]], %[[#Ret_val_addr]] {{.*}}: i32, !llvm.ptr
+// MLIR:    llvm.br ^bb[[#RETURN]]
+// MLIR: }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/hello.cir b/clang/test/CIR/Incubator/Lowering/hello.cir
new file mode 100644
index 0000000000000..539e2d74974bd
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/hello.cir
@@ -0,0 +1,35 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+!s32i = !cir.int<s, 32>
+!s8i = !cir.int<s, 8>
+module @"/tmp/test.raw" attributes {cir.lang = #cir.lang<c>, cir.sob = #cir.signed_overflow_behavior<undefined>, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i32>>} {
+  cir.func private @printf(!cir.ptr<!s8i>, ...) -> !s32i
+  cir.global "private" constant internal @".str" = #cir.const_array<"Hello, world!\0A\00" : !cir.array<!s8i x 15>> : !cir.array<!s8i x 15> {alignment = 1 : i64}
+  cir.func @main() -> !s32i {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+    %1 = cir.get_global @printf : !cir.ptr<!cir.func<(!cir.ptr<!s8i>, ...) -> !s32i>>
+    %2 = cir.get_global @".str" : !cir.ptr<!cir.array<!s8i x 15>>
+    %3 = cir.cast array_to_ptrdecay %2 : !cir.ptr<!cir.array<!s8i x 15>> -> !cir.ptr<!s8i>
+    %4 = cir.call @printf(%3) : (!cir.ptr<!s8i>) -> !s32i
+    %5 = cir.const #cir.int<0> : !s32i
+    cir.store %5, %0 : !s32i, !cir.ptr<!s32i>
+    %6 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    cir.return %6 : !s32i
+  }
+}
+
+// CHECK:  llvm.func @printf(!llvm.ptr, ...) -> i32
+// CHECK:  llvm.mlir.global internal constant @".str"("Hello, world!\0A\00")
+// CHECK-SAME: {addr_space = 0 : i32, alignment = 1 : i64}
+// CHECK:  llvm.func @main() -> i32
+// CHECK:    %0 = llvm.mlir.constant(1 : index) : i64
+// CHECK:    %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr
+// CHECK:    %2 = llvm.mlir.addressof @".str" : !llvm.ptr
+// CHECK:    %3 = llvm.getelementptr %2[0] : (!llvm.ptr) -> !llvm.ptr, i8
+// CHECK:    %4 = llvm.call @printf(%3) vararg(!llvm.func<i32 (ptr, ...)>) : (!llvm.ptr) -> i32
+// CHECK:    %5 = llvm.mlir.constant(0 : i32) : i32
+// CHECK:    llvm.store %5, %1 {{.*}} : i32, !llvm.ptr
+// CHECK:    %6 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+// CHECK:    llvm.return %6 : i32
+// CHECK:  }
diff --git a/clang/test/CIR/Incubator/Lowering/if.cir b/clang/test/CIR/Incubator/Lowering/if.cir
new file mode 100644
index 0000000000000..8968e1fa9ae51
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/if.cir
@@ -0,0 +1,99 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering  | FileCheck %s -check-prefix=LLVM
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.func @foo(%arg0: !s32i) -> !s32i {
+    %4 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
+    cir.if %4 {
+      %5 = cir.const #cir.int<1> : !s32i
+      cir.return %5 : !s32i
+    } else {
+      %5 = cir.const #cir.int<0> : !s32i
+      cir.return %5 : !s32i
+    }
+    cir.return %arg0 : !s32i
+  }
+
+//      MLIR:   llvm.func @foo(%arg0: i32) -> i32
+// MLIR-NEXT:     %0 = llvm.mlir.constant(0 : i32) : i32
+// MLIR-NEXT:     %1 = llvm.icmp "ne" %arg0, %0 : i32
+// MLIR-NEXT:     llvm.cond_br %1, ^bb1, ^bb2
+// MLIR-NEXT:   ^bb1:  // pred: ^bb0
+// MLIR-NEXT:     %2 = llvm.mlir.constant(1 : i32) : i32
+// MLIR-NEXT:     llvm.return %2 : i32
+// MLIR-NEXT:   ^bb2:  // pred: ^bb0
+// MLIR-NEXT:     %3 = llvm.mlir.constant(0 : i32) : i32
+// MLIR-NEXT:     llvm.return %3 : i32
+// MLIR-NEXT:   ^bb3:  // no predecessors
+// MLIR-NEXT:     llvm.return %arg0 : i32
+// MLIR-NEXT:   }
+
+//       LLVM: define i32 @foo(i32 %0)
+//  LLVM-NEXT:   %2 = icmp ne i32 %0, 0
+//  LLVM-NEXT:   br i1 %2, label %3, label %4
+// LLVM-EMPTY:
+//  LLVM-NEXT: 3:
+//  LLVM-NEXT:   ret i32 1
+// LLVM-EMPTY:
+//  LLVM-NEXT: 4:
+//  LLVM-NEXT:   ret i32 0
+// LLVM-EMPTY:
+//  LLVM-NEXT: 5:
+//  LLVM-NEXT:   ret i32 %0
+//  LLVM-NEXT: }
+
+  cir.func @onlyIf(%arg0: !s32i) -> !s32i {
+    %4 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
+    cir.if %4 {
+      %5 = cir.const #cir.int<1> : !s32i
+      cir.return %5 : !s32i
+    }
+    cir.return %arg0 : !s32i
+  }
+
+  //      MLIR: llvm.func @onlyIf(%arg0: i32) -> i32
+  // MLIR-NEXT:   %0 = llvm.mlir.constant(0 : i32) : i32
+  // MLIR-NEXT:   %1 = llvm.icmp "ne" %arg0, %0 : i32
+  // MLIR-NEXT:   llvm.cond_br %1, ^bb1, ^bb2
+  // MLIR-NEXT: ^bb1:  // pred: ^bb0
+  // MLIR-NEXT:   %2 = llvm.mlir.constant(1 : i32) : i32
+  // MLIR-NEXT:   llvm.return %2 : i32
+  // MLIR-NEXT: ^bb2:  // pred: ^bb0
+  // MLIR-NEXT:   llvm.return %arg0 : i32
+  // MLIR-NEXT: }
+
+  // Verify empty if clause is properly lowered to empty block
+  cir.func @emptyIfClause(%arg0: !s32i) -> !s32i {
+    // MLIR-LABEL: llvm.func @emptyIfClause
+    %4 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
+    // MLIR: llvm.cond_br {{%.*}}, ^[[T:.*]], ^[[PHI:.*]]
+    cir.if %4 {
+      // MLIR-NEXT: ^[[T]]:
+      // MLIR-NEXT:   llvm.br ^[[PHI]]
+    }
+    // MLIR-NEXT: ^[[PHI]]:
+    // MLIR-NEXT:   llvm.return
+    cir.return %arg0 : !s32i
+  }
+
+  // Verify empty if-else clauses are properly lowered to empty blocks
+  // TODO: Fix reversed order of blocks in the test once Issue clangir/#1094 is
+  // addressed
+  cir.func @emptyIfElseClause(%arg0: !s32i) -> !s32i {
+    // MLIR-LABEL: llvm.func @emptyIfElseClause
+    %4 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
+    // MLIR: llvm.cond_br {{%.*}}, ^[[T:.*]], ^[[F:.*]]
+    cir.if %4 {
+    // MLIR-NEXT: ^[[T]]:
+    // MLIR-NEXT:   llvm.br ^[[PHI:.*]]
+    } else {
+    // MLIR-NEXT: ^[[F]]:
+    // MLIR-NEXT:   llvm.br ^[[PHI]]
+    }
+    // MLIR-NEXT: ^[[PHI]]:
+    // MLIR-NEXT:   llvm.return
+    cir.return %arg0 : !s32i
+  }
+
+}
diff --git a/clang/test/CIR/Incubator/Lowering/int-wrap.cir b/clang/test/CIR/Incubator/Lowering/int-wrap.cir
new file mode 100644
index 0000000000000..f885e745004b6
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/int-wrap.cir
@@ -0,0 +1,24 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering  | FileCheck %s -check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+module {
+  cir.func @test(%arg0: !s32i) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["len", init] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    %1 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    %2 = cir.const #cir.int<42> : !s32i
+    %3 = cir.binop(sub, %1, %2) nsw : !s32i
+    %4 = cir.binop(sub, %1, %2) nuw : !s32i
+    %5 = cir.binop(sub, %1, %2) : !s32i
+    cir.return
+  }
+}
+
+// MLIR:      llvm.sub {{.*}}, {{.*}} overflow<nsw>  : i32
+// MLIR-NEXT: llvm.sub {{.*}}, {{.*}} overflow<nuw>  : i32
+// MLIR-NEXT: llvm.sub {{.*}}, {{.*}}  : i32
+
+// LLVM:      sub nsw i32 {{.*}}, {{.*}}
+// LLVM-NEXT: sub nuw i32 {{.*}}, {{.*}}
+// LLVM-NEXT: sub i32 {{.*}}, {{.*}}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Lowering/intrinsics.cir b/clang/test/CIR/Incubator/Lowering/intrinsics.cir
new file mode 100644
index 0000000000000..1d91cd8f89648
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/intrinsics.cir
@@ -0,0 +1,23 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering  | FileCheck %s -check-prefix=LLVM
+
+module {
+  cir.func @test_unreachable() {
+    cir.unreachable
+  }
+
+  //      MLIR: llvm.func @test_unreachable()
+  // MLIR-NEXT:   llvm.unreachable
+
+  cir.func @test_trap() {
+    cir.trap
+  }
+
+  //      MLIR: llvm.func @test_trap()
+  // MLIR-NEXT:   llvm.intr.trap
+  // MLIR-NEXT:   llvm.unreachable
+
+  //      LLVM: define void @test_trap()
+  // LLVM-NEXT:   call void @llvm.trap()
+  // LLVM-NEXT:   unreachable
+}
diff --git a/clang/test/CIR/Incubator/Lowering/libc.cir b/clang/test/CIR/Incubator/Lowering/libc.cir
new file mode 100644
index 0000000000000..5be5d44cd3c69
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/libc.cir
@@ -0,0 +1,18 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+!void = !cir.void
+!u64i = !cir.int<u, 64>
+module {
+  cir.func @shouldLowerLibcMemcpyBuiltin(%arg0: !cir.ptr<!void>, %arg1: !cir.ptr<!void>, %arg2: !u64i) {
+    cir.libc.memcpy %arg2 bytes from %arg0 to %arg1 : !u64i, !cir.ptr<!void> -> !cir.ptr<!void>
+    // CHECK: "llvm.intr.memcpy"(%{{.+}}, %{{.+}}, %{{.+}}) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> ()
+    cir.return
+  }
+
+  cir.func @shouldLowerLibcFAbsBuiltin(%arg0: !cir.double) -> !cir.double {
+    %0 = cir.fabs %arg0 : !cir.double
+    // CHECK: %0 = llvm.intr.fabs(%arg0) : (f64) -> f64
+    cir.return %0 : !cir.double
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/linker-options.cir b/clang/test/CIR/Incubator/Lowering/linker-options.cir
new file mode 100644
index 0000000000000..76365c2f4f0d9
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/linker-options.cir
@@ -0,0 +1,9 @@
+// RUN: cir-opt -cir-to-llvm %s | FileCheck %s
+
+module {
+  cir.linker_options ["/DEFAULTLIB:", "libcmt"]
+} 
+
+// CHECK: module {
+// CHECK-NEXT:   llvm.linker_options ["/DEFAULTLIB:", "libcmt"]
+// CHECK-NEXT: }
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Lowering/loadstorealloca.cir b/clang/test/CIR/Incubator/Lowering/loadstorealloca.cir
new file mode 100644
index 0000000000000..582652d5f3de4
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/loadstorealloca.cir
@@ -0,0 +1,57 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir -check-prefix=MLIR
+!u32i = !cir.int<u, 32>
+
+module {
+  cir.func @foo() -> !u32i {
+    %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["x", init] {alignment = 4 : i64}
+    %1 = cir.const #cir.int<1> : !u32i
+    cir.store %1, %0 : !u32i, !cir.ptr<!u32i>
+    %2 = cir.load %0 : !cir.ptr<!u32i>, !u32i
+    cir.return %2 : !u32i
+  }
+
+  cir.func @test_volatile() -> !u32i {
+    %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["x", init] {alignment = 4 : i64}
+    %1 = cir.const #cir.int<1> : !u32i
+    cir.store volatile %1, %0 : !u32i, !cir.ptr<!u32i>
+    %2 = cir.load volatile %0 : !cir.ptr<!u32i>, !u32i
+    cir.return %2 : !u32i
+  }
+
+
+//      MLIR: module {
+// MLIR-NEXT:   func @foo() -> i32
+// MLIR-NEXT:     %0 = llvm.mlir.constant(1 : index) : i64
+// MLIR-NEXT:     %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr
+// MLIR-NEXT:     %2 = llvm.mlir.constant(1 : i32) : i32
+// MLIR-NEXT:     llvm.store %2, %1 {{.*}}: i32, !llvm.ptr
+// MLIR-NEXT:     %3 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+// MLIR-NEXT:     return %3 : i32
+
+
+//      MLIR:   func @test_volatile() -> i32
+// MLIR-NEXT:     %0 = llvm.mlir.constant(1 : index) : i64
+// MLIR-NEXT:     %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr
+// MLIR-NEXT:     %2 = llvm.mlir.constant(1 : i32) : i32
+// MLIR-NEXT:     llvm.store volatile %2, %1 {{.*}}: i32, !llvm.ptr
+// MLIR-NEXT:     %3 = llvm.load volatile %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+// MLIR-NEXT:     return %3 : i32
+
+  cir.func @test_bool_memory_lowering() {
+    // MLIR-LABEL: @test_bool_memory_lowering
+    %0 = cir.alloca !cir.bool, !cir.ptr<!cir.bool>, ["x", init] {alignment = 1 : i64}
+    // MLIR: %[[VAR:.*]] = llvm.alloca %{{.*}} x i8
+    %1 = cir.const #cir.bool<true> : !cir.bool
+    // MLIR: %[[TRUE:.*]] = llvm.mlir.constant(true) : i1
+    cir.store %1, %0 : !cir.bool, !cir.ptr<!cir.bool>
+    // MLIR: %[[TRUE_EXT:.*]] = llvm.zext %[[TRUE]] : i1 to i8
+    // MLIR: llvm.store %[[TRUE_EXT]], %[[VAR]] {alignment = 1 : i64} : i8, !llvm.ptr
+    %2 = cir.load %0 : !cir.ptr<!cir.bool>, !cir.bool
+    // MLIR: %[[LOAD_VAL:.*]] = llvm.load %[[VAR]] {alignment = 1 : i64} : !llvm.ptr -> i8
+    // MLIR: %[[LOAD_SCALAR:.*]] = llvm.trunc %[[LOAD_VAL]] : i8 to i1
+    %3 = cir.cast bool_to_int %2 : !cir.bool -> !u32i
+    // MLIR: %[[CAST_VAL:.*]] = llvm.zext %[[LOAD_SCALAR]] : i1 to i32
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/loop.cir b/clang/test/CIR/Incubator/Lowering/loop.cir
new file mode 100644
index 0000000000000..d15479a76a0d0
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/loop.cir
@@ -0,0 +1,126 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+#true = #cir.bool<true> : !cir.bool
+!s32i = !cir.int<s, 32>
+
+
+module {
+
+  cir.func @testFor(%arg0 : !cir.bool) {
+    cir.for : cond {
+      cir.condition(%arg0)
+    } body {
+      cir.yield
+    } step {
+      cir.yield
+    }
+    cir.return
+  }
+
+// CHECK: @testFor
+// CHECK:    llvm.br ^bb[[#COND:]]
+// CHECK:  ^bb[[#COND]]:
+// CHECK:    llvm.cond_br %{{.+}}, ^bb[[#BODY:]], ^bb[[#EXIT:]]
+// CHECK:  ^bb[[#BODY]]:
+// CHECK:    llvm.br ^bb[[#STEP:]]
+// CHECK:  ^bb[[#STEP]]:
+// CHECK:    llvm.br ^bb[[#COND]]
+// CHECK:  ^bb[[#EXIT]]:
+
+
+
+  // Test while cir.loop operation lowering.
+  cir.func @testWhile(%arg0 : !cir.bool) {
+    cir.while {
+      cir.condition(%arg0)
+    } do {
+      cir.yield
+    }
+    cir.return
+  }
+
+// CHECK: @testWhile
+// CHECK:    llvm.br ^bb[[#COND:]]
+// CHECK:  ^bb[[#COND]]:
+// CHECK:    llvm.cond_br %{{.+}}, ^bb[[#BODY:]], ^bb[[#EXIT:]]
+// CHECK:  ^bb[[#BODY]]:
+// CHECK:    llvm.br ^bb[[#COND]]
+// CHECK:  ^bb[[#EXIT]]:
+
+
+
+  // Test do-while cir.loop operation lowering.
+  cir.func @testDoWhile(%arg0 : !cir.bool) {
+    cir.do {
+      cir.yield
+    } while {
+      cir.condition(%arg0)
+    }
+    cir.return
+  }
+
+// CHECK: @testDoWhile
+// CHECK:    llvm.br ^bb[[#BODY:]]
+// CHECK:  ^bb[[#COND:]]:
+// CHECK:    llvm.cond_br %{{.+}}, ^bb[[#BODY]], ^bb[[#EXIT:]]
+// CHECK:  ^bb[[#BODY]]:
+// CHECK:    llvm.br ^bb[[#COND]]
+// CHECK:  ^bb[[#EXIT]]:
+
+
+
+  // Test corner case:
+  // while (1) {
+  //     break;
+  // }
+  cir.func @testWhileWithBreakTerminatedBody(%arg0 : !cir.bool) {
+    cir.while {
+      cir.condition(%arg0)
+    } do {
+      cir.break
+    }
+    cir.return
+  }
+
+// CHECK: @testWhileWithBreakTerminatedBody
+// CHECK:    llvm.br ^bb[[#COND:]]
+// CHECK:  ^bb[[#COND]]:
+// CHECK:    llvm.cond_br %{{.+}}, ^bb[[#BODY:]], ^bb[[#EXIT:]]
+// CHECK:  ^bb[[#BODY]]:
+// CHECK:    llvm.br ^bb[[#EXIT]]
+// CHECK:  ^bb[[#EXIT]]:
+
+
+
+  // Test C-only corner case - no failures during the lowering.
+  // for (;;) {
+  //     break;
+  // }
+  cir.func @forWithBreakTerminatedScopeInBody(%arg0 : !cir.bool) {
+      cir.for : cond {
+        cir.condition(%arg0)
+      } body {
+        cir.scope { // FIXME(cir): Redundant scope emitted during C codegen.
+          cir.break
+        }
+        cir.yield
+      } step {
+        cir.yield
+      }
+    cir.return
+  }
+
+// CHECK: @forWithBreakTerminatedScopeInBody
+// CHECK:    llvm.br ^bb[[#COND:]]
+// CHECK:  ^bb[[#COND]]:
+// CHECK:    llvm.cond_br %{{.+}}, ^bb[[#BODY:]], ^bb[[#EXIT:]]
+// CHECK:  ^bb[[#BODY]]:
+// CHECK:    llvm.br ^bb[[#SCOPE_IN:]]
+// CHECK:  ^bb[[#SCOPE_IN]]:
+// CHECK:    llvm.br ^bb[[#EXIT]]
+// CHECK:  ^bb[[#SCOPE_EXIT:]]:
+// CHECK:    llvm.br ^bb[[#STEP:]]
+// CHECK:  ^bb[[#STEP]]:
+// CHECK:    llvm.br ^bb[[#COND]]
+// CHECK:  ^bb[[#EXIT]]:
+}
diff --git a/clang/test/CIR/Incubator/Lowering/loops-with-break.cir b/clang/test/CIR/Incubator/Lowering/loops-with-break.cir
new file mode 100644
index 0000000000000..813d9aed05d58
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/loops-with-break.cir
@@ -0,0 +1,269 @@
+// RUN: cir-opt %s -cir-to-llvm -reconcile-unrealized-casts -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+!s32i = !cir.int<s, 32>
+module {
+  cir.func @testFor() {
+    cir.scope {
+      %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+      %1 = cir.const #cir.int<1> : !s32i
+      cir.store %1, %0 : !s32i, !cir.ptr<!s32i>
+      cir.for : cond {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.const #cir.int<10> : !s32i
+        %4 = cir.cmp(lt, %2, %3) : !s32i, !cir.bool
+        cir.condition(%4)
+      } body {
+        cir.scope {
+          cir.scope {
+            %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+            %3 = cir.const #cir.int<5> : !s32i
+            %4 = cir.cmp(eq, %2, %3) : !s32i, !cir.bool
+            cir.if %4 {
+              cir.break
+            }
+          }
+        }
+        cir.yield
+      } step {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.unary(inc, %2) : !s32i, !s32i
+        cir.store %3, %0 : !s32i, !cir.ptr<!s32i>
+        cir.yield
+      }
+    }
+    cir.return
+  }
+
+  // CHECK:  llvm.func @testFor()
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#COND:]]
+  // CHECK:  ^bb[[#COND]]:
+  //           [...]
+  // CHECK:    llvm.cond_br %{{.+}}, ^bb[[#preBREAK1:]], ^bb[[#EXIT:]]
+  // CHECK:  ^bb[[#preBREAK1]]:
+  // CHECK:    llvm.br ^bb[[#preBREAK2:]]
+  // CHECK:  ^bb[[#preBREAK2]]:
+  // CHECK:    llvm.br ^bb[[#BREAK:]]
+  // CHECK:  ^bb[[#BREAK]]:
+  //           [...]
+  // CHECK:    llvm.cond_br %{{.+}}, ^bb[[#preEXIT1:]], ^bb[[#preBODY0:]]
+  // CHECK:  ^bb[[#preEXIT1]]:
+  // CHECK:    llvm.br ^bb[[#EXIT:]]
+  // CHECK:  ^bb[[#preBODY0]]:
+  // CHECK:    llvm.br ^bb[[#preBODY1:]]
+  // CHECK:  ^bb[[#preBODY1]]:
+  // CHECK:    llvm.br ^bb[[#BODY:]]
+  // CHECK:  ^bb[[#BODY]]:
+  // CHECK:    llvm.br ^bb[[#STEP:]]
+  // CHECK:  ^bb[[#STEP]]:
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#COND:]]
+  // CHECK:  ^bb[[#EXIT]]:
+  //           [...]
+  // CHECK:  }
+
+  cir.func @testForNested() {
+    cir.scope {
+      %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+      %1 = cir.const #cir.int<1> : !s32i
+      cir.store %1, %0 : !s32i, !cir.ptr<!s32i>
+      cir.for : cond {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.const #cir.int<10> : !s32i
+        %4 = cir.cmp(lt, %2, %3) : !s32i, !cir.bool
+        cir.condition(%4)
+      } body {
+        cir.scope {
+          cir.scope {
+            %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["j", init] {alignment = 4 : i64}
+            %3 = cir.const #cir.int<1> : !s32i
+            cir.store %3, %2 : !s32i, !cir.ptr<!s32i>
+            cir.for : cond {
+              %4 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+              %5 = cir.const #cir.int<10> : !s32i
+              %6 = cir.cmp(lt, %4, %5) : !s32i, !cir.bool
+              cir.condition(%6)
+            } body {
+              cir.scope {
+                cir.scope {
+                  %4 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+                  %5 = cir.const #cir.int<5> : !s32i
+                  %6 = cir.cmp(eq, %4, %5) : !s32i, !cir.bool
+                  cir.if %6 {
+                    cir.break
+                  }
+                }
+              }
+              cir.yield
+            } step {
+              %4 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+              %5 = cir.unary(inc, %4) : !s32i, !s32i
+              cir.store %5, %2 : !s32i, !cir.ptr<!s32i>
+              cir.yield
+            }
+          }
+        }
+        cir.yield
+      } step {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.unary(inc, %2) : !s32i, !s32i
+        cir.store %3, %0 : !s32i, !cir.ptr<!s32i>
+        cir.yield
+      }
+    }
+    cir.return
+  }
+
+  // CHECK:  llvm.func @testForNested()
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#COND:]]
+  // CHECK:  ^bb[[#COND]]:
+  //           [...]
+  // CHECK:    llvm.cond_br %{{.+}}, ^bb[[#preNESTED1:]], ^bb[[#EXIT:]]
+  // CHECK:  ^bb[[#preNESTED1]]:
+  // CHECK:    llvm.br ^bb[[#preNESTED2:]]
+  // CHECK:  ^bb[[#preNESTED2]]:
+  // CHECK:    llvm.br ^bb[[#NESTED:]]
+  // CHECK:  ^bb[[#NESTED]]:
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#COND_NESTED:]]
+  // CHECK:  ^bb[[#COND_NESTED]]:
+  //           [...]
+  // CHECK:    llvm.cond_br %{{.+}}, ^bb[[#preBREAK1:]], ^bb[[#EXIT_NESTED:]]
+  // CHECK:  ^bb[[#preBREAK1]]:
+  // CHECK:    llvm.br ^bb[[#preBREAK2:]]
+  // CHECK:  ^bb[[#preBREAK2]]:
+  // CHECK:    llvm.br ^bb[[#BREAK:]]
+  // CHECK:  ^bb[[#BREAK]]:
+  //           [...]
+  // CHECK:    llvm.cond_br %{{.+}}, ^bb[[#preEXIT2:]], ^bb[[#preBODY0:]]
+  // CHECK:  ^bb[[#preEXIT2]]:
+  // CHECK:    llvm.br ^bb[[#EXIT_NESTED:]]
+  // CHECK:  ^bb[[#preBODY0]]:
+  // CHECK:    llvm.br ^bb[[#preBODY1:]]
+  // CHECK:  ^bb[[#preBODY1]]:
+  // CHECK:    llvm.br ^bb[[#BODY_NESTED:]]
+  // CHECK:  ^bb[[#BODY_NESTED]]:
+  // CHECK:    llvm.br ^bb[[#STEP_NESTED:]]
+  // CHECK:  ^bb[[#STEP_NESTED]]:
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#COND_NESTED:]]
+  // CHECK:  ^bb[[#EXIT_NESTED]]:
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#BODY:]]
+  // CHECK:  ^bb[[#BODY]]:
+  // CHECK:    llvm.br ^bb[[#STEP:]]
+  // CHECK:  ^bb[[#STEP]]:
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#COND:]]
+  // CHECK:  ^bb[[#EXIT]]:
+  //           [...]
+  // CHECK:  }
+
+  cir.func @testWhile() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+    %1 = cir.const #cir.int<0> : !s32i
+    cir.store %1, %0 : !s32i, !cir.ptr<!s32i>
+    cir.scope {
+      cir.while {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.const #cir.int<10> : !s32i
+        %4 = cir.cmp(lt, %2, %3) : !s32i, !cir.bool
+        cir.condition(%4)
+      } do {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.unary(inc, %2) : !s32i, !s32i
+        cir.store %3, %0 : !s32i, !cir.ptr<!s32i>
+        cir.scope {
+          %4 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+          %5 = cir.const #cir.int<5> : !s32i
+          %6 = cir.cmp(eq, %4, %5) : !s32i, !cir.bool
+          cir.if %6 {
+            cir.break
+          }
+        }
+        cir.yield
+      }
+    }
+    cir.return
+  }
+
+
+  // CHECK:  llvm.func @testWhile()
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#COND:]]
+  // CHECK:  ^bb[[#COND]]:
+  //           [...]
+  // CHECK:    llvm.cond_br %{{.+}}, ^bb[[#BODY:]], ^bb[[#EXIT:]]
+  // CHECK:  ^bb[[#BODY]]:
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#BREAK:]]
+  // CHECK:  ^bb[[#BREAK]]:
+  //           [...]
+  // CHECK:    llvm.cond_br %{{.+}}, ^bb[[#preEXIT1:]], ^bb[[#preCOND0:]]
+  // CHECK:  ^bb[[#preEXIT1]]:
+  // CHECK:    llvm.br ^bb[[#preEXIT2:]]
+  // CHECK:  ^bb[[#preCOND0]]:
+  // CHECK:    llvm.br ^bb[[#preCOND1:]]
+  // CHECK:  ^bb[[#preCOND1]]:
+  // CHECK:    llvm.br ^bb[[#COND:]]
+  // CHECK:  ^bb[[#preEXIT2]]:
+  // CHECK:    llvm.br ^bb[[#EXIT:]]
+  // CHECK:  ^bb[[#EXIT]]:
+  //           [...]
+  // CHECK:  }
+
+cir.func @testDoWhile() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+    %1 = cir.const #cir.int<0> : !s32i
+    cir.store %1, %0 : !s32i, !cir.ptr<!s32i>
+    cir.scope {
+      cir.do {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.unary(inc, %2) : !s32i, !s32i
+        cir.store %3, %0 : !s32i, !cir.ptr<!s32i>
+        cir.scope {
+          %4 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+          %5 = cir.const #cir.int<5> : !s32i
+          %6 = cir.cmp(eq, %4, %5) : !s32i, !cir.bool
+          cir.if %6 {
+            cir.break
+          }
+        }
+        cir.yield
+      } while {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.const #cir.int<10> : !s32i
+        %4 = cir.cmp(lt, %2, %3) : !s32i, !cir.bool
+        cir.condition(%4)
+      }
+    }
+    cir.return
+  }
+
+  // CHECK:  llvm.func @testDoWhile()
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#COND:]]
+  // CHECK:  ^bb[[#COND]]:
+  //           [...]
+  // CHECK:    llvm.cond_br %{{.+}}, ^bb[[#BODY:]], ^bb[[#EXIT:]]
+  // CHECK:  ^bb[[#BODY]]:
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#BREAK:]]
+  // CHECK:  ^bb[[#BREAK]]:
+  //           [...]
+  // CHECK:    llvm.cond_br %{{.+}}, ^bb[[#preEXIT1:]], ^bb[[#preCOND0:]]
+  // CHECK:  ^bb[[#preEXIT1]]:
+  // CHECK:    llvm.br ^bb[[#preEXIT2:]]
+  // CHECK:  ^bb[[#preCOND0]]:
+  // CHECK:    llvm.br ^bb[[#preCOND1:]]
+  // CHECK:  ^bb[[#preCOND1]]:
+  // CHECK:    llvm.br ^bb[[#COND:]]
+  // CHECK:  ^bb[[#preEXIT2]]:
+  // CHECK:    llvm.br ^bb[[#EXIT:]]
+  // CHECK:  ^bb[[#EXIT]]:
+  //           [...]
+  // CHECK:  }
+
+}
diff --git a/clang/test/CIR/Incubator/Lowering/loops-with-continue.cir b/clang/test/CIR/Incubator/Lowering/loops-with-continue.cir
new file mode 100644
index 0000000000000..f6a91dcab5600
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/loops-with-continue.cir
@@ -0,0 +1,265 @@
+// RUN: cir-opt %s -cir-to-llvm -reconcile-unrealized-casts -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+!s32i = !cir.int<s, 32>
+module {
+  cir.func @testFor() {
+    cir.scope {
+      %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+      %1 = cir.const #cir.int<1> : !s32i
+      cir.store %1, %0 : !s32i, !cir.ptr<!s32i>
+      cir.for : cond {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.const #cir.int<10> : !s32i
+        %4 = cir.cmp(lt, %2, %3) : !s32i, !cir.bool
+        cir.condition(%4)
+      } body {
+        cir.scope {
+          cir.scope {
+            %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+            %3 = cir.const #cir.int<5> : !s32i
+            %4 = cir.cmp(eq, %2, %3) : !s32i, !cir.bool
+            cir.if %4 {
+              cir.continue
+            }
+          }
+        }
+        cir.yield
+      } step {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.unary(inc, %2) : !s32i, !s32i
+        cir.store %3, %0 : !s32i, !cir.ptr<!s32i>
+        cir.yield
+      }
+    }
+    cir.return
+  }
+
+  // CHECK:  llvm.func @testFor()
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#COND:]]
+  // CHECK:  ^bb[[#COND]]:
+  //           [...]
+  // CHECK:    llvm.cond_br %{{.+}}, ^bb[[#preCONTINUE1:]], ^bb[[#EXIT:]]
+  // CHECK:  ^bb[[#preCONTINUE1]]:
+  // CHECK:    llvm.br ^bb[[#preCONTINUE2:]]
+  // CHECK:  ^bb[[#preCONTINUE2]]:
+  // CHECK:    llvm.br ^bb[[#CONTINUE:]]
+  // CHECK:  ^bb[[#CONTINUE]]:
+  //           [...]
+  // CHECK:    llvm.cond_br %{{.+}}, ^bb[[#preSTEP:]], ^bb[[#preBODY0:]]
+  // CHECK:  ^bb[[#preSTEP]]:
+  // CHECK:    llvm.br ^bb[[#STEP:]]
+  // CHECK:  ^bb[[#preBODY0]]:
+  // CHECK:    llvm.br ^bb[[#preBODY1:]]
+  // CHECK:  ^bb[[#preBODY1]]:
+  // CHECK:    llvm.br ^bb[[#BODY:]]
+  // CHECK:  ^bb[[#BODY]]:
+  // CHECK:    llvm.br ^bb[[#STEP:]]
+  // CHECK:  ^bb[[#STEP]]:
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#COND:]]
+  // CHECK:  ^bb[[#EXIT]]:
+  //           [...]
+  // CHECK:  }
+
+
+  cir.func @testForNested() {
+    cir.scope {
+      %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+      %1 = cir.const #cir.int<1> : !s32i
+      cir.store %1, %0 : !s32i, !cir.ptr<!s32i>
+      cir.for : cond {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.const #cir.int<10> : !s32i
+        %4 = cir.cmp(lt, %2, %3) : !s32i, !cir.bool
+        cir.condition(%4)
+      } body {
+        cir.scope {
+          cir.scope {
+            %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["j", init] {alignment = 4 : i64}
+            %3 = cir.const #cir.int<1> : !s32i
+            cir.store %3, %2 : !s32i, !cir.ptr<!s32i>
+            cir.for : cond {
+              %4 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+              %5 = cir.const #cir.int<10> : !s32i
+              %6 = cir.cmp(lt, %4, %5) : !s32i, !cir.bool
+              cir.condition(%6)
+            } body {
+              cir.scope {
+                cir.scope {
+                  %4 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+                  %5 = cir.const #cir.int<5> : !s32i
+                  %6 = cir.cmp(eq, %4, %5) : !s32i, !cir.bool
+                  cir.if %6 {
+                    cir.continue
+                  }
+                }
+              }
+              cir.yield
+            } step {
+              %4 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+              %5 = cir.unary(inc, %4) : !s32i, !s32i
+              cir.store %5, %2 : !s32i, !cir.ptr<!s32i>
+              cir.yield
+            }
+          }
+        }
+        cir.yield
+      } step {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.unary(inc, %2) : !s32i, !s32i
+        cir.store %3, %0 : !s32i, !cir.ptr<!s32i>
+        cir.yield
+      }
+    }
+    cir.return
+  }
+
+  // CHECK:  llvm.func @testForNested()
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#COND:]]
+  // CHECK:  ^bb[[#COND]]:
+  //           [...]
+  // CHECK:    llvm.cond_br %{{.+}}, ^bb[[#preNESTED1:]], ^bb[[#EXIT:]]
+  // CHECK:  ^bb[[#preNESTED1]]:
+  // CHECK:    llvm.br ^bb[[#preNESTED2:]]
+  // CHECK:  ^bb[[#preNESTED2]]:
+  // CHECK:    llvm.br ^bb[[#NESTED:]]
+  // CHECK:  ^bb[[#NESTED]]:
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#COND_NESTED:]]
+  // CHECK:  ^bb[[#COND_NESTED]]:
+  //           [...]
+  // CHECK:    llvm.cond_br %{{.+}}, ^bb[[#preCONTINUE1:]], ^bb[[#EXIT_NESTED:]]
+  // CHECK:  ^bb[[#preCONTINUE1]]:
+  // CHECK:    llvm.br ^bb[[#preCONTINUE2:]]
+  // CHECK:  ^bb[[#preCONTINUE2]]:
+  // CHECK:    llvm.br ^bb[[#CONTINUE:]]
+  // CHECK:  ^bb[[#CONTINUE]]:
+  //           [...]
+  // CHECK:    llvm.cond_br %{{.+}}, ^bb[[#preSTEP0:]], ^bb[[#preBODY0:]]
+  // CHECK:  ^bb[[#preSTEP0]]:
+  // CHECK:    llvm.br ^bb[[#STEP_NESTED:]]
+  // CHECK:  ^bb[[#preBODY0]]:
+  // CHECK:    llvm.br ^bb[[#preBODY1:]]
+  // CHECK:  ^bb[[#preBODY1]]:
+  // CHECK:    llvm.br ^bb[[#BODY_NESTED:]]
+  // CHECK:  ^bb[[#BODY_NESTED]]:
+  // CHECK:    llvm.br ^bb[[#STEP_NESTED:]]
+  // CHECK:  ^bb[[#STEP_NESTED]]:
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#COND_NESTED:]]
+  // CHECK:  ^bb[[#EXIT_NESTED]]:
+  // CHECK:    llvm.br ^bb[[#BODY:]]
+  // CHECK:  ^bb[[#BODY]]:
+  // CHECK:    llvm.br ^bb[[#STEP:]]
+  // CHECK:  ^bb[[#STEP]]:
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#COND:]]
+  // CHECK:  ^bb[[#EXIT]]:
+  //           [...]
+  // CHECK:  }
+
+cir.func @testWhile() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+    %1 = cir.const #cir.int<0> : !s32i
+    cir.store %1, %0 : !s32i, !cir.ptr<!s32i>
+    cir.scope {
+      cir.while {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.const #cir.int<10> : !s32i
+        %4 = cir.cmp(lt, %2, %3) : !s32i, !cir.bool
+        cir.condition(%4)
+      } do {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.unary(inc, %2) : !s32i, !s32i
+        cir.store %3, %0 : !s32i, !cir.ptr<!s32i>
+        cir.scope {
+          %4 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+          %5 = cir.const #cir.int<5> : !s32i
+          %6 = cir.cmp(eq, %4, %5) : !s32i, !cir.bool
+          cir.if %6 {
+            cir.continue
+          }
+        }
+        cir.yield
+      }
+    }
+    cir.return
+  }
+
+  // CHECK:  llvm.func @testWhile()
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#COND:]]
+  // CHECK:  ^bb[[#COND]]:
+  //           [...]
+  // CHECK:    llvm.cond_br %{{.+}}, ^bb[[#BODY:]], ^bb[[#EXIT:]]
+  // CHECK:  ^bb[[#BODY]]:
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#CONTINUE:]]
+  // CHECK:  ^bb[[#CONTINUE]]:
+  //           [...]
+  // CHECK:    llvm.cond_br %{{.+}}, ^bb[[#preCOND0:]], ^bb[[#preCOND1:]]
+  // CHECK:  ^bb[[#preCOND0]]:
+  // CHECK:    llvm.br ^bb[[#COND:]]
+  // CHECK:  ^bb[[#preCOND1]]:
+  // CHECK:    llvm.br ^bb[[#preCOND2:]]
+  // CHECK:  ^bb[[#preCOND2]]:
+  // CHECK:    llvm.br ^bb[[#COND:]]
+  // CHECK:  ^bb[[#EXIT]]:
+  //           [...]
+  // CHECK:  }
+
+  cir.func @testDoWhile() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+    %1 = cir.const #cir.int<0> : !s32i
+    cir.store %1, %0 : !s32i, !cir.ptr<!s32i>
+    cir.scope {
+      cir.do {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.unary(inc, %2) : !s32i, !s32i
+        cir.store %3, %0 : !s32i, !cir.ptr<!s32i>
+        cir.scope {
+          %4 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+          %5 = cir.const #cir.int<5> : !s32i
+          %6 = cir.cmp(eq, %4, %5) : !s32i, !cir.bool
+          cir.if %6 {
+            cir.continue
+          }
+        }
+        cir.yield
+      } while {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.const #cir.int<10> : !s32i
+        %4 = cir.cmp(lt, %2, %3) : !s32i, !cir.bool
+        cir.condition(%4)
+      }
+    }
+    cir.return
+  }
+
+
+  // CHECK:  llvm.func @testDoWhile()
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#COND:]]
+  // CHECK:  ^bb[[#COND]]:
+  //           [...]
+  // CHECK:    llvm.cond_br %{{.+}}, ^bb[[#BODY:]], ^bb[[#EXIT:]]
+  // CHECK:  ^bb[[#BODY]]:
+  //           [...]
+  // CHECK:    llvm.br ^bb[[#CONTINUE:]]
+  // CHECK:  ^bb[[#CONTINUE]]:
+  //           [...]
+  // CHECK:    llvm.cond_br %{{.+}}, ^bb[[#preCOND0:]], ^bb[[#preCOND1:]]
+  // CHECK:  ^bb[[#preCOND0]]:
+  // CHECK:    llvm.br ^bb[[#COND:]]
+  // CHECK:  ^bb[[#preCOND1]]:
+  // CHECK:    llvm.br ^bb[[#preCOND2:]]
+  // CHECK:  ^bb[[#preCOND2]]:
+  // CHECK:    llvm.br ^bb[[#COND:]]
+  // CHECK:  ^bb[[#EXIT]]:
+  //           [...]
+  // CHECK:  }
+
+}
diff --git a/clang/test/CIR/Incubator/Lowering/module-asm.cir b/clang/test/CIR/Incubator/Lowering/module-asm.cir
new file mode 100644
index 0000000000000..b802cda8f6e65
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/module-asm.cir
@@ -0,0 +1,11 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir
+
+// RUN: cir-translate -cir-to-llvmir --disable-cc-lowering -o %t.ll %s
+// RUN: FileCheck -check-prefix=LLVM --input-file=%t.ll %s
+
+// CHECK: llvm.module_asm =  [".globl bar", ".globl foo"]
+// LLVM: module asm ".globl bar"
+// LLVM: module asm ".globl foo"
+module attributes {cir.module_asm = [".globl bar", ".globl foo"]} {
+}
diff --git a/clang/test/CIR/Incubator/Lowering/multi-array.c b/clang/test/CIR/Incubator/Lowering/multi-array.c
new file mode 100644
index 0000000000000..8d01028a9bf17
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/multi-array.c
@@ -0,0 +1,58 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+unsigned char table[10][5] =
+{
+    {1,0},
+    {7,6,5},
+};
+
+// LLVM: @table = {{.*}}[10 x [5 x i8]] {{.*}}[5 x i8] c"\01\00\00\00\00", [5 x i8] c"\07\06\05\00\00", [5 x i8] zeroinitializer
+
+unsigned char table2[15][16] =
+{
+    {1,0},
+    {1,1,0},
+    {3,2,1,0},
+    {3,2,1,1,0},
+    {3,2,3,2,1,0},
+    {3,0,1,3,2,5,4},
+    {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
+};
+
+// LLVM: @table2 = {{.*}}[15 x [16 x i8]] {{.*}}[16 x i8] c"\01\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [16 x i8] c"\01\01\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [16 x i8] c"\03\02\01\00\00\00\00\00\00\00\00\00\00\00\00\00", [16 x i8] c"\03\02\01\01\00\00\00\00\00\00\00\00\00\00\00\00", [16 x i8] c"\03\02\03\02\01\00\00\00\00\00\00\00\00\00\00\00", [16 x i8] c"\03\00\01\03\02\05\04\00\00\00\00\00\00\00\00\00", [16 x i8] c"\07\06\05\04\03\02\01\01\01\01\01\01\01\01\01\00", [16 x i8] zeroinitializer
+
+unsigned char table3[15][16] =
+{
+    {1,1},
+    {1,2,2},
+    {2,2,2,2},
+    {2,2,2,3,3},
+    {2,2,3,3,3,3},
+    {2,3,3,3,3,3,3},
+    {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
+};
+
+// LLVM: @table3 = {{.*}}[15 x [16 x i8]] {{.*}}[16 x i8] c"\01\01\00\00\00\00\00\00\00\00\00\00\00\00\00\00", [16 x i8] c"\01\02\02\00\00\00\00\00\00\00\00\00\00\00\00\00", [16 x i8] c"\02\02\02\02\00\00\00\00\00\00\00\00\00\00\00\00", [16 x i8] c"\02\02\02\03\03\00\00\00\00\00\00\00\00\00\00\00", [16 x i8] c"\02\02\03\03\03\03\00\00\00\00\00\00\00\00\00\00", [16 x i8] c"\02\03\03\03\03\03\03\00\00\00\00\00\00\00\00\00", [16 x i8] c"\03\03\03\03\03\03\03\04\05\06\07\08\09\0A\0B\00", [16 x i8] zeroinitializer
+
+
+unsigned char table4[][20] =
+{
+  {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+  {  0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 2, 2, 2, 2, 0, 0, 0, 0 }
+};
+// LLVM: @table4 = {{.*}}[2 x [20 x i8]] {{.*}}[20 x i8] zeroinitializer, [20 x i8] c"\00\00\00\00\00\00\00\01\01\01\01\00\02\02\02\02\00\00\00\00"]
+
+unsigned char table5[][20] =
+{
+  {  0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+  {  0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 2, 2, 2, 2, 0, 0, 0, 0 }
+};
+// LLVM: @table5 = {{.*}}[2 x [20 x i8]] {{.*}}[20 x i8] c"\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\00\00\00\00\00", [20 x i8] c"\00\00\00\00\00\00\00\01\01\01\01\00\02\02\02\02\00\00\00\00"]
+
+unsigned char table6[][20] =
+{
+  {  1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+  {  0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 2, 2, 2, 2, 0, 0, 0, 0 }
+};
+// LLVM: @table6 = {{.*}}[2 x [20 x i8]] {{.*}}[20 x i8] c"\01\00\00\00\00\00\00\01\00\00\00\00\00\00\00\00\00\00\00\00", [20 x i8] c"\00\00\00\00\00\00\00\01\01\01\01\00\02\02\02\02\00\00\00\00"]
diff --git a/clang/test/CIR/Incubator/Lowering/nested-switch.cpp b/clang/test/CIR/Incubator/Lowering/nested-switch.cpp
new file mode 100644
index 0000000000000..5f6961a84018f
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/nested-switch.cpp
@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s
+
+int nested_switch(int a) {
+  switch (int b = 1; a) {
+  case 0:
+    b = b + 1;
+  case 1:
+    return b;
+  case 2: {
+    b = b + 1;
+    if (a > 1000) {
+        case 9:
+          b += a;
+    }
+    if (a > 500) {
+        case 7:
+          return a + b;
+    }
+    break;
+  }
+  }
+
+  return 0;
+}
+
+// CHECK: define {{.*}}@_Z13nested_switchi(
+// CHECK: switch i32 %6, label %[[DEFAULT_BB:[0-9]+]] [
+// CHECK:   i32 0, label %[[ZERO_BB:[0-9]+]]
+// CHECK:   i32 1, label %[[ONE_BB:[0-9]+]]
+// CHECK:   i32 2, label %[[TWO_BB:[0-9]+]]
+// CHECK:   i32 9, label %[[NINE_BB:[0-9]+]]
+// CHECK:   i32 7, label %[[SEVEN_BB:[0-9]+]]
+// CHECK: ]
+//
+// CHECK: [[ZERO_BB]]:
+// CHECK:   add {{.*}}, 1
+// CHECK:   br label %[[ONE_BB]]
+//
+// CHECK: [[ONE_BB]]:
+// CHECK:   ret
+//
+// CHECK: [[TWO_BB]]:
+// CHECK:   add {{.*}}, 1
+// CHECK:   br label %[[IF_BB:[0-9]+]]
+//
+// CHECK: [[IF_BB]]:
+// CHECK:   %[[CMP:.+]] = icmp sgt i32 %{{.*}}, 1000
+// CHECK:   br i1 %[[CMP]], label %[[IF_TRUE_BB:[0-9]+]], label %[[IF_FALSE_BB:[0-9]+]]
+//
+// CHECK: [[IF_TRUE_BB]]:
+// CHECK:   br label %[[NINE_BB]]
+//
+// CHECK: [[NINE_BB]]:
+// CHECK:   %[[A_VALUE:.+]] = load i32
+// CHECK:   %[[B_VALUE:.+]] = load i32
+// CHECK:   add nsw i32 %[[B_VALUE]], %[[A_VALUE]]
+//
+// CHECK: %[[CMP2:.+]] = icmp sgt i32 %{{.*}}, 500
+// CHECK:   br i1 %[[CMP2]], label %[[IF2_TRUE_BB:[0-9]+]], label %[[IF2_FALSE_BB:[0-9]+]]
+//
+// CHECK: [[IF2_TRUE_BB]]:
+// CHECK:   br label %[[SEVEN_BB]]
+//
+// CHECK: [[SEVEN_BB]]:
+// CHECK:   ret
+//
+// CHECK: [[DEFAULT_BB]]:
+// CHECK:   ret
diff --git a/clang/test/CIR/Incubator/Lowering/nested-union-array.c b/clang/test/CIR/Incubator/Lowering/nested-union-array.c
new file mode 100644
index 0000000000000..1cf04242d3986
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/nested-union-array.c
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+
+struct nested
+{
+  union {
+    const char *single;
+    const char *const *multi;
+  } output;
+};
+static const char * const test[] = {
+  "test",
+};
+const struct nested data[] =
+{
+    {
+        {
+            .multi = test,
+        },
+    },
+    {
+        {
+            .single = "hello",
+        },
+    },
+};
+
+// LLVM: @data = constant { { { ptr } }, { { ptr } } }
diff --git a/clang/test/CIR/Incubator/Lowering/new.cpp b/clang/test/CIR/Incubator/Lowering/new.cpp
new file mode 100644
index 0000000000000..4626aad0cbecd
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/new.cpp
@@ -0,0 +1,237 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+#include "std-cxx.h"
+
+void t_new_constant_size() {
+  auto p = new double[16];
+}
+
+// LLVM: @_Z19t_new_constant_sizev()
+// LLVM:   %[[ALLOCA:.*]] = alloca ptr, i64 1, align 8
+// LLVM:   %[[ADDR:.*]] = call ptr @_Znam(i64 128)
+// LLVM:   store ptr %[[ADDR]], ptr %[[ALLOCA]], align 8
+
+void t_new_multidim_constant_size() {
+  auto p = new double[2][3][4];
+}
+
+// LLVM: @_Z28t_new_multidim_constant_sizev()
+// LLVM:   %[[ALLOCA:.*]] = alloca ptr, i64 1, align 8
+// LLVM:   %[[ADDR:.*]] = call ptr @_Znam(i64 192)
+// LLVM:   store ptr %[[ADDR]], ptr %[[ALLOCA]], align 8
+
+class C {
+  public:
+    ~C();
+};
+
+void t_constant_size_nontrivial() {
+  auto p = new C[3];
+}
+
+// Note: The below differs from the IR emitted by clang without -fclangir in
+//       several respects. (1) The alloca here has an extra "i64 1"
+//       (2) The operator new call is missing "noalias noundef nonnull" on
+//       the call and "noundef" on the argument, (3) The getelementptr is
+//       missing "inbounds"
+
+// LLVM: @_Z26t_constant_size_nontrivialv()
+// LLVM:   %[[ALLOCA:.*]] = alloca ptr, i64 1, align 8
+// LLVM:   %[[COOKIE_PTR:.*]] = call ptr @_Znam(i64 11)
+// LLVM:   store i64 3, ptr %[[COOKIE_PTR]], align 8
+// LLVM:   %[[ALLOCATED_PTR:.*]] = getelementptr i8, ptr %[[COOKIE_PTR]], i64 8
+// LLVM:   store ptr %[[ALLOCATED_PTR]], ptr %[[ALLOCA]], align 8
+
+class D {
+  public:
+    int x;
+    ~D();
+};
+
+void t_constant_size_nontrivial2() {
+  auto p = new D[3];
+}
+
+// LLVM: @_Z27t_constant_size_nontrivial2v()
+// LLVM:   %[[ALLOCA:.*]] = alloca ptr, i64 1, align 8
+// LLVM:   %[[COOKIE_PTR:.*]] = call ptr @_Znam(i64 20)
+// LLVM:   store i64 3, ptr %[[COOKIE_PTR]], align 8
+// LLVM:   %[[ALLOCATED_PTR:.*]] = getelementptr i8, ptr %[[COOKIE_PTR]], i64 8
+// LLVM:   store ptr %[[ALLOCATED_PTR]], ptr %[[ALLOCA]], align 8
+
+void t_constant_size_memset_init() {
+  auto p = new int[16] {};
+}
+
+// LLVM: @_Z27t_constant_size_memset_initv()
+// LLVM:   %[[ALLOCA:.*]] = alloca ptr, i64 1, align 8
+// LLVM:   %[[ADDR:.*]] = call ptr @_Znam(i64 64)
+// LLVM:   call void @llvm.memset.p0.i64(ptr %[[ADDR]], i8 0, i64 64, i1 false)
+// LLVM:   store ptr %[[ADDR]], ptr %[[ALLOCA]], align 8
+
+void t_constant_size_partial_init() {
+  auto p = new int[16] { 1, 2, 3 };
+}
+
+// LLVM: @_Z28t_constant_size_partial_initv()
+// LLVM:   %[[ALLOCA:.*]] = alloca ptr, i64 1, align 8
+// LLVM:   %[[ADDR:.*]] = call ptr @_Znam(i64 64)
+// LLVM:   store i32 1, ptr %[[ADDR]], align 4
+// LLVM:   %[[ELEM_1_PTR:.*]] = getelementptr i32, ptr %[[ADDR]], i64 1
+// LLVM:   store i32 2, ptr %[[ELEM_1_PTR]], align 4
+// LLVM:   %[[ELEM_2_PTR:.*]] = getelementptr i32, ptr %[[ELEM_1_PTR]], i64 1
+// LLVM:   store i32 3, ptr %[[ELEM_2_PTR]], align 4
+// LLVM:   %[[ELEM_3_PTR:.*]] = getelementptr i32, ptr %[[ELEM_2_PTR]], i64 1
+// LLVM:   call void @llvm.memset.p0.i64(ptr %[[ELEM_3_PTR]], i8 0, i64 52, i1 false)
+// LLVM:   store ptr %[[ADDR]], ptr %[[ALLOCA]], align 8
+
+void t_new_var_size(size_t n) {
+  auto p = new char[n];
+}
+
+// LLVM:  @_Z14t_new_var_sizem
+// LLVM:    %[[N:.*]] = load i64, ptr %[[ARG_ALLOCA:.*]]
+// LLVM:    %[[ADDR:.*]] = call ptr @_Znam(i64 %[[N]])
+
+void t_new_var_size2(int n) {
+  auto p = new char[n];
+}
+
+// LLVM:  @_Z15t_new_var_size2i
+// LLVM:    %[[N:.*]] = load i32, ptr %[[ARG_ALLOCA:.*]]
+// LLVM:    %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64
+// LLVM:    %[[ADDR:.*]] = call ptr @_Znam(i64 %[[N_SIZE_T]])
+
+void t_new_var_size3(size_t n) {
+  auto p = new double[n];
+}
+
+// LLVM:  @_Z15t_new_var_size3m
+// LLVM:    %[[N:.*]] = load i64, ptr %[[ARG_ALLOCA:.*]]
+// LLVM:    %[[RESULT_PAIR:.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %[[N]], i64 8)
+// LLVM:    %[[RESULT:.*]] = extractvalue { i64, i1 } %[[RESULT_PAIR]], 0
+// LLVM:    %[[OVERFLOW:.*]] = extractvalue { i64, i1 } %[[RESULT_PAIR]], 1
+// LLVM:    %[[ALLOC_SIZE:.*]] = select i1 %[[OVERFLOW]], i64 -1, i64 %[[RESULT]]
+// LLVM:    %[[ADDR:.*]] = call ptr @_Znam(i64 %[[ALLOC_SIZE]])
+
+void t_new_var_size4(int n) {
+  auto p = new double[n];
+}
+
+// LLVM:  @_Z15t_new_var_size4i
+// LLVM:    %[[N:.*]] = load i32, ptr %[[ARG_ALLOCA:.*]]
+// LLVM:    %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64
+// LLVM:    %[[RESULT_PAIR:.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %[[N_SIZE_T]], i64 8)
+// LLVM:    %[[RESULT:.*]] = extractvalue { i64, i1 } %[[RESULT_PAIR]], 0
+// LLVM:    %[[OVERFLOW:.*]] = extractvalue { i64, i1 } %[[RESULT_PAIR]], 1
+// LLVM:    %[[ALLOC_SIZE:.*]] = select i1 %[[OVERFLOW]], i64 -1, i64 %[[RESULT]]
+// LLVM:    %[[ADDR:.*]] = call ptr @_Znam(i64 %[[ALLOC_SIZE]])
+
+void t_new_var_size5(int n) {
+  auto p = new double[n][2][3];
+}
+
+// NUM_ELEMENTS is not used in this case because cookies aren't required
+
+// LLVM:  @_Z15t_new_var_size5i
+// LLVM:    %[[N:.*]] = load i32, ptr %[[ARG_ALLOCA:.*]]
+// LLVM:    %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64
+// LLVM:    %[[RESULT_PAIR:.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %[[N_SIZE_T]], i64 48)
+// LLVM:    %[[RESULT:.*]] = extractvalue { i64, i1 } %[[RESULT_PAIR]], 0
+// LLVM:    %[[OVERFLOW:.*]] = extractvalue { i64, i1 } %[[RESULT_PAIR]], 1
+// LLVM:    %[[NUM_ELEMENTS:.*]] = mul i64 %[[N_SIZE_T]], 6
+// LLVM:    %[[ALLOC_SIZE:.*]] = select i1 %[[OVERFLOW]], i64 -1, i64 %[[RESULT]]
+// LLVM:    %[[ADDR:.*]] = call ptr @_Znam(i64 %[[ALLOC_SIZE]])
+
+void t_new_var_size6(int n) {
+  auto p = new double[n] { 1, 2, 3 };
+}
+
+// LLVM:  @_Z15t_new_var_size6i
+// LLVM:    %[[N:.*]] = load i32, ptr %[[ARG_ALLOCA:.*]]
+// LLVM:    %[[N_SIZE_T:.*]] = sext i32 %[[N]] to i64
+// LLVM:    %[[LT_MIN_SIZE:.*]] = icmp ult i64 %[[N_SIZE_T]], 3
+// LLVM:    %[[RESULT_PAIR:.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %[[N_SIZE_T]], i64 8)
+// LLVM:    %[[RESULT:.*]] = extractvalue { i64, i1 } %[[RESULT_PAIR]], 0
+// LLVM:    %[[OVERFLOW:.*]] = extractvalue { i64, i1 } %[[RESULT_PAIR]], 1
+// LLVM:    %[[ANY_OVERFLOW:.*]] = or i1 %[[LT_MIN_SIZE]], %[[OVERFLOW]]
+// LLVM:    %[[ALLOC_SIZE:.*]] = select i1 %[[ANY_OVERFLOW]], i64 -1, i64 %[[RESULT]]
+// LLVM:    %[[ADDR:.*]] = call ptr @_Znam(i64 %[[ALLOC_SIZE]])
+
+void t_new_var_size7(__int128 n) {
+  auto p = new double[n];
+}
+
+// LLVM:  @_Z15t_new_var_size7n
+// LLVM:    %[[N:.*]] = load i128, ptr %[[ARG_ALLOCA:.*]]
+// LLVM:    %[[N_SIZE_T:.*]] = trunc i128 %[[N]] to i64
+// LLVM:    %[[RESULT_PAIR:.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %[[N_SIZE_T]], i64 8)
+// LLVM:    %[[RESULT:.*]] = extractvalue { i64, i1 } %[[RESULT_PAIR]], 0
+// LLVM:    %[[OVERFLOW:.*]] = extractvalue { i64, i1 } %[[RESULT_PAIR]], 1
+// LLVM:    %[[ALLOC_SIZE:.*]] = select i1 %[[OVERFLOW]], i64 -1, i64 %[[RESULT]]
+// LLVM:    %[[ADDR:.*]] = call ptr @_Znam(i64 %[[ALLOC_SIZE]])
+
+void t_new_var_size_nontrivial(size_t n) {
+  auto p = new D[n];
+}
+
+// LLVM:  @_Z25t_new_var_size_nontrivialm
+// LLVM:    %[[N:.*]] = load i64, ptr %[[ARG_ALLOCA:.*]]
+// LLVM:    %[[RESULT_PAIR:.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %[[N]], i64 4)
+// LLVM:    %[[SIZE_WITHOUT_COOKIE:.*]] = extractvalue { i64, i1 } %[[RESULT_PAIR]], 0
+// LLVM:    %[[OVERFLOW:.*]] = extractvalue { i64, i1 } %[[RESULT_PAIR]], 1
+// LLVM:    %[[RESULT_PAIR2:.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %[[SIZE_WITHOUT_COOKIE]], i64 8)
+// LLVM:    %[[SIZE:.*]] = extractvalue { i64, i1 } %[[RESULT_PAIR2]], 0
+// LLVM:    %[[OVERFLOW2:.*]] = extractvalue { i64, i1 } %[[RESULT_PAIR2]], 1
+// LLVM:    %[[ANY_OVERFLOW:.*]] = or i1 %[[OVERFLOW]], %[[OVERFLOW2]]
+// LLVM:    %[[ALLOC_SIZE:.*]] = select i1 %[[ANY_OVERFLOW]], i64 -1, i64 %[[SIZE]]
+// LLVM:    %[[ADDR:.*]] = call ptr @_Znam(i64 %[[ALLOC_SIZE]])
+
+class E {
+  public:
+    E();
+    ~E();
+};
+
+void t_new_constant_size_constructor() {
+  auto p = new E[3];
+}
+
+// LLVM:  @_Z31t_new_constant_size_constructorv
+// LLVM:    %[[ALLOC_PTR:.*]] = call ptr @_Znam(i64 11)
+// LLVM:    store i64 3, ptr %[[ALLOC_PTR]], align 8
+// LLVM:    %[[OBJ_PTR:.*]] = getelementptr i8, ptr %[[ALLOC_PTR]], i64 8
+// LLVM:    %[[ELEM_PTR:.*]] = getelementptr %class.E, ptr %[[OBJ_PTR]], i32 0
+// LLVM:    %[[END_PTR:.*]] = getelementptr %class.E, ptr %[[ELEM_PTR]], i64 3
+// LLVM:    br label %[[INIT_ELEM_BB:.*]]
+// LLVM:  [[LOOP_INC_BB:.*]]:
+// LLVM:    %[[NEXT_ELEM_PTR:.*]] = load ptr
+// LLVM:    %[[END_TEST:.*]] = icmp ne ptr %[[NEXT_ELEM_PTR]], %[[END_PTR]]
+// LLVM:    br i1 %[[END_TEST]], label %[[INIT_ELEM_BB]], label %[[EXIT_BB:.*]]
+// LLVM:  [[INIT_ELEM_BB]]:
+// LLVM:    %[[CUR_ELEM_PTR:.*]] = load ptr
+// LLVM:    call void @_ZN1EC1Ev(ptr %[[CUR_ELEM_PTR]])
+// LLVM:    %[[NEXT_PTR:.*]] = getelementptr %class.E, ptr %[[CUR_ELEM_PTR]], i64 1
+// LLVM:    store ptr %[[NEXT_PTR]]
+// LLVM:    br label %[[LOOP_INC_BB]]
+
+void t_multidim_init() {
+  auto *p = new int[2][3] { {1, 2, 3}, {4, 5, 6}};
+}
+
+// LLVM:  @_Z15t_multidim_initv()
+// LLVM:    %[[ALLOC_PTR:.*]] = call ptr @_Znam(i64 24)
+// LLVM:    %[[ELEM_00_PTR:.*]] = getelementptr [3 x i32], ptr %[[ALLOC_PTR]], i32 0, i64 0
+// LLVM:    store i32 1, ptr %[[ELEM_00_PTR]], align 4
+// LLVM:    %[[ELEM_01_PTR:.*]] = getelementptr [3 x i32], ptr %[[ALLOC_PTR]], i32 0, i64 1
+// LLVM:    store i32 2, ptr %[[ELEM_01_PTR]], align 4
+// LLVM:    %[[ELEM_02_PTR:.*]] = getelementptr [3 x i32], ptr %[[ALLOC_PTR]], i32 0, i64 2
+// LLVM:    store i32 3, ptr %[[ELEM_02_PTR]], align 4
+// LLVM:    %[[ELEM_1_PTR:.*]] = getelementptr [3 x i32], ptr %[[ALLOC_PTR]], i64 1
+// LLVM:    %[[ELEM_10_PTR:.*]] = getelementptr [3 x i32], ptr %[[ELEM_1_PTR]], i32 0, i64 0
+// LLVM:    store i32 4, ptr %[[ELEM_10_PTR]], align 4
+// LLVM:    %[[ELEM_11_PTR:.*]] = getelementptr [3 x i32], ptr %[[ELEM_1_PTR]], i32 0, i64 1
+// LLVM:    store i32 5, ptr %[[ELEM_11_PTR]], align 4
+// LLVM:    %[[ELEM_12_PTR:.*]] = getelementptr [3 x i32], ptr %[[ELEM_1_PTR]], i32 0, i64 2
+// LLVM:    store i32 6, ptr %[[ELEM_12_PTR]], align 4
diff --git a/clang/test/CIR/Incubator/Lowering/ptrdiff.cir b/clang/test/CIR/Incubator/Lowering/ptrdiff.cir
new file mode 100644
index 0000000000000..35fddac7356c3
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ptrdiff.cir
@@ -0,0 +1,18 @@
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+!u64i = !cir.int<u, 64>
+
+module {
+  cir.func @foo(%arg0: !cir.ptr<!s32i>, %arg1: !cir.ptr<!s32i>) -> !s32i {
+    %1 = cir.ptr_diff %arg0, %arg1 : !cir.ptr<!s32i> -> !u64i
+    %2 = cir.cast integral %1 : !u64i -> !s32i
+    cir.return %2 : !s32i
+  }
+}
+
+//      CHECK: %3 = ptrtoint ptr %0 to i64
+// CHECK-NEXT: %4 = ptrtoint ptr %1 to i64
+// CHECK-NEXT: %5 = sub i64 %3, %4
+// CHECK-NEXT: %6 = udiv i64 %5, 4
+// CHECK-NEXT: %7 = trunc i64 %6 to i32
diff --git a/clang/test/CIR/Incubator/Lowering/ptrstride.cir b/clang/test/CIR/Incubator/Lowering/ptrstride.cir
new file mode 100644
index 0000000000000..56e6c9b7e9122
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ptrstride.cir
@@ -0,0 +1,44 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.mlir
+// RUN: FileCheck %s --input-file=%t.mlir -check-prefix=MLIR
+
+!s32i = !cir.int<s, 32>
+!u64i = !cir.int<u, 64>
+
+module {
+  cir.func @f(%arg0: !cir.ptr<!s32i>) {
+    %0 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["a", init] {alignment = 8 : i64}
+    cir.store %arg0, %0 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+    %1 = cir.load %0 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+    %2 = cir.const #cir.int<1> : !s32i
+    %3 = cir.ptr_stride %1, %2 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+    %4 = cir.load %3 : !cir.ptr<!s32i>, !s32i
+    cir.return
+  }
+
+  cir.func @g(%arg0: !cir.ptr<!s32i>, %2 : !s32i) {
+    %3 = cir.ptr_stride %arg0, %2 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+    cir.return
+  }
+
+  cir.func @bool_stride(%arg0: !cir.ptr<!cir.bool>, %2 : !u64i) {
+    %3 = cir.ptr_stride %arg0, %2 : (!cir.ptr<!cir.bool>, !u64i) -> !cir.ptr<!cir.bool>
+    cir.return
+  }
+}
+
+// MLIR-LABEL: @f
+// MLIR:   %[[VAL_1:.*]] = llvm.mlir.constant(1 : index) : i64
+// MLIR:   %[[VAL_2:.*]] = llvm.alloca %[[VAL_1]] x !llvm.ptr {alignment = 8 : i64} : (i64) -> !llvm.ptr
+// MLIR:   llvm.store {{.*}}, %[[VAL_2]] {{.*}}: !llvm.ptr, !llvm.ptr
+// MLIR:   %[[VAL_3:.*]] = llvm.load %[[VAL_2]] {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr
+// MLIR:   %[[VAL_4:.*]] = llvm.mlir.constant(1 : i32) : i32
+// MLIR:   %[[VAL_5:.*]] = llvm.sext %[[VAL_4]] : i32 to i64
+// MLIR:   %[[VAL_6:.*]] = llvm.getelementptr %[[VAL_3]]{{\[}}%[[VAL_5]]] : (!llvm.ptr, i64) -> !llvm.ptr, i32
+// MLIR:   %[[VAL_7:.*]] = llvm.load %[[VAL_6]] {alignment = 4 : i64} : !llvm.ptr -> i32
+// MLIR:   llvm.return
+
+// MLIR-LABEL: @g
+// MLIR: llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i32) -> !llvm.ptr, i32
+
+// MLIR-LABEL: @bool_stride
+// MLIR: llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, i64) -> !llvm.ptr, i8
diff --git a/clang/test/CIR/Incubator/Lowering/region-simplify.cir b/clang/test/CIR/Incubator/Lowering/region-simplify.cir
new file mode 100644
index 0000000000000..a76d73d03d8eb
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/region-simplify.cir
@@ -0,0 +1,38 @@
+// RUN: cir-opt %s -canonicalize -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-opt %s -canonicalize -o - | cir-translate -cir-to-llvmir --disable-cc-lowering | FileCheck %s -check-prefix=LLVM
+
+!u32i = !cir.int<u, 32>
+
+module {
+  cir.func @foo() {
+    %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["b", init] {alignment = 4 : i64}
+    %1 = cir.const #cir.int<1> : !u32i
+    cir.store %1, %0 : !u32i, !cir.ptr<!u32i>
+    cir.br ^bb2
+  ^bb1:  // no predecessors
+    %2 = cir.load %0 : !cir.ptr<!u32i>, !u32i
+    %3 = cir.const #cir.int<1> : !u32i
+    %4 = cir.binop(add, %2, %3) : !u32i
+    cir.store %4, %0 : !u32i, !cir.ptr<!u32i>
+    cir.br ^bb2
+  ^bb2:  // 2 preds: ^bb0, ^bb1
+    %5 = cir.load %0 : !cir.ptr<!u32i>, !u32i
+    %6 = cir.const #cir.int<2> : !u32i
+    %7 = cir.binop(add, %5, %6) : !u32i
+    cir.store %7, %0 : !u32i, !cir.ptr<!u32i>
+    cir.return
+  }
+
+  //      MLIR: module {
+// MLIR-NEXT: llvm.func @foo
+//      MLIR: llvm.br ^bb1
+//      MLIR: ^bb1:
+//      MLIR: return
+
+//      LLVM: br label %[[Value:[0-9]+]]
+// LLVM-EMPTY:
+// LLVM-NEXT: [[Value]]:              ; preds =
+//      LLVM: ret void
+
+
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Lowering/resume-flat.cir b/clang/test/CIR/Incubator/Lowering/resume-flat.cir
new file mode 100644
index 0000000000000..15690db6535ac
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/resume-flat.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s -cir-to-llvm | FileCheck %s
+
+!u32i = !cir.int<u, 32>
+!void = !cir.void
+
+module {
+
+cir.func private @flattened_resume() {
+  %exception_addr = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["exception", init] {alignment = 8 : i64}
+  %type_id_addr = cir.alloca !u32i, !cir.ptr<!u32i>, ["type_id", init] {alignment = 4 : i64}
+  %exception = cir.load %exception_addr : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+  %type_id = cir.load %type_id_addr : !cir.ptr<!u32i>, !u32i
+  cir.resume.flat %exception, %type_id
+}
+
+
+// CHECK: llvm.func @flattened_resume() attributes {sym_visibility = "private"} {
+// CHECK:   %[[CONST_1:.*]] = llvm.mlir.constant(1 : i64) : i64
+// CHECK:   %[[EXCEPTION_ADDR:.*]] = llvm.alloca %[[CONST_1]] x !llvm.ptr {alignment = 8 : i64} : (i64) -> !llvm.ptr
+// CHECK:   %[[CONST_1:.*]] = llvm.mlir.constant(1 : i64) : i64
+// CHECK:   %[[TYPE_ID_ADDR:.*]] = llvm.alloca %[[CONST_1]] x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr
+// CHECK:   %[[EXCEPTION:.*]] = llvm.load %[[EXCEPTION_ADDR]] {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr
+// CHECK:   %[[TYPE_ID:.*]] = llvm.load %[[TYPE_ID_ADDR]] {alignment = 4 : i64} : !llvm.ptr -> i32
+// CHECK:   %[[POISON:.*]] = llvm.mlir.poison : !llvm.struct<(ptr, i32)>
+// CHECK:   %[[TMP_EXCEPTION_INFO:.*]] = llvm.insertvalue %[[EXCEPTION]], %[[POISON]][0] : !llvm.struct<(ptr, i32)>
+// CHECK:   %[[EXCEPTION_INFO:.*]] = llvm.insertvalue %[[TYPE_ID]], %[[TMP_EXCEPTION_INFO]][1] : !llvm.struct<(ptr, i32)>
+// CHECK:   llvm.resume %[[EXCEPTION_INFO]] : !llvm.struct<(ptr, i32)>
+// CHECK: }
+
+}
diff --git a/clang/test/CIR/Incubator/Lowering/scope.cir b/clang/test/CIR/Incubator/Lowering/scope.cir
new file mode 100644
index 0000000000000..850b1ec5e0510
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/scope.cir
@@ -0,0 +1,78 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering  | FileCheck %s -check-prefix=LLVM
+!u32i = !cir.int<u, 32>
+
+module {
+  cir.func @foo() {
+    cir.scope {
+      %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["a", init] {alignment = 4 : i64}
+      %1 = cir.const #cir.int<4> : !u32i
+      cir.store %1, %0 : !u32i, !cir.ptr<!u32i>
+    }
+    cir.return
+  }
+
+//      MLIR: llvm.func @foo()
+// MLIR:   [[v2:%[0-9]]] = llvm.mlir.constant(1 : index) : i64
+// MLIR:   [[v3:%[0-9]]] = llvm.alloca [[v2]] x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr
+// MLIR:   llvm.br ^bb1
+// MLIR: ^bb1:
+// MLIR-DAG:   [[v1:%[0-9]]] = llvm.mlir.constant(4 : i32) : i32
+// MLIR:   llvm.store [[v1]], [[v3]] {{.*}}: i32, !llvm.ptr
+// MLIR-NEXT:   llvm.br ^bb2
+// MLIR-NEXT: ^bb2:
+// MLIR-NEXT:   llvm.return
+
+
+//      LLVM: define void @foo()
+//  LLVM-NEXT:   %1 = alloca i32, i64 1, align 4
+//  LLVM-NEXT:   br label %2
+// LLVM-EMPTY:
+//  LLVM-NEXT: 2:
+//  LLVM-NEXT:   store i32 4, ptr %1, align 4
+//  LLVM-NEXT:   br label %3
+// LLVM-EMPTY:
+//  LLVM-NEXT: 3:
+//  LLVM-NEXT:   ret void
+//  LLVM-NEXT: }
+
+
+  // Should drop empty scopes.
+  cir.func @empty_scope() {
+    cir.scope {
+    }
+    cir.return
+  }
+  //      MLIR: llvm.func @empty_scope()
+  // MLIR-NEXT:   llvm.return
+  // MLIR-NEXT: }
+
+
+  cir.func @scope_with_return() -> !u32i {
+    %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["__retval"] {alignment = 4 : i64}
+    cir.scope {
+      %2 = cir.const #cir.int<0> : !u32i
+      cir.store %2, %0 : !u32i, !cir.ptr<!u32i>
+      %3 = cir.load %0 : !cir.ptr<!u32i>, !u32i
+      cir.return %3 : !u32i
+    }
+    %1 = cir.load %0 : !cir.ptr<!u32i>, !u32i
+    cir.return %1 : !u32i
+  }
+
+  //      MLIR: llvm.func @scope_with_return()
+  // MLIR-NEXT:  [[v0:%.*]] = llvm.mlir.constant(1 : index) : i64
+  // MLIR-NEXT:  [[v1:%.*]] = llvm.alloca [[v0]] x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr
+  // MLIR-NEXT:  llvm.br ^bb1
+  // MLIR-NEXT: ^bb1:  // pred: ^bb0
+  // MLIR-NEXT:  [[v2:%.*]] = llvm.mlir.constant(0 : i32) : i32
+  // MLIR-NEXT:  llvm.store [[v2]], [[v1]] {{.*}}: i32, !llvm.ptr
+  // MLIR-NEXT:  [[v3:%.*]] = llvm.load [[v1]] {alignment = 4 : i64} : !llvm.ptr -> i32
+  // MLIR-NEXT:  llvm.return [[v3]] : i32
+  // MLIR-NEXT: ^bb2:  // no predecessors
+  // MLIR-NEXT:  [[v4:%.*]] = llvm.load [[v1]] {alignment = 4 : i64} : !llvm.ptr -> i32
+  // MLIR-NEXT:  llvm.return [[v4]] : i32
+  // MLIR-NEXT: }
+
+  }
diff --git a/clang/test/CIR/Incubator/Lowering/select.cir b/clang/test/CIR/Incubator/Lowering/select.cir
new file mode 100644
index 0000000000000..71ca79a390e8b
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/select.cir
@@ -0,0 +1,48 @@
+// RUN: cir-translate -cir-to-llvmir --disable-cc-lowering -o %t.ll %s
+// RUN: FileCheck --input-file=%t.ll -check-prefix=LLVM %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.func @select_int(%arg0 : !cir.bool, %arg1 : !s32i, %arg2 : !s32i) -> !s32i {
+    %0 = cir.select if %arg0 then %arg1 else %arg2 : (!cir.bool, !s32i, !s32i) -> !s32i
+    cir.return %0 : !s32i
+  }
+
+  //      LLVM: define i32 @select_int(i1 %[[#COND:]], i32 %[[#TV:]], i32 %[[#FV:]])
+  // LLVM-NEXT:   %[[#RES:]] = select i1 %[[#COND]], i32 %[[#TV]], i32 %[[#FV]]
+  // LLVM-NEXT:   ret i32 %[[#RES]]
+  // LLVM-NEXT: }
+
+  cir.func @select_bool(%arg0 : !cir.bool, %arg1 : !cir.bool, %arg2 : !cir.bool) -> !cir.bool {
+    %0 = cir.select if %arg0 then %arg1 else %arg2 : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
+    cir.return %0 : !cir.bool
+  }
+
+  //      LLVM: define i1 @select_bool(i1 %[[#COND:]], i1 %[[#TV:]], i1 %[[#FV:]])
+  // LLVM-NEXT:   %[[#RES:]] = select i1 %[[#COND]], i1 %[[#TV]], i1 %[[#FV]]
+  // LLVM-NEXT:   ret i1 %[[#RES]]
+  // LLVM-NEXT: }
+
+  cir.func @logical_and(%arg0 : !cir.bool, %arg1 : !cir.bool) -> !cir.bool {
+    %0 = cir.const #cir.bool<false> : !cir.bool
+    %1 = cir.select if %arg0 then %arg1 else %0 : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
+    cir.return %1 : !cir.bool
+  }
+
+  //      LLVM: define i1 @logical_and(i1 %[[#ARG0:]], i1 %[[#ARG1:]])
+  // LLVM-NEXT:   %[[#RES:]] = and i1 %[[#ARG0]], %[[#ARG1]]
+  // LLVM-NEXT:   ret i1 %[[#RES]]
+  // LLVM-NEXT: }
+
+  cir.func @logical_or(%arg0 : !cir.bool, %arg1 : !cir.bool) -> !cir.bool {
+    %0 = cir.const #cir.bool<true> : !cir.bool
+    %1 = cir.select if %arg0 then %0 else %arg1 : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
+    cir.return %1 : !cir.bool
+  }
+
+  //      LLVM: define i1 @logical_or(i1 %[[#ARG0:]], i1 %[[#ARG1:]])
+  // LLVM-NEXT:   %[[#RES:]] = or i1 %[[#ARG0]], %[[#ARG1]]
+  // LLVM-NEXT:   ret i1 %[[#RES]]
+  // LLVM-NEXT: }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/setjmp-longjmp.cir b/clang/test/CIR/Incubator/Lowering/setjmp-longjmp.cir
new file mode 100644
index 0000000000000..1a441ce494e08
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/setjmp-longjmp.cir
@@ -0,0 +1,37 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.ll
+// RUN: FileCheck %s --input-file=%t.ll -check-prefix=MLIR
+!s32i = !cir.int<s, 32>
+!p32  = !cir.ptr<!s32i>
+
+module {
+  // MLIR: module {
+  cir.func @test_setjmp(%arg0 : !p32) -> !s32i {
+
+    // MLIR:  llvm.func @test_setjmp([[ARG0:%.*]]: !llvm.ptr) -> i32
+    // MLIR-NEXT:    [[RET:%.*]] = llvm.call_intrinsic "llvm.eh.sjlj.setjmp"([[ARG0]]) : (!llvm.ptr) -> i32
+    // MLIR-NEXT:    llvm.return [[RET]] : i32
+    // MLIR-NEXT:  }
+    %0 = cir.eh.setjmp builtin %arg0 : (!p32) -> !s32i
+    cir.return %0 : !s32i
+  }
+  cir.func @test_setjmp_2(%arg0 : !p32) -> !s32i {
+
+    // MLIR:  llvm.func @test_setjmp_2([[ARG0:%.*]]: !llvm.ptr) -> i32
+    // MLIR-NEXT:    [[RET:%.*]] = llvm.call @_setjmp([[ARG0]]) : (!llvm.ptr) -> i32
+    // MLIR-NEXT:    llvm.return [[RET]] : i32
+    // MLIR-NEXT:  }
+    %0 = cir.eh.setjmp %arg0 : (!p32) -> !s32i
+    cir.return %0 : !s32i
+  }
+  cir.func @test_longjmp(%arg0 : !p32) {
+
+    // MLIR: llvm.func @test_longjmp([[ARG0:%.*]]: !llvm.ptr)
+    // MLIR-NEXT:    llvm.call_intrinsic "llvm.eh.sjlj.longjmp"([[ARG0]]) : (!llvm.ptr) -> ()
+    // MLIR-NEXT:    llvm.unreachable
+    // MLIR-NEXT:  }
+    cir.eh.longjmp %arg0 : !p32
+    cir.unreachable
+  }
+  // MLIR: }
+}
+
diff --git a/clang/test/CIR/Incubator/Lowering/shift.cir b/clang/test/CIR/Incubator/Lowering/shift.cir
new file mode 100644
index 0000000000000..f47d5955dcee2
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/shift.cir
@@ -0,0 +1,28 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+!s16i = !cir.int<s, 16>
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+!u16i = !cir.int<u, 16>
+module {
+  cir.func @testShiftWithDifferentValueAndAmountTypes(%arg0: !s16i, %arg1: !s32i, %arg2: !s64i, %arg3: !u16i) {
+  // CHECK: testShiftWithDifferentValueAndAmountTypes
+
+    // Should allow shift with larger amount type.
+    %1 = cir.shift(left, %arg1: !s32i, %arg2 : !s64i) -> !s32i
+    // CHECK: %[[#CAST:]] = llvm.trunc %{{.+}} : i64 to i32
+    // CHECK: llvm.shl %{{.+}}, %[[#CAST]]  : i32
+
+    // Should allow shift with signed smaller amount type.
+    %2 = cir.shift(left, %arg1 : !s32i, %arg0 : !s16i) -> !s32i
+    // CHECK: %[[#CAST:]] = llvm.zext %{{.+}} : i16 to i32
+    // CHECK: llvm.shl %{{.+}}, %[[#CAST]]  : i32
+
+    // Should allow shift with unsigned smaller amount type.
+    %14 = cir.shift(left, %arg1 : !s32i, %arg3 : !u16i) -> !s32i
+    // CHECK: %[[#CAST:]] = llvm.zext %{{.+}} : i16 to i32
+    // CHECK: llvm.shl %{{.+}}, %[[#CAST]]  : i32
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/stack-save-restore.cir b/clang/test/CIR/Incubator/Lowering/stack-save-restore.cir
new file mode 100644
index 0000000000000..ad9dee66b53f5
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/stack-save-restore.cir
@@ -0,0 +1,19 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+
+!u8i = !cir.int<u, 8>
+
+module  {
+  cir.func @stack_save() {
+    %0 = cir.stack_save : !cir.ptr<!u8i>
+    cir.stack_restore %0 : !cir.ptr<!u8i>
+    cir.return
+  }
+}
+
+//      MLIR: module {
+// MLIR-NEXT:  llvm.func @stack_save
+// MLIR-NEXT:    %0 = llvm.intr.stacksave : !llvm.ptr
+// MLIR-NEXT:    llvm.intr.stackrestore %0 : !llvm.ptr
+// MLIR-NEXT:    llvm.return
+// MLIR-NEXT:  }
+// MLIR-NEXT: }
diff --git a/clang/test/CIR/Incubator/Lowering/static-array.c b/clang/test/CIR/Incubator/Lowering/static-array.c
new file mode 100644
index 0000000000000..60cfce0245d69
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/static-array.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+int test(int x) {
+  static int arr[10] = {0, 1, 0, 0};
+  return arr[x];
+}
+// LLVM: internal global [10 x i32] [i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0]
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Lowering/store-memcpy.cpp b/clang/test/CIR/Incubator/Lowering/store-memcpy.cpp
new file mode 100644
index 0000000000000..1398ed9821028
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/store-memcpy.cpp
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o %t2.cir 2>&1 | FileCheck -check-prefix=AFTER %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+void foo() {
+  char s1[] = "Hello";
+}
+// AFTER-DAG:  cir.global "private" constant cir_private @__const._Z3foov.s1 = #cir.const_array<"Hello\00" : !cir.array<!s8i x 6>> : !cir.array<!s8i x 6>
+// AFTER: @_Z3foov
+// AFTER:    %[[S1:.*]] = cir.alloca !cir.array<!s8i x 6>, !cir.ptr<!cir.array<!s8i x 6>>, ["s1", init]
+// AFTER:    %[[HELLO:.*]] = cir.get_global @__const._Z3foov.s1 : !cir.ptr<!cir.array<!s8i x 6>>
+// AFTER:    cir.copy %[[HELLO]] to %[[S1]] : !cir.ptr<!cir.array<!s8i x 6>>
+// AFTER:    cir.return
+// AFTER:  }
+
+// LLVM: @__const._Z3foov.s1 = private constant [6 x i8] c"Hello\00"
+// LLVM: @_Z3foov()
+// LLVM:   %[[S1:.*]] = alloca [6 x i8], i64 1, align 1
+// FIXME: LLVM OG uses @llvm.memcpy.p0.p0.i64
+// LLVM:   call void @llvm.memcpy.p0.p0.i32(ptr %[[S1]], ptr @__const._Z3foov.s1, i32 6, i1 false)
+// LLVM:   ret void
diff --git a/clang/test/CIR/Incubator/Lowering/str.c b/clang/test/CIR/Incubator/Lowering/str.c
new file mode 100644
index 0000000000000..cc393daee884f
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/str.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+void f(char *fmt, ...);
+void test() {
+    f("test\0");
+}
+
+// LLVM: @.str = {{.*}}[6 x i8] c"test\00\00"
diff --git a/clang/test/CIR/Incubator/Lowering/struct-init.c b/clang/test/CIR/Incubator/Lowering/struct-init.c
new file mode 100644
index 0000000000000..8be8f6ffc5c0f
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/struct-init.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+struct S {
+    int x;
+};
+
+// LLVM: define dso_local void @zeroInit
+// LLVM: [[TMP0:%.*]] = alloca %struct.S, i64 1
+// LLVM: store %struct.S zeroinitializer, ptr [[TMP0]]
+void zeroInit() {
+  struct S s = {0};
+}
diff --git a/clang/test/CIR/Incubator/Lowering/struct.cir b/clang/test/CIR/Incubator/Lowering/struct.cir
new file mode 100644
index 0000000000000..832ce20861d85
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/struct.cir
@@ -0,0 +1,130 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+!s32i = !cir.int<s, 32>
+!u8i = !cir.int<u, 8>
+!u32i = !cir.int<u, 32>
+!rec_S = !cir.record<struct "S" {!u8i, !s32i}>
+!rec_S2A = !cir.record<struct "S2A" {!s32i} #cir.record.decl.ast>
+!rec_S1_ = !cir.record<struct "S1" {!s32i, !cir.float, !cir.ptr<!s32i>} #cir.record.decl.ast>
+!rec_S2_ = !cir.record<struct "S2" {!rec_S2A} #cir.record.decl.ast>
+!rec_S3_ = !cir.record<struct "S3" {!s32i} #cir.record.decl.ast>
+
+!struct_with_bool = !cir.record<struct "struct_with_bool" {!u32i, !cir.bool}>
+
+module {
+  cir.func @test() {
+    %1 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["x"] {alignment = 4 : i64}
+    // CHECK: %[[#ARRSIZE:]] = llvm.mlir.constant(1 : index) : i64
+    // CHECK: %[[#STRUCT:]] = llvm.alloca %[[#ARRSIZE]] x !llvm.struct<"struct.S", (i8, i32)>
+    %3 = cir.get_member %1[0] {name = "c"} : !cir.ptr<!rec_S> -> !cir.ptr<!u8i>
+    // CHECK: = llvm.getelementptr %[[#STRUCT]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"struct.S", (i8, i32)>
+    %5 = cir.get_member %1[1] {name = "i"} : !cir.ptr<!rec_S> -> !cir.ptr<!s32i>
+    // CHECK: = llvm.getelementptr %[[#STRUCT]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"struct.S", (i8, i32)>
+    cir.return
+  }
+
+  // CHECK-LABEL: @test_value
+  cir.func @test_value() {
+    %0 = cir.const #cir.const_record<{#cir.int<1> : !u8i, #cir.int<2> : !s32i}> : !rec_S
+    //      CHECK: %[[#v0:]] = llvm.mlir.undef : !llvm.struct<"struct.S", (i8, i32)>
+    // CHECK-NEXT: %[[#v1:]] = llvm.mlir.constant(1 : i8) : i8
+    // CHECK-NEXT: %[[#v2:]] = llvm.insertvalue %[[#v1]], %[[#v0]][0] : !llvm.struct<"struct.S", (i8, i32)>
+    // CHECK-NEXT: %[[#v3:]] = llvm.mlir.constant(2 : i32) : i32
+    // CHECK-NEXT: %[[#v4:]] = llvm.insertvalue %[[#v3]], %[[#v2]][1] : !llvm.struct<"struct.S", (i8, i32)>
+    %1 = cir.extract_member %0[0] : !rec_S -> !u8i
+    // CHECK-NEXT: %{{.+}} = llvm.extractvalue %[[#v4]][0] : !llvm.struct<"struct.S", (i8, i32)>
+    %2 = cir.extract_member %0[1] : !rec_S -> !s32i
+    // CHECK-NEXT: %{{.+}} = llvm.extractvalue %[[#v4]][1] : !llvm.struct<"struct.S", (i8, i32)>
+    cir.return
+  }
+
+  cir.func @shouldConstInitLocalStructsWithConstStructAttr() {
+    %0 = cir.alloca !rec_S2A, !cir.ptr<!rec_S2A>, ["s"] {alignment = 4 : i64}
+    %1 = cir.const #cir.const_record<{#cir.int<1> : !s32i}> : !rec_S2A
+    cir.store %1, %0 : !rec_S2A, !cir.ptr<!rec_S2A>
+    cir.return
+  }
+  // CHECK: llvm.func @shouldConstInitLocalStructsWithConstStructAttr()
+  // CHECK:   %0 = llvm.mlir.constant(1 : index) : i64
+  // CHECK:   %1 = llvm.alloca %0 x !llvm.struct<"struct.S2A", (i32)> {alignment = 4 : i64} : (i64) -> !llvm.ptr
+  // CHECK:   %2 = llvm.mlir.undef : !llvm.struct<"struct.S2A", (i32)>
+  // CHECK:   %3 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK:   %4 = llvm.insertvalue %3, %2[0] : !llvm.struct<"struct.S2A", (i32)>
+  // CHECK:   llvm.store %4, %1 {{.*}}: !llvm.struct<"struct.S2A", (i32)>, !llvm.ptr
+  // CHECK:   llvm.return
+  // CHECK: }
+
+  // Should lower basic #cir.const_record initializer.
+  cir.global external @s1 = #cir.const_record<{#cir.int<1> : !s32i, #cir.fp<1.000000e-01> : !cir.float, #cir.ptr<null> : !cir.ptr<!s32i>}> : !rec_S1_
+  // CHECK: llvm.mlir.global external @s1() {addr_space = 0 : i32} : !llvm.struct<"struct.S1", (i32, f32, ptr)> {
+  // CHECK:   %0 = llvm.mlir.undef : !llvm.struct<"struct.S1", (i32, f32, ptr)>
+  // CHECK:   %1 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK:   %2 = llvm.insertvalue %1, %0[0] : !llvm.struct<"struct.S1", (i32, f32, ptr)>
+  // CHECK:   %3 = llvm.mlir.constant(1.000000e-01 : f32) : f32
+  // CHECK:   %4 = llvm.insertvalue %3, %2[1] : !llvm.struct<"struct.S1", (i32, f32, ptr)>
+  // CHECK:   %5 = llvm.mlir.zero : !llvm.ptr
+  // CHECK:   %6 = llvm.insertvalue %5, %4[2] : !llvm.struct<"struct.S1", (i32, f32, ptr)>
+  // CHECK:   llvm.return %6 : !llvm.struct<"struct.S1", (i32, f32, ptr)>
+  // CHECK: }
+
+  // Should lower nested #cir.const_record initializer.
+  cir.global external @s2 = #cir.const_record<{#cir.const_record<{#cir.int<1> : !s32i}> : !rec_S2A}> : !rec_S2_
+  // CHECK: llvm.mlir.global external @s2() {addr_space = 0 : i32} : !llvm.struct<"struct.S2", (struct<"struct.S2A", (i32)>)> {
+  // CHECK:   %0 = llvm.mlir.undef : !llvm.struct<"struct.S2", (struct<"struct.S2A", (i32)>)>
+  // CHECK:   %1 = llvm.mlir.undef : !llvm.struct<"struct.S2A", (i32)>
+  // CHECK:   %2 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK:   %3 = llvm.insertvalue %2, %1[0] : !llvm.struct<"struct.S2A", (i32)>
+  // CHECK:   %4 = llvm.insertvalue %3, %0[0] : !llvm.struct<"struct.S2", (struct<"struct.S2A", (i32)>)>
+  // CHECK:   llvm.return %4 : !llvm.struct<"struct.S2", (struct<"struct.S2A", (i32)>)>
+  // CHECK: }
+
+  cir.global external @s3 = #cir.const_array<[#cir.const_record<{#cir.int<1> : !s32i}> : !rec_S3_, #cir.const_record<{#cir.int<2> : !s32i}> : !rec_S3_, #cir.const_record<{#cir.int<3> : !s32i}> : !rec_S3_]> : !cir.array<!rec_S3_ x 3>
+  // CHECK: llvm.mlir.global external @s3() {addr_space = 0 : i32} : !llvm.array<3 x struct<"struct.S3", (i32)>> {
+  // CHECK:   %0 = llvm.mlir.undef : !llvm.array<3 x struct<"struct.S3", (i32)>>
+  // CHECK:   %1 = llvm.mlir.undef : !llvm.struct<"struct.S3", (i32)>
+  // CHECK:   %2 = llvm.mlir.constant(1 : i32) : i32
+  // CHECK:   %3 = llvm.insertvalue %2, %1[0] : !llvm.struct<"struct.S3", (i32)>
+  // CHECK:   %4 = llvm.insertvalue %3, %0[0] : !llvm.array<3 x struct<"struct.S3", (i32)>>
+  // CHECK:   %5 = llvm.mlir.undef : !llvm.struct<"struct.S3", (i32)>
+  // CHECK:   %6 = llvm.mlir.constant(2 : i32) : i32
+  // CHECK:   %7 = llvm.insertvalue %6, %5[0] : !llvm.struct<"struct.S3", (i32)>
+  // CHECK:   %8 = llvm.insertvalue %7, %4[1] : !llvm.array<3 x struct<"struct.S3", (i32)>>
+  // CHECK:   %9 = llvm.mlir.undef : !llvm.struct<"struct.S3", (i32)>
+  // CHECK:   %10 = llvm.mlir.constant(3 : i32) : i32
+  // CHECK:   %11 = llvm.insertvalue %10, %9[0] : !llvm.struct<"struct.S3", (i32)>
+  // CHECK:   %12 = llvm.insertvalue %11, %8[2] : !llvm.array<3 x struct<"struct.S3", (i32)>>
+  // CHECK:   llvm.return %12 : !llvm.array<3 x struct<"struct.S3", (i32)>>
+  // CHECK: }
+
+  cir.func @shouldLowerStructCopies() {
+  // CHECK: llvm.func @shouldLowerStructCopies()
+    %1 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["a"] {alignment = 4 : i64}
+    // CHECK: %[[#ONE:]] = llvm.mlir.constant(1 : index) : i64
+    // CHECK: %[[#SA:]] = llvm.alloca %[[#ONE]] x !llvm.struct<"struct.S", (i8, i32)> {alignment = 4 : i64} : (i64) -> !llvm.ptr
+    %2 = cir.alloca !rec_S, !cir.ptr<!rec_S>, ["b", init] {alignment = 4 : i64}
+    // CHECK: %[[#ONE:]] = llvm.mlir.constant(1 : index) : i64
+    // CHECK: %[[#SB:]] = llvm.alloca %[[#ONE]] x !llvm.struct<"struct.S", (i8, i32)> {alignment = 4 : i64} : (i64) -> !llvm.ptr
+    cir.copy %1 to %2 : !cir.ptr<!rec_S>
+    // CHECK: %[[#SIZE:]] = llvm.mlir.constant(8 : i32) : i32
+    // CHECK: "llvm.intr.memcpy"(%[[#SB]], %[[#SA]], %[[#SIZE]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> ()
+    cir.return
+  }
+
+  // Verify that boolean fields are lowered to i8 and that the correct type is inserted during initialization.
+  cir.global external @struct_with_bool = #cir.const_record<{#cir.int<1> : !u32i, #cir.bool<false> : !cir.bool}> : !struct_with_bool
+  // CHECK: llvm.mlir.global external @struct_with_bool() {addr_space = 0 : i32} : !llvm.struct<"struct.struct_with_bool", (i32, i8)> {
+  // CHECK:  %[[FALSE:.+]] = llvm.mlir.constant(false) : i1
+  // CHECK-NEXT:  %[[FALSE_MEM:.+]] = llvm.zext %[[FALSE]] : i1 to i8
+  // CHECK-NEXT:  = llvm.insertvalue %[[FALSE_MEM]], %{{.+}}[1] : !llvm.struct<"struct.struct_with_bool", (i32, i8)>
+
+  cir.func @test_struct_with_bool() {
+    // CHECK-LABEL: llvm.func @test_struct_with_bool()
+    %0 = cir.alloca !struct_with_bool, !cir.ptr<!struct_with_bool>, ["a"] {alignment = 4 : i64}
+    %1 = cir.get_member %0[1] {name = "b"} : !cir.ptr<!struct_with_bool> -> !cir.ptr<!cir.bool>
+    // CHECK: %[[BOOL_MEMBER_PTR:.+]] = llvm.getelementptr %{{.*}}[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"struct.struct_with_bool", (i32, i8)>
+    %2 = cir.load %1 : !cir.ptr<!cir.bool>, !cir.bool
+    // CHECK: = llvm.load %[[BOOL_MEMBER_PTR]] {{.*}} : !llvm.ptr -> i8
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/switch-while.c b/clang/test/CIR/Incubator/Lowering/switch-while.c
new file mode 100644
index 0000000000000..9123d55329607
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/switch-while.c
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s
+void func100();
+int f(int a, int cond) {
+  int b = 1; 
+  switch (a) 
+    while (1) {
+        b++;
+
+        default:
+            if (cond)
+                return a;
+
+            a = a + b;
+
+        case 2:
+            a++;
+
+        case 3:
+            continue;
+
+        case 5:
+            break;
+
+        case 100:
+            func100();
+  }
+
+  return a;
+}
+
+// CHECK: switch i32 %[[A:.+]], label %[[DEFAULT_BB:.+]] [
+// CHECK:   i32 2, label %[[TWO_BB:.+]]
+// CHECK:   i32 3, label %[[THREE_BB:.+]]
+// CHECK:   i32 5, label %[[FIVE_BB:.+]]
+// CHECK:   i32 100, label %[[HUNDRED_BB:.+]]
+// CHECK: ]
+//
+// CHECK: [[UNREACHABLE_BB:.+]]: {{.*}}; No predecessors!
+// 
+// CHECK: [[LOOP_ENTRY:.+]]:
+// CHECK: br label %[[LOOP_HEADER:.+]]
+//
+// CHECK: [[LOOP_HEADER]]:
+// CHECK:   add nsw i32 %{{.*}}, 1
+// CHECK: br label %[[DEFAULT_BB:.+]]
+//
+// CHECK: [[DEFAULT_BB]]:
+// CHECK:   br label %[[IF_BB:.+]]
+//
+// CHECK: [[IF_BB]]:
+// CHECK:   %[[CMP:.+]] = icmp ne i32 %[[COND:.+]], 0
+// CHECK:   br i1 %[[CMP]], label %[[IF_TRUE_BB:.+]], label %[[IF_FALSE_BB:.+]]
+//
+// CHECK: [[IF_TRUE_BB]]:
+// CHECK:   ret
+//
+// CHECK: [[IF_FALSE_BB]]:
+// CHECK:   %[[V1:.+]] = load i32
+// CHECK:   %[[V2:.+]] = load i32
+// CHECK:   add nsw i32 %[[V1]], %[[V2]]
+//
+// CHECK: [[TWO_BB]]:
+// CHECK:   add nsw i32 %{{.*}}, 1
+// CHECK:   br label %[[FALLTHROUGH_BB:.+]]
+//
+// CHECK: [[FALLTHROUGH_BB]]:
+// CHECK:   br label %[[LOOP_HEADER]]
+//
+// CHECK: [[FIVE_BB]]:
+// CHECK:   br label %[[LOOP_EXIT_BB:.+]]
+//
+// CHECK: [[HUNDRED_BB]]:
+// CHECK:   call {{.*}}@func100()
+// CHECK:   br label %[[CONTINUE_BB:.+]]
+//
+// CHECK: [[CONTINUE_BB]]:
+// CHECK:  br label %[[LOOP_HEADER]]
+//
+// CHECK: [[LOOP_EXIT_BB]]:
+// CHECK:   br label %[[RET_BB:.+]]
+//
+// CHECK: [[RET_BB]]:
+// CHECK:   ret
diff --git a/clang/test/CIR/Incubator/Lowering/switch.cir b/clang/test/CIR/Incubator/Lowering/switch.cir
new file mode 100644
index 0000000000000..9434b7337f7ed
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/switch.cir
@@ -0,0 +1,190 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+!s8i = !cir.int<s, 8>
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+
+module {
+  cir.func @shouldLowerSwitchWithDefault(%arg0: !s8i) {
+    cir.switch (%arg0 : !s8i) {
+    // CHECK: llvm.switch %arg0 : i8, ^bb[[#DEFAULT:]] [
+    // CHECK:   1: ^bb[[#CASE1:]]
+    // CHECK: ]
+    cir.case (equal, [#cir.int<1> : !s8i]) {
+      cir.break
+    }
+    // CHECK: ^bb[[#CASE1]]:
+    // CHECK:   llvm.br ^bb[[#EXIT:]]
+    cir.case (default, []) {
+      cir.break
+    }
+    // CHECK: ^bb[[#DEFAULT]]:
+    // CHECK:   llvm.br ^bb[[#EXIT]]
+    cir.yield
+    }
+    // CHECK: ^bb[[#EXIT]]:
+    cir.return
+  }
+
+
+  cir.func @shouldLowerSwitchWithoutDefault(%arg0: !s32i) {
+    cir.switch (%arg0 : !s32i) {
+    // Default block is the exit block:
+    // CHECK: llvm.switch %arg0 : i32, ^bb[[#EXIT:]] [
+    // CHECK:   1: ^bb[[#CASE1:]]
+    // CHECK: ]
+    cir.case (equal, [#cir.int<1> : !s32i]) {
+      cir.break
+    }
+    // CHECK: ^bb[[#CASE1]]:
+    // CHECK:   llvm.br ^bb[[#EXIT]]
+    cir.yield
+    }
+    // CHECK: ^bb[[#EXIT]]:
+    cir.return
+  }
+
+
+  cir.func @shouldLowerSwitchWithImplicitFallthrough(%arg0: !s64i) {
+    cir.switch (%arg0 : !s64i) {
+    // CHECK: llvm.switch %arg0 : i64, ^bb[[#EXIT:]] [
+    // CHECK:   1: ^bb[[#CASE1N2:]],
+    // CHECK:   2: ^bb[[#CASE1N2]]
+    // CHECK: ]
+    cir.case (anyof, [#cir.int<1> : !s64i, #cir.int<2> : !s64i]) { // case 1 and 2 use same region
+      cir.break
+    }
+    // CHECK: ^bb[[#CASE1N2]]:
+    // CHECK:   llvm.br ^bb[[#EXIT]]
+    cir.yield
+    }
+    // CHECK: ^bb[[#EXIT]]:
+    cir.return
+  }
+
+
+  cir.func @shouldLowerSwitchWithExplicitFallthrough(%arg0: !s64i) {
+      cir.switch (%arg0 : !s64i) {
+      // CHECK: llvm.switch %arg0 : i64, ^bb[[#EXIT:]] [
+      // CHECK:   1: ^bb[[#CASE1:]],
+      // CHECK:   2: ^bb[[#CASE2:]]
+      // CHECK: ]
+      cir.case (equal, [#cir.int<1> : !s64i]) { // case 1 has its own region
+        cir.yield // fallthrough to case 2
+      }
+      // CHECK: ^bb[[#CASE1]]:
+      // CHECK:   llvm.br ^bb[[#CASE2]]
+      cir.case (equal, [#cir.int<2> : !s64i]) {
+        cir.break
+      }
+      // CHECK: ^bb[[#CASE2]]:
+      // CHECK:   llvm.br ^bb[[#EXIT]]
+      cir.yield
+      }
+      // CHECK: ^bb[[#EXIT]]:
+    cir.return
+  }
+
+
+  cir.func @shouldLowerSwitchWithFallthroughToExit(%arg0: !s64i) {
+      cir.switch (%arg0 : !s64i) {
+      // CHECK: llvm.switch %arg0 : i64, ^bb[[#EXIT:]] [
+      // CHECK:   1: ^bb[[#CASE1:]]
+      // CHECK: ]
+       cir.case (equal, [#cir.int<1> : !s64i]) {
+        cir.yield // fallthrough to exit
+      }
+      // CHECK: ^bb[[#CASE1]]:
+      // CHECK:   llvm.br ^bb[[#EXIT]]
+      cir.yield
+      }
+      // CHECK: ^bb[[#EXIT]]:
+    cir.return
+  }
+
+
+  cir.func @shouldDropEmptySwitch(%arg0: !s64i) {
+    cir.switch (%arg0 : !s64i) {
+      cir.yield
+    }
+    // CHECK-NOT: llvm.switch
+    cir.return
+  }
+
+  cir.func @shouldLowerMultiBlockCase(%arg0: !s32i) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    cir.scope {
+      %1 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+      cir.switch (%1 : !s32i) {
+      cir.case (equal, [#cir.int<3> : !s32i]) {
+        cir.return
+      ^bb1:  // no predecessors
+        cir.break
+      }
+      cir.yield
+      }
+    }
+    cir.return
+  }
+  // CHECK: llvm.func @shouldLowerMultiBlockCase
+  // CHECK: ^bb1:  // pred: ^bb0
+  // CHECK:   llvm.switch {{.*}} : i32, ^[[DEFAULT_BB:.+]] [
+  // CHECK:     3: ^[[DIRECTLY_RET_BB:.+]]
+  // CHECK:   ]
+  // CHECK: ^[[DIRECTLY_RET_BB]]:
+  // CHECK:   llvm.return
+  // CHECK: ^[[DEFAULT_BB]]:
+  // CHECK:   llvm.br ^[[RET_BB:.+]]
+  // CHECK: ^[[RET_BB]]:  // pred: ^[[DEFAULT_BB]]
+  // CHECK:   llvm.return
+  // CHECK: }
+
+  cir.func @shouldLowerNestedBreak(%arg0: !s32i, %arg1: !s32i) -> !s32i {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {alignment = 4 : i64}
+    %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    cir.store %arg1, %1 : !s32i, !cir.ptr<!s32i>
+    cir.scope {
+      %5 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+      cir.switch (%5 : !s32i) {
+      cir.case (equal, [#cir.int<0> : !s32i]) {
+        cir.scope {
+          %6 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+          %7 = cir.const #cir.int<0> : !s32i
+          %8 = cir.cmp(ge, %6, %7) : !s32i, !cir.bool
+          cir.if %8 {
+            cir.break
+          }
+        }
+        cir.break
+      }
+      cir.yield
+      }
+    }
+    %3 = cir.const #cir.int<3> : !s32i
+    cir.store %3, %2 : !s32i, !cir.ptr<!s32i>
+    %4 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+    cir.return %4 : !s32i
+  }
+  // CHECK:  llvm.func @shouldLowerNestedBreak
+  // CHECK:    llvm.switch %6 : i32, ^[[DEFAULT_BB:.+]] [
+  // CHECK:      0: ^[[ZERO_BB:.+]]
+  // CHECK:    ]
+  // CHECK:  ^[[ZERO_BB]]:
+  // CHECK:    llvm.br ^[[ZERO_BB_SUCC:.+]]
+  // CHECK:  ^[[ZERO_BB_SUCC]]:  // pred: ^[[ZERO_BB]]
+  // CHECK:    llvm.cond_br {{%.*}}, ^[[DEFAULT_BB_PRED1:.+]], ^[[DEFAULT_BB_PRED12:.+]]
+  // CHECK:  ^[[DEFAULT_BB_PRED1]]:  // pred: ^[[ZERO_BB_SUCC]]
+  // CHECK:    llvm.br ^[[DEFAULT_BB]]
+  // CHECK:  ^[[DEFAULT_BB_PRED12]]:  // pred: ^[[ZERO_BB_SUCC]]
+  // CHECK:    llvm.br ^[[DEFAULT_BB_PRED2:.+]]
+  // CHECK:  ^[[DEFAULT_BB_PRED2]]:  // pred: ^[[DEFAULT_BB_PRED12]]
+  // CHECK:    llvm.br ^[[DEFAULT_BB]]
+  // CHECK:  ^[[DEFAULT_BB]]:
+  // CHECK:    llvm.br ^[[RET_BB:.+]]
+  // CHECK:  ^[[RET_BB]]:  // pred: ^[[DEFAULT_BB]]
+  // CHECK:    llvm.return
+}
diff --git a/clang/test/CIR/Incubator/Lowering/syncscope.cir b/clang/test/CIR/Incubator/Lowering/syncscope.cir
new file mode 100644
index 0000000000000..c42a2094cc8c5
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/syncscope.cir
@@ -0,0 +1,29 @@
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering -o -  | FileCheck %s -check-prefix=LLVM
+
+!s32i = !cir.int<s, 32>
+#fn_attr = #cir<extra({nothrow = #cir.nothrow})>
+module {
+  cir.func no_inline optnone @test(%ptr: !cir.ptr<!s32i>, %expected: !s32i, %desired: !s32i) -> !cir.bool extra(#fn_attr) {
+    %old, %cmp = cir.atomic.cmp_xchg(%ptr : !cir.ptr<!s32i>, %expected : !s32i, %desired : !s32i, success = acquire, failure = acquire) syncscope(single_thread) align(4) : (!s32i, !cir.bool)
+    cir.return %cmp: !cir.bool
+  }
+
+  cir.func no_inline @load(%ptr: !cir.ptr<!s32i>) -> !s32i extra(#fn_attr) {
+    %val = cir.load syncscope(single_thread) atomic(relaxed) %ptr : !cir.ptr<!s32i>, !s32i
+    cir.return %val : !s32i
+  }
+
+
+  // System scope should not materialize a syncscope attribute.
+  cir.func no_inline @system_load(%ptr: !cir.ptr<!s32i>) -> !s32i extra(#fn_attr) {
+    %val = cir.load atomic(seq_cst) %ptr : !cir.ptr<!s32i>, !s32i
+    cir.return %val : !s32i
+  }
+
+}
+
+// LLVM: {{%.*}} = cmpxchg ptr {{%.*}}, i32 {{%.*}}, i32 {{%.*}} syncscope("singlethread") acquire acquire, align 4
+// LLVM: load atomic i32, ptr {{%.*}} syncscope("singlethread") monotonic, align 4
+// LLVM-LABEL: @system_load
+// LLVM: load atomic i32, ptr {{%.*}} seq_cst, align 4
+// LLVM-NOT: syncscope(
diff --git a/clang/test/CIR/Incubator/Lowering/ternary.cir b/clang/test/CIR/Incubator/Lowering/ternary.cir
new file mode 100644
index 0000000000000..24307daea4e3e
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/ternary.cir
@@ -0,0 +1,111 @@
+// RUN: cir-opt %s -cir-to-llvm -reconcile-unrealized-casts -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir -check-prefix=MLIR
+
+!s32i = !cir.int<s, 32>
+!rec_A = !cir.record<union "A" {!s32i}>
+!rec_anon2E0 = !cir.record<union "anon.0" {!rec_A} #cir.record.decl.ast>
+
+module {
+cir.func @_Z1xi(%arg0: !s32i) -> !s32i {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    %3 = cir.const #cir.int<0> : !s32i
+    %4 = cir.cmp(gt, %2, %3) : !s32i, !cir.bool
+    %5 = cir.ternary(%4, true {
+      %7 = cir.const #cir.int<3> : !s32i
+      cir.yield %7 : !s32i
+    }, false {
+      %7 = cir.const #cir.int<5> : !s32i
+      cir.yield %7 : !s32i
+    }) : (!cir.bool) -> !s32i
+    cir.store %5, %1 : !s32i, !cir.ptr<!s32i>
+    %6 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    cir.return %6 : !s32i
+  }
+
+//      MLIR:  llvm.func @_Z1xi(%arg0: i32) -> i32
+// MLIR-NEXT:    %0 = llvm.mlir.constant(1 : index) : i64
+// MLIR-NEXT:    %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr
+// MLIR-NEXT:    %2 = llvm.mlir.constant(1 : index) : i64
+// MLIR-NEXT:    %3 = llvm.alloca %2 x i32 {alignment = 4 : i64} : (i64) -> !llvm.ptr
+// MLIR-NEXT:    llvm.store %arg0, %1 {{.*}}: i32, !llvm.ptr
+// MLIR-NEXT:    %4 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32
+// MLIR-NEXT:    %5 = llvm.mlir.constant(0 : i32) : i32
+// MLIR-NEXT:    %6 = llvm.icmp "sgt" %4, %5 : i32
+// MLIR-NEXT:    llvm.cond_br %6, ^bb1, ^bb2
+// MLIR-NEXT:  ^bb1:  // pred: ^bb0
+// MLIR-NEXT:    %7 = llvm.mlir.constant(3 : i32) : i32
+// MLIR-NEXT:    llvm.br ^bb3(%7 : i32)
+// MLIR-NEXT:  ^bb2:  // pred: ^bb0
+// MLIR-NEXT:    %8 = llvm.mlir.constant(5 : i32) : i32
+// MLIR-NEXT:    llvm.br ^bb3(%8 : i32)
+// MLIR-NEXT:  ^bb3(%9: i32):  // 2 preds: ^bb1, ^bb2
+// MLIR-NEXT:    llvm.store %9, %3 {{.*}}: i32, !llvm.ptr
+// MLIR-NEXT:    %10 = llvm.load %3 {alignment = 4 : i64} : !llvm.ptr -> i32
+// MLIR-NEXT:    llvm.return %10 : i32
+// MLIR-NEXT:  }
+
+cir.global external dso_local @a = #cir.zero : !cir.array<!rec_anon2E0 x 1> {alignment = 4 : i64}
+cir.func no_proto dso_local @B() -> !rec_A {
+  %0 = cir.alloca !rec_A, !cir.ptr<!rec_A>, ["__retval"] {alignment = 4 : i64}
+  %1 = cir.get_global @a : !cir.ptr<!cir.array<!rec_anon2E0 x 1>>
+  %2 = cir.cast bitcast %1 : !cir.ptr<!cir.array<!rec_anon2E0 x 1>> -> !cir.ptr<!cir.array<!rec_anon2E0 x 0>>
+  %3 = cir.get_global @a : !cir.ptr<!cir.array<!rec_anon2E0 x 1>>
+  %4 = cir.cast bitcast %3 : !cir.ptr<!cir.array<!rec_anon2E0 x 1>> -> !cir.ptr<!cir.array<!rec_anon2E0 x 0>>
+  %5 = cir.cast array_to_ptrdecay %4 : !cir.ptr<!cir.array<!rec_anon2E0 x 0>> -> !cir.ptr<!rec_anon2E0>
+  %6 = cir.const #cir.ptr<null> : !cir.ptr<!rec_anon2E0>
+  %7 = cir.cmp(lt, %5, %6) : !cir.ptr<!rec_anon2E0>, !cir.bool
+  %8 = cir.cast bool_to_int %7 : !cir.bool -> !s32i
+  %9 = cir.ternary(%7, true {
+    cir.yield %8 : !s32i
+  }, false {
+    %14 = cir.get_global @a : !cir.ptr<!cir.array<!rec_anon2E0 x 1>>
+    %15 = cir.cast bitcast %14 : !cir.ptr<!cir.array<!rec_anon2E0 x 1>> -> !cir.ptr<!cir.array<!rec_anon2E0 x 0>>
+    %16 = cir.const #cir.int<0> : !s32i
+    %17 = cir.get_element %15[%16] : (!cir.ptr<!cir.array<!rec_anon2E0 x 0>>, !s32i) -> !cir.ptr<!rec_anon2E0>
+    %18 = cir.get_member %17[0] {name = "B"} : !cir.ptr<!rec_anon2E0> -> !cir.ptr<!rec_A>
+    %19 = cir.get_member %18[0] {name = "A"} : !cir.ptr<!rec_A> -> !cir.ptr<!s32i>
+    %20 = cir.load align(4) %19 : !cir.ptr<!s32i>, !s32i
+    cir.yield %20 : !s32i
+  }) : (!cir.bool) -> !s32i
+  %10 = cir.const #cir.int<0> : !s32i
+  %11 = cir.binop(sub, %9, %10) nsw : !s32i
+  %12 = cir.get_element %2[%11] : (!cir.ptr<!cir.array<!rec_anon2E0 x 0>>, !s32i) -> !cir.ptr<!rec_anon2E0>
+  %13 = cir.load %0 : !cir.ptr<!rec_A>, !rec_A
+  cir.return %13 : !rec_A
+}
+// MLIR-LABEL:   llvm.func @B() -> !llvm.struct<"union.A", (i32)>
+// MLIR:           %[[VAL_0:.*]] = llvm.mlir.constant(1 : index) : i64
+// MLIR:           %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.struct<"union.A", (i32)> {alignment = 4 : i64} : (i64) -> !llvm.ptr
+// MLIR:           %[[VAL_2:.*]] = llvm.mlir.addressof @a : !llvm.ptr
+// MLIR:           %[[VAL_3:.*]] = llvm.bitcast %[[VAL_2]] : !llvm.ptr to !llvm.ptr
+// MLIR:           %[[VAL_4:.*]] = llvm.mlir.addressof @a : !llvm.ptr
+// MLIR:           %[[VAL_5:.*]] = llvm.bitcast %[[VAL_4]] : !llvm.ptr to !llvm.ptr
+// MLIR:           %[[VAL_6:.*]] = llvm.getelementptr %[[VAL_5]][0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"union.anon.0", (struct<"union.A", (i32)>)>
+// MLIR:           %[[VAL_7:.*]] = llvm.mlir.zero : !llvm.ptr
+// MLIR:           %[[VAL_8:.*]] = llvm.icmp "ult" %[[VAL_6]], %[[VAL_7]] : !llvm.ptr
+// MLIR:           %[[VAL_9:.*]] = llvm.zext %[[VAL_8]] : i1 to i32
+// MLIR:           llvm.cond_br %[[VAL_8]], ^bb1, ^bb2
+// MLIR:         ^bb1:
+// MLIR:           llvm.br ^bb3(%[[VAL_9]] : i32)
+// MLIR:         ^bb2:
+// MLIR:           %[[VAL_10:.*]] = llvm.mlir.addressof @a : !llvm.ptr
+// MLIR:           %[[VAL_11:.*]] = llvm.bitcast %[[VAL_10]] : !llvm.ptr to !llvm.ptr
+// MLIR:           %[[VAL_12:.*]] = llvm.mlir.constant(0 : i32) : i32
+// MLIR:           %[[VAL_13:.*]] = llvm.sext %[[VAL_12]] : i32 to i64
+// MLIR:           %[[VAL_14:.*]] = llvm.getelementptr %[[VAL_11]][0, %[[VAL_13]]] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<0 x struct<"union.anon.0", (struct<"union.A", (i32)>)>>
+// MLIR:           %[[VAL_15:.*]] = llvm.bitcast %[[VAL_14]] : !llvm.ptr to !llvm.ptr
+// MLIR:           %[[VAL_16:.*]] = llvm.bitcast %[[VAL_15]] : !llvm.ptr to !llvm.ptr
+// MLIR:           %[[VAL_17:.*]] = llvm.load %[[VAL_16]] {alignment = 4 : i64} : !llvm.ptr -> i32
+// MLIR:           llvm.br ^bb3(%[[VAL_17]] : i32)
+// MLIR:         ^bb3(%[[VAL_18:.*]]: i32):
+// MLIR:           %[[VAL_19:.*]] = llvm.mlir.constant(0 : i32) : i32
+// MLIR:           %[[VAL_20:.*]] = llvm.sub %[[VAL_18]], %[[VAL_19]] overflow<nsw> : i32
+// MLIR:           %[[VAL_21:.*]] = llvm.sext %[[VAL_20]] : i32 to i64
+// MLIR:           %[[VAL_22:.*]] = llvm.getelementptr %[[VAL_3]][0, %[[VAL_21]]] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<0 x struct<"union.anon.0", (struct<"union.A", (i32)>)>>
+// MLIR:           %[[VAL_23:.*]] = llvm.load %[[VAL_1]] {alignment = 4 : i64} : !llvm.ptr -> !llvm.struct<"union.A", (i32)>
+// MLIR:           llvm.return %[[VAL_23]] : !llvm.struct<"union.A", (i32)>
+// MLIR:         }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/try-catch.cpp b/clang/test/CIR/Incubator/Lowering/try-catch.cpp
new file mode 100644
index 0000000000000..cae5a6957e1e2
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/try-catch.cpp
@@ -0,0 +1,112 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -fno-clangir-call-conv-lowering -emit-cir-flat %s -o %t.flat.cir
+// RUN: FileCheck --input-file=%t.flat.cir --check-prefix=CIR_FLAT %s
+// RUN_DISABLED: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -mconstructor-aliases -fclangir -emit-llvm %s -o %t.ll
+// RUN_DISABLED: FileCheck --input-file=%t.ll --check-prefix=CIR_LLVM %s
+double division(int a, int b);
+
+// CIR: cir.func {{.*}} @_Z2tcv()
+// CIR_FLAT: cir.func {{.*}} @_Z2tcv()
+unsigned long long tc() {
+  int x = 50, y = 3;
+  unsigned long long z;
+
+  // CIR_FLAT:     cir.alloca !s32i, !cir.ptr<!s32i>, ["a"
+  // CIR_FLAT:     cir.alloca !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>, ["msg"]
+  // CIR_FLAT:     cir.alloca !s32i, !cir.ptr<!s32i>, ["idx"]
+  // CIR_FLAT:     cir.br ^bb2
+  try {
+    // CIR_FLAT:   ^bb2:  // pred: ^bb1
+    // CIR_FLAT:     cir.try_call @_Z8divisionii({{.*}}) ^[[CONT:.*]], ^[[LPAD:.*]] : (!s32i, !s32i)
+    int a = 4;
+    z = division(x, y);
+
+    // CIR_FLAT: ^[[CONT:.*]]:  // pred: ^bb2
+    // CIR_FLAT: cir.cast float_to_int %{{.*}} : !cir.double -> !u64i
+    a++;
+    // CIR_FLAT: cir.br ^[[AFTER_TRY:.*]] loc
+
+    // CIR_FLAT: ^[[LPAD]]:  // pred: ^bb2
+    // CIR_FLAT:   %[[EH:.*]], %[[SEL:.*]] = cir.eh.inflight_exception [@_ZTIi, @_ZTIPKc]
+    // CIR_FLAT:   cir.br ^[[BB_INT_IDX_SEL:.*]](%[[EH]], %[[SEL]] : {{.*}}) loc
+  } catch (int idx) {
+    // CIR_FLAT: ^[[BB_INT_IDX_SEL]](%[[INT_IDX_EH:.*]]: !cir.ptr<!void> loc({{.*}}), %[[INT_IDX_SEL:.*]]: !u32i
+    // CIR_FLAT:   %[[INT_IDX_ID:.*]] = cir.eh.typeid @_ZTIi
+    // CIR_FLAT:   %[[MATCH_CASE_INT_IDX:.*]] = cir.cmp(eq, %[[INT_IDX_SEL]], %[[INT_IDX_ID]]) : !u32i, !cir.bool
+    // CIR_FLAT:   cir.brcond %[[MATCH_CASE_INT_IDX]] ^[[BB_INT_IDX_CATCH:.*]](%[[INT_IDX_EH]] : {{.*}}), ^[[BB_CHAR_MSG_CMP:.*]](%[[INT_IDX_EH]], %[[INT_IDX_SEL]] : {{.*}}) loc
+    // CIR_FLAT: ^[[BB_INT_IDX_CATCH]](%[[INT_IDX_CATCH_SLOT:.*]]: !cir.ptr<!void>
+    // CIR_FLAT:   %[[PARAM_INT_IDX:.*]] = cir.catch_param begin %[[INT_IDX_CATCH_SLOT]] -> !cir.ptr<!s32i>
+    // CIR_FLAT:   cir.const #cir.int<98>
+    // CIR_FLAT:   cir.br ^[[AFTER_TRY]] loc
+    z = 98;
+    idx++;
+  } catch (const char* msg) {
+    // CIR_FLAT: ^[[BB_CHAR_MSG_CMP]](%[[CHAR_MSG_EH:.*]]: !cir.ptr<!void> loc({{.*}}), %[[CHAR_MSG_SEL:.*]]: !u32i
+    // CIR_FLAT:   %[[CHAR_MSG_ID:.*]] = cir.eh.typeid @_ZTIPKc
+    // CIR_FLAT:   %[[MATCH_CASE_CHAR_MSG:.*]] = cir.cmp(eq, %[[CHAR_MSG_SEL]], %[[CHAR_MSG_ID]])
+    // CIR_FLAT:   cir.brcond %[[MATCH_CASE_CHAR_MSG]] ^[[BB_CHAR_MSG_CATCH:.*]](%[[CHAR_MSG_EH]] : {{.*}}), ^[[BB_RESUME:.*]](%[[CHAR_MSG_EH]], %[[CHAR_MSG_SEL]] : {{.*}}) loc
+    // CIR_FLAT: ^[[BB_CHAR_MSG_CATCH]](%[[CHAR_MSG_CATCH_SLOT:.*]]: !cir.ptr<!void>
+    // CIR_FLAT:   %[[PARAM_CHAR_MSG:.*]] = cir.catch_param begin %[[CHAR_MSG_CATCH_SLOT]] -> !cir.ptr<!s8i>
+    // CIR_FLAT:   cir.const #cir.int<99> : !s32i
+    // CIR_FLAT:   cir.br ^[[AFTER_TRY]] loc
+    z = 99;
+    (void)msg[0];
+  }
+  // CIR_FLAT: ^[[BB_RESUME]](%[[RESUME_EH:.*]]: !cir.ptr<!void> loc({{.*}}), %[[RESUME_SEL:.*]]: !u32i
+  // CIR_FLAT:   cir.resume.flat %[[RESUME_EH]], %[[RESUME_SEL]]
+
+  // CIR_FLAT: ^[[AFTER_TRY]]:
+  // CIR_FLAT: cir.load
+
+  return z;
+}
+
+// CIR_FLAT: cir.func {{.*}} @_Z3tc2v
+unsigned long long tc2() {
+  int x = 50, y = 3;
+  unsigned long long z;
+
+  try {
+    int a = 4;
+    z = division(x, y);
+    a++;
+  } catch (int idx) {
+    // CIR_FLAT: cir.eh.inflight_exception [@_ZTIi, @_ZTIPKc]
+    z = 98;
+    idx++;
+  } catch (const char* msg) {
+    z = 99;
+    (void)msg[0];
+  } catch (...) {
+    // CIR_FLAT:   cir.catch_param
+    // CIR_FLAT:   cir.const #cir.int<100> : !s32i
+    // CIR_FLAT:   cir.br ^[[AFTER_TRY:.*]] loc
+    // CIR_FLAT: ^[[AFTER_TRY]]:  // 4 preds
+    // CIR_FLAT:   cir.load
+    // CIR_FLAT:   cir.return
+    z = 100;
+  }
+
+  return z;
+}
+
+// CIR_FLAT: cir.func {{.*}} @_Z3tc3v
+unsigned long long tc3() {
+  int x = 50, y = 3;
+  unsigned long long z;
+
+  try {
+    z = division(x, y);
+  } catch (...) {
+    // CIR_FLAT:   cir.eh.inflight_exception loc
+    // CIR_FLAT:   cir.br ^[[CATCH_ALL:.*]]({{.*}} : {{.*}}) loc
+    // CIR_FLAT: ^[[CATCH_ALL]](%[[CATCH_ALL_EH:.*]]: !cir.ptr<!void>
+    // CIR_FLAT:   cir.catch_param begin %[[CATCH_ALL_EH]] -> !cir.ptr<!void>
+    // CIR_FLAT:   cir.const #cir.int<100> : !s32i
+    // CIR_FLAT:   cir.br ^[[AFTER_TRY:.*]] loc
+    // CIR_FLAT: ^[[AFTER_TRY]]:  // 2 preds
+    // CIR_FLAT:   cir.load
+    z = 100;
+  }
+
+  return z;
+}
diff --git a/clang/test/CIR/Incubator/Lowering/types.cir b/clang/test/CIR/Incubator/Lowering/types.cir
new file mode 100644
index 0000000000000..c195b09e1a934
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/types.cir
@@ -0,0 +1,18 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+!void = !cir.void
+!u8i = !cir.int<u, 8>
+module {
+  cir.global external @testVTable = #cir.vtable<{#cir.const_array<[#cir.ptr<-8> : !cir.ptr<!u8i>]> : !cir.array<!cir.ptr<!u8i> x 1>}> : !cir.record<struct {!cir.array<!cir.ptr<!u8i> x 1>}>
+  // CHECK: llvm.mlir.constant(-8 : i64) : i64
+  // CHECK:  llvm.inttoptr %{{[0-9]+}} : i64 to !llvm.ptr
+  cir.func @testTypeLowering() {
+    // Should lower void pointers as opaque pointers.
+    %0 = cir.const #cir.ptr<null> : !cir.ptr<!void>
+    // CHECK: llvm.mlir.zero : !llvm.ptr
+    %1 = cir.const #cir.ptr<null> : !cir.ptr<!cir.ptr<!void>>
+    // CHECK: llvm.mlir.zero : !llvm.ptr
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/unary-inc-dec.cir b/clang/test/CIR/Incubator/Lowering/unary-inc-dec.cir
new file mode 100644
index 0000000000000..4dac6ac55318b
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/unary-inc-dec.cir
@@ -0,0 +1,63 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering  | FileCheck %s -check-prefix=LLVM
+!s32i = !cir.int<s, 32>
+module {
+  cir.func @foo() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] {alignment = 4 : i64}
+    %2 = cir.const #cir.int<2> : !s32i
+    cir.store %2, %0 : !s32i, !cir.ptr<!s32i>
+    cir.store %2, %1 : !s32i, !cir.ptr<!s32i>
+
+    %3 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    %4 = cir.unary(inc, %3) : !s32i, !s32i
+    cir.store %4, %0 : !s32i, !cir.ptr<!s32i>
+
+    %5 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %6 = cir.unary(dec, %5) : !s32i, !s32i
+    cir.store %6, %1 : !s32i, !cir.ptr<!s32i>
+    cir.return
+  }
+
+// MLIR: = llvm.mlir.constant(1 : i32)
+// MLIR: = llvm.add
+// MLIR: = llvm.mlir.constant(1 : i32)
+// MLIR: = llvm.sub
+
+// LLVM: = add i32 %[[#]], 1
+// LLVM: = sub i32 %[[#]], 1
+
+  cir.func @floatingPoint(%arg0: !cir.float, %arg1: !cir.double) {
+  // MLIR: llvm.func @floatingPoint
+    %0 = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["f", init] {alignment = 4 : i64}
+    %1 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["d", init] {alignment = 8 : i64}
+    cir.store %arg0, %0 : !cir.float, !cir.ptr<!cir.float>
+    cir.store %arg1, %1 : !cir.double, !cir.ptr<!cir.double>
+
+    %2 = cir.load %0 : !cir.ptr<!cir.float>, !cir.float
+    %3 = cir.unary(inc, %2) : !cir.float, !cir.float
+    cir.store %3, %0 : !cir.float, !cir.ptr<!cir.float>
+    // MLIR: %[[#F_ONE:]] = llvm.mlir.constant(1.000000e+00 : f32) : f32
+    // MLIR: = llvm.fadd %[[#F_ONE]], %{{[0-9]+}}  : f32
+
+    %4 = cir.load %0 : !cir.ptr<!cir.float>, !cir.float
+    %5 = cir.unary(dec, %4) : !cir.float, !cir.float
+    cir.store %5, %0 : !cir.float, !cir.ptr<!cir.float>
+    // MLIR: %[[#D_ONE:]] = llvm.mlir.constant(-1.000000e+00 : f32) : f32
+    // MLIR: = llvm.fadd %[[#D_ONE]], %{{[0-9]+}}  : f32
+
+    %6 = cir.load %1 : !cir.ptr<!cir.double>, !cir.double
+    %7 = cir.unary(inc, %6) : !cir.double, !cir.double
+    cir.store %7, %1 : !cir.double, !cir.ptr<!cir.double>
+    // MLIR: %[[#D_ONE:]] = llvm.mlir.constant(1.000000e+00 : f64) : f64
+    // MLIR: = llvm.fadd %[[#D_ONE]], %{{[0-9]+}}  : f64
+
+    %8 = cir.load %1 : !cir.ptr<!cir.double>, !cir.double
+    %9 = cir.unary(dec, %8) : !cir.double, !cir.double
+    cir.store %9, %1 : !cir.double, !cir.ptr<!cir.double>
+    // MLIR: %[[#D_ONE:]] = llvm.mlir.constant(-1.000000e+00 : f64) : f64
+    // MLIR: = llvm.fadd %[[#D_ONE]], %{{[0-9]+}}  : f64
+
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/unary-not.cir b/clang/test/CIR/Incubator/Lowering/unary-not.cir
new file mode 100644
index 0000000000000..dd8372b132ed5
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/unary-not.cir
@@ -0,0 +1,78 @@
+// RUN: cir-opt %s -cir-to-llvm -o - | FileCheck %s -check-prefix=MLIR
+// RUN: cir-translate %s -cir-to-llvmir --disable-cc-lowering  | FileCheck %s -check-prefix=LLVM
+!s32i = !cir.int<s, 32>
+module {
+    cir.func @foo() -> !s32i  {
+        %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+        %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+        %2 = cir.const #cir.int<1> : !s32i
+        cir.store %2, %1 : !s32i, !cir.ptr<!s32i>
+        %3 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+        %4 = cir.unary(not, %3) : !s32i, !s32i
+        cir.store %4, %0 : !s32i, !cir.ptr<!s32i>
+        %5 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        cir.return %5 : !s32i
+    }
+
+// MLIR: = llvm.load
+// MLIR: = llvm.mlir.constant(-1 : i32)
+// MLIR: = llvm.xor
+
+// LLVM: = xor i32 %[[#]], -1
+
+
+    cir.func @floatingPoint(%arg0: !cir.float, %arg1: !cir.double) {
+    // MLIR: llvm.func @floatingPoint
+        %0 = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["f", init] {alignment = 4 : i64}
+        %1 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["d", init] {alignment = 8 : i64}
+        cir.store %arg0, %0 : !cir.float, !cir.ptr<!cir.float>
+        cir.store %arg1, %1 : !cir.double, !cir.ptr<!cir.double>
+        %2 = cir.load %0 : !cir.ptr<!cir.float>, !cir.float
+        %3 = cir.cast float_to_bool %2 : !cir.float -> !cir.bool
+        // MLIR: %[[#F_ZERO:]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
+        // MLIR: %[[#F_BOOL:]] = llvm.fcmp "une" %{{.+}}, %[[#F_ZERO]] : f32
+        %4 = cir.unary(not, %3) : !cir.bool, !cir.bool
+        // MLIR: %[[#F_ONE:]] = llvm.mlir.constant(true) : i1
+        // MLIR: = llvm.xor %[[#F_BOOL]], %[[#F_ONE]]  : i1
+        %5 = cir.load %1 : !cir.ptr<!cir.double>, !cir.double
+        %6 = cir.cast float_to_bool %5 : !cir.double -> !cir.bool
+        // MLIR: %[[#D_ZERO:]] = llvm.mlir.constant(0.000000e+00 : f64) : f64
+        // MLIR: %[[#D_BOOL:]] = llvm.fcmp "une" %{{.+}}, %[[#D_ZERO]] : f64
+        %7 = cir.unary(not, %6) : !cir.bool, !cir.bool
+        // MLIR: %[[#D_ONE:]] = llvm.mlir.constant(true) : i1
+        // MLIR: = llvm.xor %[[#D_BOOL]], %[[#D_ONE]]  : i1
+        cir.return
+    }
+
+    cir.func @CStyleValueNegation(%arg0: !s32i, %arg1: !cir.float) {
+    // MLIR: llvm.func @CStyleValueNegation
+        %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+        %3 = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["f", init] {alignment = 4 : i64}
+        cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+        cir.store %arg1, %3 : !cir.float, !cir.ptr<!cir.float>
+
+        %5 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %6 = cir.cast int_to_bool %5 : !s32i -> !cir.bool
+        %7 = cir.unary(not, %6) : !cir.bool, !cir.bool
+        %8 = cir.cast bool_to_int %7 : !cir.bool -> !s32i
+        // MLIR: %[[#INT:]] = llvm.load %{{.+}} : !llvm.ptr
+        // MLIR: %[[#IZERO:]] = llvm.mlir.constant(0 : i32) : i32
+        // MLIR: %[[#ICMP:]] = llvm.icmp "ne" %[[#INT]], %[[#IZERO]] : i32
+        // MLIR: %[[#IONE:]] = llvm.mlir.constant(true) : i1
+        // MLIR: %[[#IXOR:]] = llvm.xor %[[#ICMP]], %[[#IONE]]  : i1
+        // MLIR: = llvm.zext %[[#IXOR]] : i1 to i32
+
+        %17 = cir.load %3 : !cir.ptr<!cir.float>, !cir.float
+        %18 = cir.cast float_to_bool %17 : !cir.float -> !cir.bool
+        %19 = cir.unary(not, %18) : !cir.bool, !cir.bool
+        %20 = cir.cast bool_to_int %19 : !cir.bool -> !s32i
+        // MLIR: %[[#FLOAT:]] = llvm.load %{{.+}} : !llvm.ptr
+        // MLIR: %[[#FZERO:]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
+        // MLIR: %[[#FCMP:]] = llvm.fcmp "une" %[[#FLOAT]], %[[#FZERO]] : f32
+        // MLIR: %[[#FONE:]] = llvm.mlir.constant(true) : i1
+        // MLIR: %[[#FXOR:]] = llvm.xor %[[#FCMP]], %[[#FONE]]  : i1
+        // MLIR: = llvm.zext %[[#FXOR]] : i1 to i32
+
+        cir.return
+    }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/unary-plus-minus.cir b/clang/test/CIR/Incubator/Lowering/unary-plus-minus.cir
new file mode 100644
index 0000000000000..cdb4d90fa854c
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/unary-plus-minus.cir
@@ -0,0 +1,43 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir -check-prefix=MLIR
+
+!s32i = !cir.int<s, 32>
+module {
+  cir.func @foo() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] {alignment = 4 : i64}
+    %2 = cir.const #cir.int<2> : !s32i
+    cir.store %2, %0 : !s32i, !cir.ptr<!s32i>
+    cir.store %2, %1 : !s32i, !cir.ptr<!s32i>
+
+    %3 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    %4 = cir.unary(plus, %3) : !s32i, !s32i
+    cir.store %4, %0 : !s32i, !cir.ptr<!s32i>
+
+    %5 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    %6 = cir.unary(minus, %5) : !s32i, !s32i
+    cir.store %6, %1 : !s32i, !cir.ptr<!s32i>
+    cir.return
+  }
+
+// MLIR: %[[#INPUT_PLUS:]] = llvm.load
+// MLIR: llvm.store %[[#INPUT_PLUS]]
+// MLIR: %[[#INPUT_MINUS:]] = llvm.load
+// MLIR: %[[ZERO:[a-z0-9_]+]] = llvm.mlir.constant(0 : i32)
+// MLIR: llvm.sub %[[ZERO]], %[[#INPUT_MINUS]]
+
+  cir.func @floatingPoints(%arg0: !cir.double) {
+  // MLIR: llvm.func @floatingPoints(%arg0: f64)
+    %0 = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["X", init] {alignment = 8 : i64}
+    cir.store %arg0, %0 : !cir.double, !cir.ptr<!cir.double>
+    %1 = cir.load %0 : !cir.ptr<!cir.double>, !cir.double
+    %2 = cir.unary(plus, %1) : !cir.double, !cir.double
+    // MLIR: llvm.store %arg0, %[[#F_PLUS:]] {{.*}}: f64, !llvm.ptr
+    // MLIR: %{{[0-9]}} = llvm.load %[[#F_PLUS]] {alignment = 8 : i64} : !llvm.ptr -> f64
+    %3 = cir.load %0 : !cir.ptr<!cir.double>, !cir.double
+    %4 = cir.unary(minus, %3) : !cir.double, !cir.double
+    // MLIR: %[[#F_MINUS:]] = llvm.load %{{[0-9]}} {alignment = 8 : i64} : !llvm.ptr -> f64
+    // MLIR: %{{[0-9]}} = llvm.fneg %[[#F_MINUS]] : f64
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/union-in-struct-init.c b/clang/test/CIR/Incubator/Lowering/union-in-struct-init.c
new file mode 100644
index 0000000000000..80b5ed32ad758
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/union-in-struct-init.c
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll 
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+typedef struct {
+  union {
+    int a;
+    long b;
+  };
+} S;
+
+S s = { .a = 1 };
+
+// LLVM: @s = global { { i32, [4 x i8] } } { { i32, [4 x i8] } { i32 1, [4 x i8] zeroinitializer } }
+
+typedef struct {
+  union {
+    int a;
+    long b;
+  };
+} S2;
+
+S2 s2 = { .b = 1 };
+
+// LLVM: @s2 = global { { i64 } } { { i64 } { i64 1 } }
+
+typedef struct {
+  union {
+    int a;
+    long b;
+    long double c;
+  };
+} S3;
+
+S3 s3 = { .a = 1 };
+
+// LLVM: @s3 = global { { i32, [12 x i8] } } { { i32, [12 x i8] } { i32 1, [12 x i8] zeroinitializer } }
+
+typedef struct {
+  int a, b, c, d;
+} T;
+
+typedef union {
+  struct {
+    int a;
+    long b;
+  };
+  T c;
+} S4;
+
+S4 s4 = {.c = {1, 2, 3, 4}};
+
+// LLVM: @s4 = global { %struct.T } { %struct.T { i32 1, i32 2, i32 3, i32 4 } }
diff --git a/clang/test/CIR/Incubator/Lowering/unions.cir b/clang/test/CIR/Incubator/Lowering/unions.cir
new file mode 100644
index 0000000000000..255321c509bf9
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/unions.cir
@@ -0,0 +1,43 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s
+
+!s16i = !cir.int<s, 16>
+!s32i = !cir.int<s, 32>
+#true = #cir.bool<true> : !cir.bool
+!rec_U1_ = !cir.record<union "U1" {!cir.bool, !s16i, !s32i} #cir.record.decl.ast>
+!rec_U2_ = !cir.record<union "U2" {f64, !rec_U1_} #cir.record.decl.ast>
+!rec_U3_ = !cir.record<union "U3" {!s16i, !rec_U1_} #cir.record.decl.ast>
+module {
+  // Should lower union to struct with only the largest member.
+  cir.global external @u1 = #cir.zero : !rec_U1_
+  // CHECK: llvm.mlir.global external @u1(#llvm.zero) {addr_space = 0 : i32} : !llvm.struct<"union.U1", (i32)>
+
+  // Should recursively find the largest member if there are nested unions.
+  cir.global external @u2 = #cir.zero : !rec_U2_
+  cir.global external @u3 = #cir.zero : !rec_U3_
+  // CHECK: llvm.mlir.global external @u2(#llvm.zero) {addr_space = 0 : i32} : !llvm.struct<"union.U2", (f64)>
+  // CHECK: llvm.mlir.global external @u3(#llvm.zero) {addr_space = 0 : i32} : !llvm.struct<"union.U3", (struct<"union.U1", (i32)>)>
+
+  // CHECK: llvm.func @test
+  cir.func @test(%arg0: !cir.ptr<!rec_U1_>) {
+
+    // Should store directly to the union's base address.
+    %5 = cir.const #true
+    %6 = cir.get_member %arg0[0] {name = "b"} : !cir.ptr<!rec_U1_> -> !cir.ptr<!cir.bool>
+    cir.store %5, %6 : !cir.bool, !cir.ptr<!cir.bool>
+    // CHECK: %[[#TRUE:]] = llvm.mlir.constant(true) : i1
+    // The bitcast is just to bypass the type checker. It will be replaced by an opaque pointer.
+    // CHECK: %[[#ADDR:]] = llvm.bitcast %{{.+}} : !llvm.ptr
+    // CHECK: %[[#VAL:]] = llvm.zext %[[#TRUE]] : i1 to i8
+    // CHECK: llvm.store %[[#VAL]], %[[#ADDR]] {{.*}}: i8, !llvm.ptr
+
+    // Should load directly from the union's base address.
+    %7 = cir.get_member %arg0[0] {name = "b"} : !cir.ptr<!rec_U1_> -> !cir.ptr<!cir.bool>
+    %8 = cir.load %7 : !cir.ptr<!cir.bool>, !cir.bool
+    // The bitcast is just to bypass the type checker. It will be replaced by an opaque pointer.
+    // CHECK: %[[#BASE:]] = llvm.bitcast %{{.+}} : !llvm.ptr
+    // CHECK: %{{.+}} = llvm.load %[[#BASE]] {alignment = 1 : i64} : !llvm.ptr -> i8
+
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/var-arg-x86_64.c b/clang/test/CIR/Incubator/Lowering/var-arg-x86_64.c
new file mode 100644
index 0000000000000..3c86881f7bd42
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/var-arg-x86_64.c
@@ -0,0 +1,210 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -fno-clangir-call-conv-lowering %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s
+
+#include <stdarg.h>
+
+double f1(int n, ...) {
+  va_list valist;
+  va_start(valist, n);
+  double res = va_arg(valist, double);
+  va_end(valist);
+  return res;
+}
+
+// CHECK: [[VA_LIST_TYPE:%.+]] = type { i32, i32, ptr, ptr }
+
+// CHECK: define {{.*}}@f1
+// CHECK: [[VA_LIST_ALLOCA:%.+]] = alloca {{.*}}[[VA_LIST_TYPE]]
+// CHECK: [[VA_LIST:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: call {{.*}}@llvm.va_start.p0(ptr [[VA_LIST]])
+// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: [[FP_OFFSET_P:%.+]] = getelementptr {{.*}} [[VA_LIST2]], i32 0, i32 1
+// CHECK: [[FP_OFFSET:%.+]] = load {{.*}}, ptr [[FP_OFFSET_P]]
+// CHECK: [[COMPARED:%.+]] = icmp ule i32 {{.*}}, 160
+// CHECK: br i1 [[COMPARED]], label %[[THEN_BB:.+]], label %[[ELSE_BB:.+]]
+//
+// CHECK: [[THEN_BB]]:
+// CHECK:   [[UPDATED_FP_OFFSET:%.+]] = add i32 [[FP_OFFSET]], 8
+// CHECK:   store i32 [[UPDATED_FP_OFFSET]], ptr [[FP_OFFSET_P]]
+// CHECK:   br label %[[CONT_BB:.+]]
+//
+// CHECK: [[ELSE_BB]]:
+// CHECK:   [[OVERFLOW_ARG_AREA_ADDR:%.+]] = getelementptr {{.*}} [[VA_LIST2]], i32 0, i32 2
+// CHECK:   [[OVERFLOW_ARG_AREA:%.+]] = load ptr, ptr [[OVERFLOW_ARG_AREA_ADDR]]
+// CHECK:   [[OVERFLOW_ARG_AREA_OFFSET:%.+]] = getelementptr {{.*}} [[OVERFLOW_ARG_AREA]], i64 8
+// CHECK:   store ptr [[OVERFLOW_ARG_AREA_OFFSET]], ptr [[OVERFLOW_ARG_AREA_ADDR]]
+// CHECK:   br label %[[CONT_BB]]
+//
+// CHECK: [[CONT_BB]]:
+// CHECK: [[VA_LIST3:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: call {{.*}}@llvm.va_end.p0(ptr [[VA_LIST3]])
+
+// CIR: cir.func {{.*}} @f1
+// CIR: [[VA_LIST_ALLOCA:%.+]] = cir.alloca !cir.array<!rec___va_list_tag x 1>,
+// CIR: [[RES:%.+]] = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["res",
+// CIR: [[VASTED_VA_LIST:%.+]] = cir.cast array_to_ptrdecay [[VA_LIST_ALLOCA]]
+// CIR: cir.va.start [[VASTED_VA_LIST]]
+// CIR: [[VASTED_VA_LIST:%.+]] = cir.cast array_to_ptrdecay [[VA_LIST_ALLOCA]]
+// CIR: [[VAARG_RESULT:%.+]] = cir.scope
+// CIR: [[FP_OFFSET_P:%.+]] = cir.get_member [[VASTED_VA_LIST]][1] {name = "fp_offset"}
+// CIR: [[FP_OFFSET:%.+]] = cir.load [[FP_OFFSET_P]]
+// CIR: [[OFFSET_CONSTANT:%.+]] = cir.const #cir.int<160>
+// CIR: [[CMP:%.+]] = cir.cmp(le, [[FP_OFFSET]], [[OFFSET_CONSTANT]])
+// CIR: cir.brcond [[CMP]] ^[[InRegBlock:.+]], ^[[InMemBlock:.+]] loc
+//
+// CIR: ^[[InRegBlock]]:
+// CIR: [[REG_SAVE_AREA_P:%.+]] = cir.get_member [[VASTED_VA_LIST]][3] {name = "reg_save_area"}
+// CIR: [[REG_SAVE_AREA:%.+]] = cir.load [[REG_SAVE_AREA_P]]
+// CIR: [[UPDATED:%.+]] = cir.ptr_stride [[REG_SAVE_AREA]], [[FP_OFFSET]] : (!cir.ptr<!void>, !u32i) -> !cir.ptr<!void>
+// CIR: [[CONSTANT:%.+]] = cir.const #cir.int<8>
+// CIR: [[ADDED:%.+]] = cir.binop(add, [[FP_OFFSET]], [[CONSTANT]])
+// CIR: cir.store{{.*}} [[ADDED]], [[FP_OFFSET_P]]
+// CIR: cir.br ^[[ContBlock:.+]]([[UPDATED]]
+//
+// CIR: ^[[InMemBlock]]:
+// CIR: [[OVERFLOW_ARG_AREA_P:%.+]] = cir.get_member [[VASTED_VA_LIST]][2] {name = "overflow_arg_area"}
+// CIR: [[OVERFLOW_ARG_AREA:%.+]] = cir.load [[OVERFLOW_ARG_AREA_P]]
+// CIR: [[OFFSET:%.+]] = cir.const #cir.int<8>
+// CIR: [[CASTED:%.+]] = cir.cast bitcast [[OVERFLOW_ARG_AREA]] : !cir.ptr<!void>
+// CIR: [[NEW_VALUE:%.+]] = cir.ptr_stride [[CASTED]], [[OFFSET]] : (!cir.ptr<!s8i>, !s32i) -> !cir.ptr<!s8i>
+// CIR: [[CASTED_P:%.+]] = cir.cast bitcast [[OVERFLOW_ARG_AREA_P]] : !cir.ptr<!cir.ptr<!void>>
+// CIR: cir.store [[NEW_VALUE]], [[CASTED_P]]
+// CIR: cir.br ^[[ContBlock]]([[OVERFLOW_ARG_AREA]]
+//
+// CIR: ^[[ContBlock]]([[ARG:.+]]: !cir.ptr
+// CIR: [[CASTED_ARG_P:%.+]] = cir.cast bitcast [[ARG]]
+// CIR: [[CASTED_ARG:%.+]] = cir.load align(16) [[CASTED_ARG_P]]
+// CIR: cir.yield [[CASTED_ARG]]
+//
+// CIR: cir.store{{.*}} [[VAARG_RESULT]], [[RES]]
+long double f2(int n, ...) {
+  va_list valist;
+  va_start(valist, n);
+  long double res = va_arg(valist, long double);
+  va_end(valist);
+  return res;
+}
+
+// CHECK: define {{.*}}@f2
+// CHECK: [[RESULT:%.+]] = alloca x86_fp80
+// CHECK: [[VA_LIST_ALLOCA:%.+]] = alloca {{.*}}[[VA_LIST_TYPE]]
+// CHECK: [[RES:%.+]] = alloca x86_fp80
+// CHECK: [[VA_LIST:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: call {{.*}}@llvm.va_start.p0(ptr [[VA_LIST]])
+// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: [[OVERFLOW_AREA_P:%.+]] = getelementptr {{.*}} [[VA_LIST2]], i32 0, i32 2
+// CHECK: [[OVERFLOW_AREA:%.+]] = load ptr, ptr [[OVERFLOW_AREA_P]]
+// Ptr Mask Operations
+// CHECK: [[OVERFLOW_AREA_OFFSET_ALIGNED:%.+]] = getelementptr i8, ptr [[OVERFLOW_AREA]], i64 15
+// CHECK: [[PTR_MASKED:%.+]] = call ptr @llvm.ptrmask.{{.*}}.[[PTR_SIZE_INT:.*]](ptr [[OVERFLOW_AREA_OFFSET_ALIGNED]], [[PTR_SIZE_INT]] -16)
+// CHECK: [[OVERFLOW_AREA_NEXT:%.+]] = getelementptr i8, ptr [[PTR_MASKED]], i64 16
+// CHECK: store ptr [[OVERFLOW_AREA_NEXT]], ptr [[OVERFLOW_AREA_P]]
+// CHECK: [[VALUE:%.+]] = load x86_fp80, ptr [[PTR_MASKED]]
+// CHECK: store x86_fp80 [[VALUE]], ptr [[RES]]
+// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: call {{.*}}@llvm.va_end.p0(ptr [[VA_LIST2]])
+// CHECK: [[VALUE2:%.+]] = load x86_fp80, ptr [[RES]]
+// CHECK: store x86_fp80 [[VALUE2]], ptr [[RESULT]]
+// CHECK: [[RETURN_VALUE:%.+]] = load x86_fp80, ptr [[RESULT]]
+// CHECK: ret x86_fp80 [[RETURN_VALUE]]
+
+// CIR: cir.func {{.*}} @f2
+// CIR: [[VA_LIST_ALLOCA:%.+]] = cir.alloca !cir.array<!rec___va_list_tag x 1>, !cir.ptr<!cir.array<!rec___va_list_tag x 1>>, ["valist"]
+// CIR: [[RES:%.+]] = cir.alloca !cir.long_double<!cir.f80>, !cir.ptr<!cir.long_double<!cir.f80>>, ["res"
+// CIR: [[VASTED_VA_LIST:%.+]] = cir.cast array_to_ptrdecay [[VA_LIST_ALLOCA]]
+// CIR: cir.va.start [[VASTED_VA_LIST]]
+// CIR: [[VASTED_VA_LIST:%.+]] = cir.cast array_to_ptrdecay [[VA_LIST_ALLOCA]]
+// CIR: [[OVERFLOW_AREA_P:%.+]] = cir.get_member [[VASTED_VA_LIST]][2] {name = "overflow_arg_area"}
+// CIR-DAG: [[OVERFLOW_AREA:%.+]] = cir.load [[OVERFLOW_AREA_P]]
+// CIR-DAG: [[CASTED:%.+]] = cir.cast bitcast [[OVERFLOW_AREA]] : !cir.ptr<!void>
+// CIR-DAG: [[CONSTANT:%.+]] = cir.const #cir.int<15>
+// CIR-DAG: [[PTR_STRIDE:%.+]] = cir.ptr_stride [[CASTED]], [[CONSTANT]] : (!cir.ptr<!u8i>, !u32i) -> !cir.ptr<!u8i>
+// CIR-DAG: [[MINUS_ALIGN:%.+]] = cir.const #cir.int<-16>
+// CIR-DAG: [[ALIGNED:%.+]] = cir.ptr_mask([[PTR_STRIDE]], [[MINUS_ALIGN]]
+// CIR: [[ALIGN:%.+]] = cir.const #cir.int<16>
+// CIR: [[CAST_ALIGNED:%.+]] = cir.cast bitcast [[ALIGNED]] : !cir.ptr<!u8i> -> !cir.ptr<!cir.long_double<!cir.f80>>
+// CIR: [[CAST_ALIGNED_VALUE:%.+]] = cir.load [[CAST_ALIGNED]]
+// CIR: cir.store{{.*}} [[CAST_ALIGNED_VALUE]], [[RES]]
+// CIR: cir.va.end
+
+const char *f3(va_list args) {
+  return va_arg(args, const char *);
+}
+
+// CHECK: define{{.*}} @f3
+// CHECK: [[VA_LIST_ALLOCA:%.+]] = alloca ptr
+// ...
+// CHECK: [[VA_LIST:%.+]] = load {{.*}} [[VA_LIST_ALLOCA]]
+// CHECK: [[OFFSET_PTR:%.+]] = getelementptr {{.*}} [[VA_LIST]], i32 0, i32 0
+// CHECK: [[OFFSET:%.+]] = load {{.*}}, ptr [[OFFSET_PTR]]
+// CHECK: [[CMP:%.+]] = icmp ule i32 [[OFFSET]], 40
+// CHECK: br i1 [[CMP]], label %[[THEN_BB:.+]], label %[[ELSE_BB:.+]]
+//
+// CHECK: [[THEN_BB]]:
+// ...
+// CHECK:   [[NEW_OFFSET:%.+]] = add i32 [[OFFSET]], 8
+// CHECK:   store i32 [[NEW_OFFSET]], ptr [[OFFSET_PTR]]
+// CHECK:   br label %[[CONT_BB:.+]]
+//
+// CHECK: [[ELSE_BB]]:
+// ...
+// CHECK:   [[OVERFLOW_ARG_AREA_ADDR:%.+]] = getelementptr {{.*}} [[VA_LIST]], i32 0, i32 2
+// CHECK:   [[OVERFLOW_ARG_AREA:%.+]] = load ptr, ptr [[OVERFLOW_ARG_AREA_ADDR]]
+// CHECK:   [[OVERFLOW_ARG_AREA_OFFSET:%.+]] = getelementptr {{.*}} [[OVERFLOW_ARG_AREA]], i64 8
+// CHECK:   store ptr [[OVERFLOW_ARG_AREA_OFFSET]], ptr [[OVERFLOW_ARG_AREA_ADDR]]
+// CHECK:   br label %[[CONT_BB]]
+//
+// CHECK: [[CONT_BB]]:
+// ...
+// CHECK: ret
+
+// CIR-LABEL:   cir.func {{.*}} @f3(
+// CIR:           %[[VALIST_VAR:.*]] = cir.alloca !cir.ptr<!rec___va_list_tag>, !cir.ptr<!cir.ptr<!rec___va_list_tag>>, ["args", init] {alignment = 8 : i64}
+// CIR:           %[[VALIST:.*]] = cir.load align(8) %[[VALIST_VAR]] : !cir.ptr<!cir.ptr<!rec___va_list_tag>>, !cir.ptr<!rec___va_list_tag>
+// CIR:           %[[GP_OFFSET_PTR:.*]] = cir.get_member %[[VALIST]][0] {name = "gp_offset"} : !cir.ptr<!rec___va_list_tag> -> !cir.ptr<!u32i>
+// CIR:           %[[GP_OFFSET:.*]] = cir.load %[[GP_OFFSET_PTR]] : !cir.ptr<!u32i>, !u32i
+// CIR:           %[[VAL_6:.*]] = cir.const #cir.int<40> : !u32i
+// CIR:           %[[VAL_7:.*]] = cir.cmp(le, %[[GP_OFFSET]], %[[VAL_6]]) : !u32i, !cir.bool
+// CIR:           cir.brcond %[[VAL_7]]
+
+// CIR:           %[[REG_SAVE_AREA_PTR:.*]] = cir.get_member %[[VALIST]][3] {name = "reg_save_area"} : !cir.ptr<!rec___va_list_tag> -> !cir.ptr<!cir.ptr<!void>>
+// CIR:           %[[REG_SAVE_AREA:.*]] = cir.load %[[REG_SAVE_AREA_PTR]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR:           %[[CUR_REG_SAVE_AREA:.*]] = cir.ptr_stride %[[REG_SAVE_AREA]], %[[GP_OFFSET]] : (!cir.ptr<!void>, !u32i) -> !cir.ptr<!void>
+// CIR:           %[[VAL_11:.*]] = cir.const #cir.int<8> : !u32i
+// CIR:           %[[NEW_REG_SAVE_AREA:.*]] = cir.binop(add, %[[GP_OFFSET]], %[[VAL_11]]) : !u32i
+// CIR:           cir.store %[[NEW_REG_SAVE_AREA]], %[[GP_OFFSET_PTR]] : !u32i, !cir.ptr<!u32i>
+// CIR:           cir.br ^[[CONT_BB:.*]](%[[CUR_REG_SAVE_AREA]] : !cir.ptr<!void>)
+
+// CIR:           %[[OVERFLOW_ARG_AREA_PTR:.*]] = cir.get_member %[[VALIST]][2] {name = "overflow_arg_area"} : !cir.ptr<!rec___va_list_tag> -> !cir.ptr<!cir.ptr<!void>>
+// CIR:           %[[OVERFLOW_ARG_AREA:.*]] = cir.load %[[OVERFLOW_ARG_AREA_PTR]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR:           %[[VAL_15:.*]] = cir.const #cir.int<8> : !s32i
+// CIR:           %[[CUR_OVERFLOW_ARG_AREA:.*]] = cir.cast bitcast %[[OVERFLOW_ARG_AREA]] : !cir.ptr<!void> -> !cir.ptr<!s8i>
+// CIR:           %[[NEW_OVERFLOW_ARG_AREA:.*]] = cir.ptr_stride %[[CUR_OVERFLOW_ARG_AREA]], %[[VAL_15]] : (!cir.ptr<!s8i>, !s32i) -> !cir.ptr<!s8i>
+// CIR:           %[[VAL_18:.*]] = cir.cast bitcast %[[OVERFLOW_ARG_AREA_PTR]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!cir.ptr<!s8i>>
+// CIR:           cir.store %[[NEW_OVERFLOW_ARG_AREA]], %[[VAL_18]] : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+// CIR:           cir.br ^[[CONT_BB]](%[[OVERFLOW_ARG_AREA]] : !cir.ptr<!void>)
+
+// ...
+// CIR:           cir.return
+
+void f4(va_list args) {
+  for (; va_arg(args, int); );
+}
+// CIR-LABEL:   cir.func {{.*}} @f4
+// CIR:           cir.for : cond {
+// CIR:             %[[VALIST:.*]] = cir.load align(8) %[[VALIST_VAR]] : !cir.ptr<!cir.ptr<!rec___va_list_tag>>, !cir.ptr<!rec___va_list_tag>
+// CIR:             %[[VAARG_RESULT:.*]] = cir.scope {
+//                    ... // The contents are tested elsewhere.
+// CIR:               cir.yield {{.*}} : !s32i
+// CIR:             } : !s32i
+// CIR:             %[[CMP:.*]] = cir.cast int_to_bool %[[VAARG_RESULT]] : !s32i -> !cir.bool
+// CIR:             cir.condition(%[[CMP]])
+// CIR:           } body {
+// CIR:             cir.yield
+// CIR:           } step {
+// CIR:             cir.yield
+// CIR:           }
+// CIR:           cir.return
+// CIR:         }
diff --git a/clang/test/CIR/Incubator/Lowering/variadics.cir b/clang/test/CIR/Incubator/Lowering/variadics.cir
new file mode 100644
index 0000000000000..44b67dea251e8
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/variadics.cir
@@ -0,0 +1,40 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=MLIR
+
+!s32i = !cir.int<s, 32>
+!u32i = !cir.int<u, 32>
+!u8i = !cir.int<u, 8>
+
+!rec___va_list_tag = !cir.record<struct "__va_list_tag" {!u32i, !u32i, !cir.ptr<!u8i>, !cir.ptr<!u8i>} #cir.record.decl.ast>
+
+module {
+  cir.func @average(%arg0: !s32i, ...) -> !s32i {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["count", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+    %2 = cir.alloca !cir.array<!rec___va_list_tag x 1>, !cir.ptr<!cir.array<!rec___va_list_tag x 1>>, ["args"] {alignment = 16 : i64}
+    %3 = cir.alloca !cir.array<!rec___va_list_tag x 1>, !cir.ptr<!cir.array<!rec___va_list_tag x 1>>, ["args_copy"] {alignment = 16 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    %4 = cir.cast array_to_ptrdecay %2 : !cir.ptr<!cir.array<!rec___va_list_tag x 1>> -> !cir.ptr<!rec___va_list_tag>
+    cir.va.start %4 : !cir.ptr<!rec___va_list_tag>
+    //      MLIR: %{{[0-9]+}} = llvm.getelementptr %{{[0-9]+}}[0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"struct.__va_list_tag", (i32, i32, ptr, ptr)>
+    // MLIR-NEXT: %{{[0-9]+}} = llvm.bitcast %{{[0-9]+}} : !llvm.ptr to !llvm.ptr
+    // MLIR-NEXT: llvm.intr.vastart %{{[0-9]+}} : !llvm.ptr
+    %5 = cir.cast array_to_ptrdecay %3 : !cir.ptr<!cir.array<!rec___va_list_tag x 1>> -> !cir.ptr<!rec___va_list_tag>
+    %6 = cir.cast array_to_ptrdecay %2 : !cir.ptr<!cir.array<!rec___va_list_tag x 1>> -> !cir.ptr<!rec___va_list_tag>
+    cir.va.copy %6 to %5 : !cir.ptr<!rec___va_list_tag>, !cir.ptr<!rec___va_list_tag>
+    //      MLIR: %{{[0-9]+}} = llvm.getelementptr %{{[0-9]+}}[0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"struct.__va_list_tag", (i32, i32, ptr, ptr)>
+    // MLIR-NEXT: %{{[0-9]+}} = llvm.getelementptr %{{[0-9]+}}[0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"struct.__va_list_tag", (i32, i32, ptr, ptr)>
+    // MLIR-NEXT: %{{[0-9]+}} = llvm.bitcast %{{[0-9]+}} : !llvm.ptr to !llvm.ptr
+    // MLIR-NEXT: %{{[0-9]+}} = llvm.bitcast %{{[0-9]+}} : !llvm.ptr to !llvm.ptr
+    // MLIR-NEXT: llvm.intr.vacopy %{{[0-9]+}} to %{{[0-9]+}} : !llvm.ptr, !llvm.ptr
+    %7 = cir.cast array_to_ptrdecay %2 : !cir.ptr<!cir.array<!rec___va_list_tag x 1>> -> !cir.ptr<!rec___va_list_tag>
+    cir.va.end %7 : !cir.ptr<!rec___va_list_tag>
+    //      MLIR: %{{[0-9]+}} = llvm.getelementptr %{{[0-9]+}}[0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"struct.__va_list_tag", (i32, i32, ptr, ptr)>
+    // MLIR-NEXT: %{{[0-9]+}} = llvm.bitcast %{{[0-9]+}} : !llvm.ptr to !llvm.ptr
+    // MLIR-NEXT: llvm.intr.vaend %{{[0-9]+}} : !llvm.ptr
+    %8 = cir.const #cir.int<0> : !s32i
+    cir.store %8, %1 : !s32i, !cir.ptr<!s32i>
+    %9 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    cir.return %9 : !s32i
+  }
+}
diff --git a/clang/test/CIR/Incubator/Lowering/vec-cmp.cir b/clang/test/CIR/Incubator/Lowering/vec-cmp.cir
new file mode 100644
index 0000000000000..cea94bd22edae
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/vec-cmp.cir
@@ -0,0 +1,16 @@
+// RUN: cir-opt %s -cir-to-llvm -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=MLIR
+
+!s16i = !cir.int<s, 16>
+!u16i = !cir.int<u, 16>
+
+cir.func @vec_cmp(%0: !cir.vector<!s16i x 16>, %1: !cir.vector<!s16i x 16>) -> () {
+  %2 = cir.vec.cmp(lt, %0, %1) : !cir.vector<!s16i x 16>, !cir.vector<!cir.int<u, 1> x 16> 
+  %3 = cir.cast bitcast %2 : !cir.vector<!cir.int<u, 1> x 16> -> !u16i
+  cir.return
+}
+    
+// MLIR: llvm.func @vec_cmp
+// MLIR-NEXT: %{{[0-9]+}} = llvm.icmp "slt" %arg0, %arg1 : vector<16xi16>
+// MLIR-NEXT: %{{[0-9]+}} = llvm.bitcast %{{[0-9]+}} : vector<16xi1> to i16
+// MLIR-NEXT: llvm.return
diff --git a/clang/test/CIR/Incubator/Lowering/vectype.cpp b/clang/test/CIR/Incubator/Lowering/vectype.cpp
new file mode 100644
index 0000000000000..eabac1c2fe92b
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/vectype.cpp
@@ -0,0 +1,349 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: cir-opt %t.cir -cir-to-llvm -o %t.mlir
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ii
+// RUN: FileCheck --input-file=%t.mlir %s
+
+typedef int vi4 __attribute__((vector_size(16)));
+typedef double vd2 __attribute__((vector_size(16)));
+typedef long long vll2 __attribute__((vector_size(16)));
+typedef unsigned short vus2 __attribute__((vector_size(4)));
+
+void vector_int_test(int x) {
+
+  // Vector constant.
+  vi4 a = { 1, 2, 3, 4 };
+  // CHECK: %[[#T42:]] = llvm.mlir.constant(dense<[1, 2, 3, 4]> : vector<4xi32>) : vector<4xi32>
+  // CHECK: llvm.store %[[#T42]], %[[#T3:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+
+  // Non-const vector initialization.
+  vi4 b = { x, 5, 6, x + 1 };
+  // CHECK: %[[#T43:]] = llvm.load %[[#T1:]] {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: %[[#T44:]] = llvm.mlir.constant(5 : i32) : i32
+  // CHECK: %[[#T45:]] = llvm.mlir.constant(6 : i32) : i32
+  // CHECK: %[[#T46:]] = llvm.load %[[#T1]] {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: %[[#T47:]] = llvm.mlir.constant(1 : i32) : i32
+  // CHECK: %[[#T48:]] = llvm.add %[[#T46]], %[[#T47]] overflow<nsw> : i32
+  // CHECK: %[[#T49:]] = llvm.mlir.poison : vector<4xi32>
+  // CHECK: %[[#T50:]] = llvm.mlir.constant(0 : i64) : i64
+  // CHECK: %[[#T51:]] = llvm.insertelement %[[#T43]], %[[#T49]][%[[#T50]] : i64] : vector<4xi32>
+  // CHECK: %[[#T52:]] = llvm.mlir.constant(1 : i64) : i64
+  // CHECK: %[[#T53:]] = llvm.insertelement %[[#T44]], %[[#T51]][%[[#T52]] : i64] : vector<4xi32>
+  // CHECK: %[[#T54:]] = llvm.mlir.constant(2 : i64) : i64
+  // CHECK: %[[#T55:]] = llvm.insertelement %[[#T45]], %[[#T53]][%[[#T54]] : i64] : vector<4xi32>
+  // CHECK: %[[#T56:]] = llvm.mlir.constant(3 : i64) : i64
+  // CHECK: %[[#T57:]] = llvm.insertelement %[[#T48]], %[[#T55]][%[[#T56]] : i64] : vector<4xi32>
+  // CHECK: llvm.store %[[#T57]], %[[#T5:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+
+  // Vector to vector conversion
+  vd2 bb = (vd2)b;
+  // CHECK: %[[#bval:]] = llvm.load %[[#bmem:]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#bbval:]] = llvm.bitcast %[[#bval]] : vector<4xi32> to vector<2xf64>
+  // CHECK: llvm.store %[[#bbval]], %[[#bbmem:]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr
+
+  // Scalar to vector conversion, a.k.a. vector splat.
+  b = a + 7;
+  // CHECK: %[[#poison:]] = llvm.mlir.poison : vector<4xi32>
+  // CHECK: %[[#zeroInt:]] = llvm.mlir.constant(0 : i64) : i64
+  // CHECK: %[[#inserted:]] = llvm.insertelement %[[#seven:]], %[[#poison]][%[[#zeroInt]] : i64] : vector<4xi32>
+  // CHECK: %[[#shuffled:]] = llvm.shufflevector %[[#inserted]], %[[#poison]] [0, 0, 0, 0] : vector<4xi32>
+
+  // Extract element.
+  int c = a[x];
+  // CHECK: %[[#T58:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T59:]] = llvm.load %[[#T1]] {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: %[[#T60:]] = llvm.extractelement %[[#T58]][%[[#T59]] : i32] : vector<4xi32>
+  // CHECK: llvm.store %[[#T60]], %[[#T7:]] {alignment = 4 : i64} : i32, !llvm.ptr
+
+  // Insert element.
+  a[x] = x;
+  // CHECK: %[[#T61:]] = llvm.load %[[#T1]] {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: %[[#T62:]] = llvm.load %[[#T1]] {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: %[[#T63:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T64:]] = llvm.insertelement %[[#T61]], %[[#T63]][%[[#T62]] : i32] : vector<4xi32>
+  // CHECK: llvm.store %[[#T64]], %[[#T3]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+
+  // Compound assignment
+  a[x] += a[0];
+  // CHECK: %[[#LOADCA1:]] = llvm.load %{{[0-9]+}} {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#RHSCA:]] = llvm.extractelement %[[#LOADCA1:]][%{{[0-9]+}} : i32] : vector<4xi32>
+  // CHECK: %[[#LOADCAIDX2:]] = llvm.load %{{[0-9]+}} {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: %[[#LOADCAVEC3:]] = llvm.load %{{[0-9]+}} {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#LHSCA:]] = llvm.extractelement %[[#LOADCAVEC3:]][%[[#LOADCAIDX2:]] : i32] : vector<4xi32>
+  // CHECK: %[[#SUMCA:]] = llvm.add %[[#LHSCA:]], %[[#RHSCA:]] overflow<nsw> : i32
+  // CHECK: %[[#LOADCAVEC4:]] = llvm.load %{{[0-9]+}} {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#RESULTCAVEC:]] = llvm.insertelement %[[#SUMCA:]], %[[#LOADCAVEC4:]][%[[#LOADCAIDX2:]] : i32] : vector<4xi32>
+  // CHECK: llvm.store %[[#RESULTCAVEC:]], %{{[0-9]+}} {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+
+  // Binary arithmetic operators.
+  vi4 d = a + b;
+  // CHECK: %[[#T65:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T66:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T67:]] = llvm.add %[[#T65]], %[[#T66]]  : vector<4xi32>
+  // CHECK: llvm.store %[[#T67]], %[[#T9:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+  vi4 e = a - b;
+  // CHECK: %[[#T68:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T69:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T70:]] = llvm.sub %[[#T68]], %[[#T69]]  : vector<4xi32>
+  // CHECK: llvm.store %[[#T70]], %[[#T11:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+  vi4 f = a * b;
+  // CHECK: %[[#T71:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T72:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T73:]] = llvm.mul %[[#T71]], %[[#T72]]  : vector<4xi32>
+  // CHECK: llvm.store %[[#T73]], %[[#T13:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+  vi4 g = a / b;
+  // CHECK: %[[#T74:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T75:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T76:]] = llvm.sdiv %[[#T74]], %[[#T75]]  : vector<4xi32>
+  // CHECK: llvm.store %[[#T76]], %[[#T15:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+  vi4 h = a % b;
+  // CHECK: %[[#T77:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T78:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T79:]] = llvm.srem %[[#T77]], %[[#T78]]  : vector<4xi32>
+  // CHECK: llvm.store %[[#T79]], %[[#T17:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+  vi4 i = a & b;
+  // CHECK: %[[#T80:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T81:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T82:]] = llvm.and %[[#T80]], %[[#T81]]  : vector<4xi32>
+  // CHECK: llvm.store %[[#T82]], %[[#T19:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+  vi4 j = a | b;
+  // CHECK: %[[#T83:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T84:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T85:]] = llvm.or %[[#T83]], %[[#T84]]  : vector<4xi32>
+  // CHECK: llvm.store %[[#T85]], %[[#T21:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+  vi4 k = a ^ b;
+  // CHECK: %[[#T86:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T87:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T88:]] = llvm.xor %[[#T86]], %[[#T87]]  : vector<4xi32>
+  // CHECK: llvm.store %[[#T88]], %[[#T23:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+
+  // Unary arithmetic operators.
+  vi4 l = +a;
+  // CHECK: %[[#T89:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: llvm.store %[[#T89]], %[[#T25:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+  vi4 m = -a;
+  // CHECK: %[[#T90:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T91:]] = llvm.mlir.zero : vector<4xi32>
+  // CHECK: %[[#T92:]] = llvm.sub %[[#T91]], %[[#T90]]  : vector<4xi32>
+  // CHECK: llvm.store %[[#T92]], %[[#T27:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+  vi4 n = ~a;
+  // CHECK: %[[#T93:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T94:]] = llvm.mlir.constant(-1 : i32) : i32
+  // CHECK: %[[#T95:]] = llvm.mlir.undef : vector<4xi32>
+  // CHECK: %[[#T96:]] = llvm.mlir.constant(0 : i64) : i64
+  // CHECK: %[[#T97:]] = llvm.insertelement %[[#T94]], %[[#T95]][%[[#T96]] : i64] : vector<4xi32>
+  // CHECK: %[[#T98:]] = llvm.mlir.constant(1 : i64) : i64
+  // CHECK: %[[#T99:]] = llvm.insertelement %[[#T94]], %[[#T97]][%[[#T98]] : i64] : vector<4xi32>
+  // CHECK: %[[#T100:]] = llvm.mlir.constant(2 : i64) : i64
+  // CHECK: %[[#T101:]] = llvm.insertelement %[[#T94]], %[[#T99]][%[[#T100]] : i64] : vector<4xi32>
+  // CHECK: %[[#T102:]] = llvm.mlir.constant(3 : i64) : i64
+  // CHECK: %[[#T103:]] = llvm.insertelement %[[#T94]], %[[#T101]][%[[#T102]] : i64] : vector<4xi32>
+  // CHECK: %[[#T104:]] = llvm.xor %[[#T93]], %[[#T103]]  : vector<4xi32>
+  // CHECK: llvm.store %[[#T104]], %[[#T29:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+
+  // Ternary conditional operator
+  vi4 tc = a ? b : d;
+  // CHECK: %[[#Zero:]] = llvm.mlir.zero : vector<4xi32>
+  // CHECK: %[[#BitVec:]] = llvm.icmp "ne" %[[#A:]], %[[#Zero]] : vector<4xi32>
+  // CHECK: %[[#Res:]] = llvm.select %[[#BitVec]], %[[#B:]], %[[#D:]] : vector<4xi1>, vector<4xi32>
+
+  // Comparisons
+  vi4 o = a == b;
+  // CHECK: %[[#T105:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T106:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T107:]] = llvm.icmp "eq" %[[#T105]], %[[#T106]] : vector<4xi32>
+  // CHECK: %[[#T108:]] = llvm.sext %[[#T107]] : vector<4xi1> to vector<4xi32>
+  // CHECK: llvm.store %[[#T108]], %[[#To:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+  vi4 p = a != b;
+  // CHECK: %[[#T109:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T110:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T111:]] = llvm.icmp "ne" %[[#T109]], %[[#T110]] : vector<4xi32>
+  // CHECK: %[[#T112:]] = llvm.sext %[[#T111]] : vector<4xi1> to vector<4xi32>
+  // CHECK: llvm.store %[[#T112]], %[[#Tp:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+  vi4 q = a < b;
+  // CHECK: %[[#T113:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T114:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T115:]] = llvm.icmp "slt" %[[#T113]], %[[#T114]] : vector<4xi32>
+  // CHECK: %[[#T116:]] = llvm.sext %[[#T115]] : vector<4xi1> to vector<4xi32>
+  // CHECK: llvm.store %[[#T116]], %[[#Tq:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+  vi4 r = a > b;
+  // CHECK: %[[#T117:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T118:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T119:]] = llvm.icmp "sgt" %[[#T117]], %[[#T118]] : vector<4xi32>
+  // CHECK: %[[#T120:]] = llvm.sext %[[#T119]] : vector<4xi1> to vector<4xi32>
+  // CHECK: llvm.store %[[#T120]], %[[#Tr:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+  vi4 s = a <= b;
+  // CHECK: %[[#T121:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T122:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T123:]] = llvm.icmp "sle" %[[#T121]], %[[#T122]] : vector<4xi32>
+  // CHECK: %[[#T124:]] = llvm.sext %[[#T123]] : vector<4xi1> to vector<4xi32>
+  // CHECK: llvm.store %[[#T124]], %[[#Ts:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+  vi4 t = a >= b;
+  // CHECK: %[[#T125:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T126:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T127:]] = llvm.icmp "sge" %[[#T125]], %[[#T126]] : vector<4xi32>
+  // CHECK: %[[#T128:]] = llvm.sext %[[#T127]] : vector<4xi1> to vector<4xi32>
+  // CHECK: llvm.store %[[#T128]], %[[#Tt:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+
+  // __builtin_shufflevector
+  vi4 u = __builtin_shufflevector(a, b, 7, 5, 3, 1);
+  // CHECK: %[[#Tu:]] = llvm.shufflevector %[[#bsva:]], %[[#bsvb:]] [7, 5, 3, 1] : vector<4xi32>
+  vi4 v = __builtin_shufflevector(a, b);
+  // CHECK: %[[#sv_a:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#sv_b:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#sv0:]] = llvm.mlir.constant(3 : i32) : i32
+  // CHECK: %[[#sv1:]] = llvm.mlir.undef : vector<4xi32>
+  // CHECK: %[[#sv2:]] = llvm.mlir.constant(0 : i64) : i64
+  // CHECK: %[[#sv3:]] = llvm.insertelement %[[#sv0]], %[[#sv1]][%[[#sv2]] : i64] : vector<4xi32>
+  // CHECK: %[[#sv4:]] = llvm.mlir.constant(1 : i64) : i64
+  // CHECK: %[[#sv5:]] = llvm.insertelement %[[#sv0]], %[[#sv3]][%[[#sv4]] : i64] : vector<4xi32>
+  // CHECK: %[[#sv6:]] = llvm.mlir.constant(2 : i64) : i64
+  // CHECK: %[[#sv7:]] = llvm.insertelement %[[#sv0]], %[[#sv5]][%[[#sv6]] : i64] : vector<4xi32>
+  // CHECK: %[[#sv8:]] = llvm.mlir.constant(3 : i64) : i64
+  // CHECK: %[[#sv9:]] = llvm.insertelement %[[#sv0]], %[[#sv7]][%[[#sv8]] : i64] : vector<4xi32>
+  // CHECK: %[[#svA:]] = llvm.and %[[#sv_b]], %[[#sv9]]  : vector<4xi32>
+  // CHECK: %[[#svB:]] = llvm.mlir.undef : vector<4xi32>
+  // CHECK: %[[#svC:]] = llvm.mlir.constant(0 : i64) : i64
+  // CHECK: %[[#svD:]] = llvm.extractelement %[[#svA]][%[[#svC]] : i64] : vector<4xi32>
+  // CHECK: %[[#svE:]] = llvm.extractelement %[[#sv_a]][%[[#svD]] : i32] : vector<4xi32>
+  // CHECK: %[[#svF:]] = llvm.insertelement %[[#svE]], %[[#svB]][%[[#svC]] : i64] : vector<4xi32>
+  // CHECK: %[[#svG:]] = llvm.mlir.constant(1 : i64) : i64
+  // CHECK: %[[#svH:]] = llvm.extractelement %[[#svA]][%[[#svG]] : i64] : vector<4xi32>
+  // CHECK: %[[#svI:]] = llvm.extractelement %[[#sv_a]][%[[#svH]] : i32] : vector<4xi32>
+  // CHECK: %[[#svJ:]] = llvm.insertelement %[[#svI]], %[[#svF]][%[[#svG]] : i64] : vector<4xi32>
+  // CHECK: %[[#svK:]] = llvm.mlir.constant(2 : i64) : i64
+  // CHECK: %[[#svL:]] = llvm.extractelement %[[#svA]][%[[#svK]] : i64] : vector<4xi32>
+  // CHECK: %[[#svM:]] = llvm.extractelement %[[#sv_a]][%[[#svL]] : i32] : vector<4xi32>
+  // CHECK: %[[#svN:]] = llvm.insertelement %[[#svM]], %[[#svJ]][%[[#svK]] : i64] : vector<4xi32>
+  // CHECK: %[[#svO:]] = llvm.mlir.constant(3 : i64) : i64
+  // CHECK: %[[#svP:]] = llvm.extractelement %[[#svA]][%[[#svO]] : i64] : vector<4xi32>
+  // CHECK: %[[#svQ:]] = llvm.extractelement %[[#sv_a]][%[[#svP]] : i32] : vector<4xi32>
+  // CHECK: %[[#svR:]] = llvm.insertelement %[[#svQ]], %[[#svN]][%[[#svO]] : i64] : vector<4xi32>
+  // CHECK: llvm.store %[[#svR]], %[[#sv_v:]] {alignment = 16 : i64} : vector<4xi32>, !llvm.ptr
+
+  // Shifts
+  vi4 w = a << b;
+  // CHECK: %[[#T198:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T199:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %{{[0-9]+}}  = llvm.shl %[[#T198]], %[[#T199]] : vector<4xi32>
+  vi4 y = a >> b;
+  // CHECK: %[[#T201:]] = llvm.load %[[#T3]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %[[#T202:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<4xi32>
+  // CHECK: %{{[0-9]+}}  = llvm.ashr %[[#T201]], %[[#T202]] : vector<4xi32>
+
+  vus2 z = { (unsigned short)x, (unsigned short)x };
+  vus2 zamt = { 3, 4 };
+  // CHECK: %[[#T219:]] = llvm.mlir.constant(dense<[3, 4]> : vector<2xi16>) : vector<2xi16>
+  // CHECK: llvm.store %[[#T219]], %[[#AMT_SAVE:]] {alignment = 4 : i64} : vector<2xi16>
+  // CHECK: %[[#T221:]] = llvm.load %[[#AMT_SAVE]] {alignment = 4 : i64} : !llvm.ptr -> vector<2xi16>
+  vus2 zzz = z >> zamt;
+  // CHECK: %{{[0-9]+}}  = llvm.lshr %{{[0-9]+}}, %[[#T221]] : vector<2xi16>
+}
+
+void vector_double_test(int x, double y) {
+
+  // Vector constant.
+  vd2 a = { 1.5, 2.5 };
+  // CHECK: %[[#T28:]] = llvm.mlir.constant(dense<[1.500000e+00, 2.500000e+00]> : vector<2xf64>) : vector<2xf64>
+  // CHECK: llvm.store %[[#T28]], %[[#T5:]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr
+
+  // Non-const vector initialization.
+  vd2 b = { y, y + 1.0 };
+  // CHECK: %[[#T29:]] = llvm.load %[[#T3:]] {alignment = 8 : i64} : !llvm.ptr -> f64
+  // CHECK: %[[#T30:]] = llvm.load %[[#T3]] {alignment = 8 : i64} : !llvm.ptr -> f64
+  // CHECK: %[[#T31:]] = llvm.mlir.constant(1.000000e+00 : f64) : f64
+  // CHECK: %[[#T32:]] = llvm.fadd %[[#T30]], %[[#T31]]  : f64
+  // CHECK: %[[#T33:]] = llvm.mlir.poison : vector<2xf64>
+  // CHECK: %[[#T34:]] = llvm.mlir.constant(0 : i64) : i64
+  // CHECK: %[[#T35:]] = llvm.insertelement %[[#T29]], %[[#T33]][%[[#T34]] : i64] : vector<2xf64>
+  // CHECK: %[[#T36:]] = llvm.mlir.constant(1 : i64) : i64
+  // CHECK: %[[#T37:]] = llvm.insertelement %[[#T32]], %[[#T35]][%[[#T36]] : i64] : vector<2xf64>
+  // CHECK: llvm.store %[[#T37]], %[[#T7:]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr
+
+  // Extract element.
+  double c = a[x];
+  // CHECK: %[[#T38:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T39:]] = llvm.load %[[#T1:]] {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: %[[#T40:]] = llvm.extractelement %[[#T38]][%[[#T39]] : i32] : vector<2xf64>
+  // CHECK: llvm.store %[[#T40]], %[[#T9:]] {alignment = 8 : i64} : f64, !llvm.ptr
+
+  // Insert element.
+  a[x] = y;
+  // CHECK: %[[#T41:]] = llvm.load %[[#T3]] {alignment = 8 : i64} : !llvm.ptr -> f64
+  // CHECK: %[[#T42:]] = llvm.load %[[#T1:]] {alignment = 4 : i64} : !llvm.ptr -> i32
+  // CHECK: %[[#T43:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T44:]] = llvm.insertelement %[[#T41]], %[[#T43]][%[[#T42]] : i32] : vector<2xf64>
+  // CHECK: llvm.store %[[#T44]], %[[#T5]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr
+
+  // Binary arithmetic operators.
+  vd2 d = a + b;
+  // CHECK: %[[#T45:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T46:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T47:]] = llvm.fadd %[[#T45]], %[[#T46]]  : vector<2xf64>
+  // CHECK: llvm.store %[[#T47]], %[[#T11:]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr
+  vd2 e = a - b;
+  // CHECK: %[[#T48:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T49:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T50:]] = llvm.fsub %[[#T48]], %[[#T49]]  : vector<2xf64>
+  // CHECK: llvm.store %[[#T50]], %[[#T13:]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr
+  vd2 f = a * b;
+  // CHECK: %[[#T51:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T52:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T53:]] = llvm.fmul %[[#T51]], %[[#T52]]  : vector<2xf64>
+  // CHECK: llvm.store %[[#T53]], %[[#T15:]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr
+  vd2 g = a / b;
+  // CHECK: %[[#T54:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T55:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T56:]] = llvm.fdiv %[[#T54]], %[[#T55]]  : vector<2xf64>
+  // CHECK: llvm.store %[[#T56]], %[[#T17:]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr
+
+  // Unary arithmetic operators.
+  vd2 l = +a;
+  // CHECK: %[[#T57:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: llvm.store %[[#T57]], %[[#T19:]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr
+  vd2 m = -a;
+  // CHECK: %[[#T58:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T59:]] = llvm.fneg %[[#T58]]  : vector<2xf64>
+  // CHECK: llvm.store %[[#T59]], %[[#T21:]] {alignment = 16 : i64} : vector<2xf64>, !llvm.ptr
+
+  // Comparisons
+  vll2 o = a == b;
+  // CHECK: %[[#T60:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T61:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T62:]] = llvm.fcmp "oeq" %[[#T60]], %[[#T61]] : vector<2xf64>
+  // CHECK: %[[#T63:]] = llvm.sext %[[#T62]] : vector<2xi1> to vector<2xi64>
+  // CHECK: llvm.store %[[#T63]], %[[#To:]] {alignment = 16 : i64} : vector<2xi64>, !llvm.ptr
+  vll2 p = a != b;
+  // CHECK: %[[#T64:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T65:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T66:]] = llvm.fcmp "une" %[[#T64]], %[[#T65]] : vector<2xf64>
+  // CHECK: %[[#T67:]] = llvm.sext %[[#T66]] : vector<2xi1> to vector<2xi64>
+  // CHECK: llvm.store %[[#T67]], %[[#Tp:]] {alignment = 16 : i64} : vector<2xi64>, !llvm.ptr
+  vll2 q = a < b;
+  // CHECK: %[[#T68:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T69:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T70:]] = llvm.fcmp "olt" %[[#T68]], %[[#T69]] : vector<2xf64>
+  // CHECK: %[[#T71:]] = llvm.sext %[[#T70]] : vector<2xi1> to vector<2xi64>
+  // CHECK: llvm.store %[[#T71]], %[[#Tq:]] {alignment = 16 : i64} : vector<2xi64>, !llvm.ptr
+  vll2 r = a > b;
+  // CHECK: %[[#T72:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T73:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T74:]] = llvm.fcmp "ogt" %[[#T72]], %[[#T73]] : vector<2xf64>
+  // CHECK: %[[#T75:]] = llvm.sext %[[#T74]] : vector<2xi1> to vector<2xi64>
+  // CHECK: llvm.store %[[#T75]], %[[#Tr:]] {alignment = 16 : i64} : vector<2xi64>, !llvm.ptr
+  vll2 s = a <= b;
+  // CHECK: %[[#T76:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T77:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T78:]] = llvm.fcmp "ole" %[[#T76]], %[[#T77]] : vector<2xf64>
+  // CHECK: %[[#T79:]] = llvm.sext %[[#T78]] : vector<2xi1> to vector<2xi64>
+  // CHECK: llvm.store %[[#T79]], %[[#Ts:]] {alignment = 16 : i64} : vector<2xi64>, !llvm.ptr
+  vll2 t = a >= b;
+  // CHECK: %[[#T80:]] = llvm.load %[[#T5]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T81:]] = llvm.load %[[#T7]] {alignment = 16 : i64} : !llvm.ptr -> vector<2xf64>
+  // CHECK: %[[#T82:]] = llvm.fcmp "oge" %[[#T80]], %[[#T81]] : vector<2xf64>
+  // CHECK: %[[#T83:]] = llvm.sext %[[#T82]] : vector<2xi1> to vector<2xi64>
+  // CHECK: llvm.store %[[#T83]], %[[#Tt:]] {alignment = 16 : i64} : vector<2xi64>, !llvm.ptr
+
+  // __builtin_convertvector
+  vus2 w = __builtin_convertvector(a, vus2);
+  // CHECK: %[[#cv0:]] = llvm.fptoui %[[#cv1:]] : vector<2xf64> to vector<2xi16>
+}
diff --git a/clang/test/CIR/Incubator/Lowering/vtable-thunk.cpp b/clang/test/CIR/Incubator/Lowering/vtable-thunk.cpp
new file mode 100644
index 0000000000000..176d75a36ce00
--- /dev/null
+++ b/clang/test/CIR/Incubator/Lowering/vtable-thunk.cpp
@@ -0,0 +1,55 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ogcg.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ogcg.ll %s
+
+// Test that thunks lower correctly from CIR to LLVM IR and match OGCG output
+
+class Base1 {
+public:
+  virtual void foo() {}
+  int x;
+};
+
+class Base2 {
+public:
+  virtual void bar() {}
+  int y;
+};
+
+class Derived : public Base1, public Base2 {
+public:
+  void bar() override {}
+};
+
+void test() {
+  Derived d;
+  Base2* b2 = &d;
+  b2->bar();
+}
+
+// ============================================================================
+// VTable Structure Validation
+// ============================================================================
+
+// Check vtable contains thunk with correct offset (16 bytes on x86_64)
+// Both CIR and OGCG should produce identical vtable structure
+// LLVM: @_ZTV7Derived = linkonce_odr constant { [4 x ptr], [3 x ptr] }
+// LLVM-SAME: @_ZThn16_N7Derived3barEv
+
+// OGCG: @_ZTV7Derived = linkonce_odr {{.*}} constant { [4 x ptr], [3 x ptr] }
+// OGCG-SAME: @_ZThn16_N7Derived3barEv
+
+// ============================================================================
+// Thunk Implementation - This Pointer Adjustment
+// ============================================================================
+
+// CIR lowering should produce the same pointer adjustment as OGCG
+// LLVM-LABEL: define linkonce_odr void @_ZThn16_N7Derived3barEv
+// LLVM: %[[VAR1:[0-9]+]] = getelementptr i8, ptr %{{[0-9]+}}, i64 -16
+// LLVM: call void @_ZN7Derived3barEv(ptr %[[VAR1]])
+
+// OGCG-LABEL: define linkonce_odr void @_ZThn16_N7Derived3barEv
+// OGCG: %[[VAR2:[0-9]+]] = getelementptr inbounds i8, ptr %{{.*}}, i64 -16
+// OGCG: call void @_ZN7Derived3barEv(ptr {{.*}} %[[VAR2]])
+
diff --git a/clang/test/CIR/Incubator/Tools/cir-translate/cir-translate-triple.cir b/clang/test/CIR/Incubator/Tools/cir-translate/cir-translate-triple.cir
new file mode 100644
index 0000000000000..fa653ef3de25f
--- /dev/null
+++ b/clang/test/CIR/Incubator/Tools/cir-translate/cir-translate-triple.cir
@@ -0,0 +1,11 @@
+// RUN: cir-translate --cir-to-llvmir --target x86_64-unknown-linux-gnu --disable-cc-lowering %s -o %t.ll
+// RUN: FileCheck %s -input-file %t.ll -check-prefix=LLVM
+
+module {
+  cir.func @foo() {
+    cir.return
+  }
+}
+
+// LLVM-DAG: target triple = "x86_64-unknown-linux-gnu"
+// LLVM-DAG: target datalayout = "{{.*}}"
diff --git a/clang/test/CIR/Incubator/Tools/cir-translate/has-triple-and-data-layout.cir b/clang/test/CIR/Incubator/Tools/cir-translate/has-triple-and-data-layout.cir
new file mode 100644
index 0000000000000..81da113f1d648
--- /dev/null
+++ b/clang/test/CIR/Incubator/Tools/cir-translate/has-triple-and-data-layout.cir
@@ -0,0 +1,24 @@
+// RUN: cir-translate --cir-to-llvmir --target x86_64-unknown-linux-gnu --disable-cc-lowering %s -o %t.x86.ll
+// RUN: FileCheck %s -input-file %t.x86.ll -check-prefix=X86
+// RUN: cir-translate --cir-to-llvmir --target spirv64-unknown-unknown --disable-cc-lowering %s -o %t.spirv64.ll
+// RUN: FileCheck %s -input-file %t.spirv64.ll -check-prefix=SPIRV64
+// RUN: cir-translate --cir-to-llvmir --disable-cc-lowering %s -o %t.default.ll
+// RUN: FileCheck %s -input-file %t.default.ll -check-prefix=DEFAULT
+
+module attributes {
+  cir.triple = "spirv64-unknown-unknown",
+  dlti.dl_spec = #dlti.dl_spec<"dlti.global_memory_space" = 7 : ui64>
+} {
+  cir.func @foo() {
+    cir.return
+  }
+}
+
+// X86-NOT: target datalayout = "G7"
+// X86-DAG: target triple = "x86_64-unknown-linux-gnu"
+
+// SPIRV64-NOT: target datalayout = "G7"
+// SPIRV64-DAG: target triple = "spirv64-unknown-unknown"
+
+// DEFAULT-DAG: target datalayout = "G7"
+// DEFAULT-DAG: target triple = "spirv64-unknown-unknown"
diff --git a/clang/test/CIR/Incubator/Tools/cir-translate/has-triple-no-data-layout.cir b/clang/test/CIR/Incubator/Tools/cir-translate/has-triple-no-data-layout.cir
new file mode 100644
index 0000000000000..34c543362bed1
--- /dev/null
+++ b/clang/test/CIR/Incubator/Tools/cir-translate/has-triple-no-data-layout.cir
@@ -0,0 +1,23 @@
+// RUN: cir-translate --cir-to-llvmir --target x86_64-unknown-linux-gnu --disable-cc-lowering %s -o %t.x86.ll
+// RUN: FileCheck %s -input-file %t.x86.ll -check-prefix=X86
+// RUN: cir-translate --cir-to-llvmir --target spirv64-unknown-unknown --disable-cc-lowering %s -o %t.spirv64.ll
+// RUN: FileCheck %s -input-file %t.spirv64.ll -check-prefix=SPIRV64
+// RUN: cir-translate --cir-to-llvmir --disable-cc-lowering %s -o %t.default.ll
+// RUN: FileCheck %s -input-file %t.default.ll -check-prefix=DEFAULT
+
+module attributes {
+  cir.triple = "spirv64-unknown-unknown"
+} {
+  cir.func @foo() {
+    cir.return
+  }
+}
+
+// X86-DAG: target triple = "x86_64-unknown-linux-gnu"
+// X86-DAG: target datalayout = "{{.*}}"
+
+// SPIRV64-DAG: target triple = "spirv64-unknown-unknown"
+// SPIRV64-DAG: target datalayout = "{{.*}}"
+
+// DEFAULT-DAG: target triple = "spirv64-unknown-unknown"
+// DEFAULT-DAG: target datalayout = "{{.*}}"
diff --git a/clang/test/CIR/Incubator/Tools/cir-translate/invalid-translate-triple.cir b/clang/test/CIR/Incubator/Tools/cir-translate/invalid-translate-triple.cir
new file mode 100644
index 0000000000000..07bd766a37875
--- /dev/null
+++ b/clang/test/CIR/Incubator/Tools/cir-translate/invalid-translate-triple.cir
@@ -0,0 +1,8 @@
+// RUN: cir-translate -verify-diagnostics --cir-to-llvmir --target foobar --disable-cc-lowering %s 2>&1
+
+// expected-error@below {{invalid target triple 'foobar'}}
+module {
+  cir.func @foo() {
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Tools/cir-translate/no-triple-has-data-layout.cir b/clang/test/CIR/Incubator/Tools/cir-translate/no-triple-has-data-layout.cir
new file mode 100644
index 0000000000000..f2853941271fa
--- /dev/null
+++ b/clang/test/CIR/Incubator/Tools/cir-translate/no-triple-has-data-layout.cir
@@ -0,0 +1,23 @@
+// RUN: cir-translate --cir-to-llvmir --target x86_64-unknown-linux-gnu --disable-cc-lowering %s -o %t.x86.ll
+// RUN: FileCheck %s -input-file %t.x86.ll -check-prefix=X86
+// RUN: cir-translate --cir-to-llvmir --target spirv64-unknown-unknown --disable-cc-lowering %s -o %t.spirv64.ll
+// RUN: FileCheck %s -input-file %t.spirv64.ll -check-prefix=SPIRV64
+// RUN: cir-translate --cir-to-llvmir --disable-cc-lowering %s -o %t.default.ll
+// RUN: FileCheck %s -input-file %t.default.ll -check-prefix=DEFAULT
+
+module attributes {
+  dlti.dl_spec = #dlti.dl_spec<"dlti.global_memory_space" = 7 : ui64>
+} {
+  cir.func @foo() {
+    cir.return
+  }
+}
+
+// X86-NOT: target datalayout = "G7"
+// X86-DAG: target triple = "x86_64-unknown-linux-gnu"
+
+// SPIRV64-NOT: target datalayout = "G7"
+// SPIRV64-DAG: target triple = "spirv64-unknown-unknown"
+
+// DEFAULT-NOT: target datalayout = "G7"
+// DEFAULT-DAG: target triple = "x86_64-unknown-linux-gnu"
diff --git a/clang/test/CIR/Incubator/Tools/cir-translate/no-triple-no-data-layout.cir b/clang/test/CIR/Incubator/Tools/cir-translate/no-triple-no-data-layout.cir
new file mode 100644
index 0000000000000..f18f69dd876d0
--- /dev/null
+++ b/clang/test/CIR/Incubator/Tools/cir-translate/no-triple-no-data-layout.cir
@@ -0,0 +1,21 @@
+// RUN: cir-translate --cir-to-llvmir --target x86_64-unknown-linux-gnu --disable-cc-lowering %s -o %t.x86.ll
+// RUN: FileCheck %s -input-file %t.x86.ll -check-prefix=X86
+// RUN: cir-translate --cir-to-llvmir --target spirv64-unknown-unknown --disable-cc-lowering %s -o %t.spirv64.ll
+// RUN: FileCheck %s -input-file %t.spirv64.ll -check-prefix=SPIRV64
+// RUN: cir-translate --cir-to-llvmir --disable-cc-lowering %s -o %t.default.ll
+// RUN: FileCheck %s -input-file %t.default.ll -check-prefix=DEFAULT
+
+module {
+  cir.func @foo() {
+    cir.return
+  }
+}
+
+// X86-DAG: target triple = "x86_64-unknown-linux-gnu"
+// X86-DAG: target datalayout = "{{.*}}"
+
+// SPIRV64-DAG: target triple = "spirv64-unknown-unknown"
+// SPIRV64-DAG: target datalayout = "{{.*}}"
+
+// DEFAULT-DAG: target triple = "x86_64-unknown-linux-gnu"
+// DEFAULT-DAG: target datalayout = "{{.*}}"
diff --git a/clang/test/CIR/Incubator/Tools/cir-translate/warn-default-triple.cir b/clang/test/CIR/Incubator/Tools/cir-translate/warn-default-triple.cir
new file mode 100644
index 0000000000000..519e96598d432
--- /dev/null
+++ b/clang/test/CIR/Incubator/Tools/cir-translate/warn-default-triple.cir
@@ -0,0 +1,8 @@
+// RUN: cir-translate -verify-diagnostics --cir-to-llvmir --disable-cc-lowering %s
+
+// expected-warning@below {{no target triple provided, assuming x86_64-unknown-linux-gnu}}
+module {
+  cir.func @foo() {
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Transforms/ABILowering/cast.cir b/clang/test/CIR/Incubator/Transforms/ABILowering/cast.cir
new file mode 100644
index 0000000000000..fba20b4428ed7
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/ABILowering/cast.cir
@@ -0,0 +1,53 @@
+// RUN: cir-opt --cir-abi-lowering -o %t.cir %s
+// RUN: FileCheck --input-file %t.cir %s
+
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+!S1 = !cir.record<struct "S1" {!s32i, !s32i, !s32i}>
+!S2 = !cir.record<struct "S2" {!s32i, !s32i, !s32i}>
+!Field1 = !cir.data_member<!s32i in !S1>
+!Field2 = !cir.data_member<!s32i in !S2>
+!Method1 = !cir.method<!cir.func<(!s32i)> in !S1>
+!Method2 = !cir.method<!cir.func<(!s32i)> in !S2>
+
+module attributes {
+  cir.triple = "x86_64-unknown-linux-gnu",
+  dlti.dl_spec = #dlti.dl_spec<i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, "dlti.stack_alignment" = 128 : i64, "dlti.endianness" = "little">
+} {
+  cir.func @bitcast_data_member(%arg0 : !Field1) -> !Field2 {
+    %0 = cir.cast bitcast %arg0: !Field1 -> !Field2
+    cir.return %0 : !Field2
+  }
+  // CHECK:      @bitcast_data_member(%[[ARG:.+]]: !s64i) -> !s64i
+  // CHECK-NEXT:   cir.return %[[ARG]] : !s64i
+  // CHECK-NEXT: }
+
+  cir.func @bitcast_method(%arg0 : !Method1) -> !Method2 {
+    %0 = cir.cast bitcast %arg0: !Method1 -> !Method2
+    cir.return %0 : !Method2
+  }
+  // CHECK:      @bitcast_method(%[[ARG:.+]]: ![[ABI_TY:.+]]) -> ![[ABI_TY]]
+  // CHECK-NEXT:   cir.return %[[ARG]] : ![[ABI_TY]]
+  // CHECK-NEXT: }
+
+  cir.func @data_member_to_bool(%arg0 : !Field1) -> !cir.bool {
+    %0 = cir.cast member_ptr_to_bool %arg0: !Field1 -> !cir.bool
+    cir.return %0 : !cir.bool
+  }
+  // CHECK:      @data_member_to_bool(%[[ARG:.+]]: !s64i) -> !cir.bool
+  // CHECK-NEXT:   %[[NULL:.+]] = cir.const #cir.int<-1> : !s64i
+  // CHECK-NEXT:   %[[RES:.+]] = cir.cmp(ne, %[[ARG]], %[[NULL]]) : !s64i, !cir.bool
+  // CHECK-NEXT:   cir.return %[[RES]] : !cir.bool
+  // CHECK-NEXT: }
+
+  cir.func @method_to_bool(%arg0 : !Method1) -> !cir.bool {
+    %0 = cir.cast member_ptr_to_bool %arg0: !Method1 -> !cir.bool
+    cir.return %0 : !cir.bool
+  }
+  // CHECK:      @method_to_bool(%[[ARG:.+]]: ![[ABI_TY:.+]]) -> !cir.bool
+  // CHECK-NEXT:   %[[NULL:.+]] = cir.const #cir.int<0> : !s64i
+  // CHECK-NEXT:   %[[PTR:.+]] = cir.extract_member %[[ARG]][0] : ![[ABI_TY]] -> !s64i
+  // CHECK-NEXT:   %[[RES:.+]] = cir.cmp(ne, %[[PTR]], %[[NULL]]) : !s64i, !cir.bool
+  // CHECK-NEXT:   cir.return %[[RES]] : !cir.bool
+  // CHECK-NEXT: }
+}
diff --git a/clang/test/CIR/Incubator/Transforms/ABILowering/cmp.cir b/clang/test/CIR/Incubator/Transforms/ABILowering/cmp.cir
new file mode 100644
index 0000000000000..6aa999aada955
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/ABILowering/cmp.cir
@@ -0,0 +1,41 @@
+// RUN: cir-opt --cir-abi-lowering -o %t.cir %s
+// RUN: FileCheck --input-file %t.cir %s
+
+!s32i = !cir.int<s, 32>
+!S = !cir.record<struct "S" {!s32i, !s32i, !s32i}>
+!Field = !cir.data_member<!s32i in !S>
+!Method = !cir.method<!cir.func<(!s32i)> in !S>
+
+module attributes {
+  cir.triple = "x86_64-unknown-linux-gnu",
+  dlti.dl_spec = #dlti.dl_spec<i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, "dlti.stack_alignment" = 128 : i64, "dlti.endianness" = "little">
+} {
+  cir.func @cmp_data_member(%arg0: !Field, %arg1: !Field) -> !cir.bool {
+    %0 = cir.cmp(eq, %arg0, %arg1) : !Field, !cir.bool
+    cir.return %0 : !cir.bool
+  }
+  // CHECK:      @cmp_data_member(%[[ARG0:.+]]: !s64i, %[[ARG1:.+]]: !s64i) -> !cir.bool
+  // CHECK-NEXT:   %[[RES:.+]] = cir.cmp(eq, %[[ARG0]], %[[ARG1]]) : !s64i, !cir.bool
+  // CHECK-NEXT:   cir.return %[[RES]] : !cir.bool
+  // CHECK-NEXT: }
+
+  cir.func @cmp_method(%arg0: !Method, %arg1: !Method) -> !cir.bool {
+    %0 = cir.cmp(eq, %arg0, %arg1) : !Method, !cir.bool
+    cir.return %0 : !cir.bool
+  }
+  // CHECK:      @cmp_method(%[[ARG0:.+]]: ![[ABI_TY:.+]], %[[ARG1:.+]]: ![[ABI_TY]]) -> !cir.bool
+  // CHECK-NEXT:   %[[ZERO:.+]] = cir.const #cir.int<0> : !s64i
+  // CHECK-NEXT:   %[[ARG0_PTR:.+]] = cir.extract_member %[[ARG0]][0] : ![[ABI_TY]] -> !s64i
+  // CHECK-NEXT:   %[[ARG1_PTR:.+]] = cir.extract_member %[[ARG1]][0] : ![[ABI_TY]] -> !s64i
+  // CHECK-NEXT:   %[[PTR_EQ:.+]] = cir.cmp(eq, %[[ARG0_PTR]], %[[ARG1_PTR]]) : !s64i, !cir.bool
+  // CHECK-NEXT:   %[[ARG0_PTR_NULL:.+]] = cir.cmp(eq, %[[ARG0_PTR]], %[[ZERO]]) : !s64i, !cir.bool
+  // CHECK-NEXT:   %[[ARG0_OFFSET:.+]] = cir.extract_member %[[ARG0]][1] : ![[ABI_TY]] -> !s64i
+  // CHECK-NEXT:   %[[ARG1_OFFSET:.+]] = cir.extract_member %[[ARG1]][1] : ![[ABI_TY]] -> !s64i
+  // CHECK-NEXT:   %[[OFFSET_EQ:.+]] = cir.cmp(eq, %[[ARG0_OFFSET]], %[[ARG1_OFFSET]]) : !s64i, !cir.bool
+  // CHECK-NEXT:   %[[TRUE:.+]] = cir.const #true
+  // CHECK-NEXT:   %[[FALSE:.+]] = cir.const #false
+  // CHECK-NEXT:   %[[X:.+]] = cir.select if %[[ARG0_PTR_NULL]] then %[[TRUE]] else %[[OFFSET_EQ]] : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
+  // CHECK-NEXT:   %[[RES:.+]] = cir.select if %[[X]] then %[[PTR_EQ]] else %[[FALSE]] : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
+  // CHECK-NEXT:   cir.return %[[RES]] : !cir.bool
+  // CHECK-NEXT: }
+}
diff --git a/clang/test/CIR/Incubator/Transforms/ABILowering/const.cir b/clang/test/CIR/Incubator/Transforms/ABILowering/const.cir
new file mode 100644
index 0000000000000..62fc40682c93d
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/ABILowering/const.cir
@@ -0,0 +1,40 @@
+// RUN: cir-opt --cir-abi-lowering -o %t.cir %s
+// RUN: FileCheck --input-file %t.cir %s
+
+!s32i = !cir.int<s, 32>
+!S = !cir.record<struct "S" {!s32i, !s32i, !s32i}>
+!Field = !cir.data_member<!s32i in !S>
+!Method = !cir.method<!cir.func<(!s32i)> in !S>
+
+module attributes {
+  cir.triple = "x86_64-unknown-linux-gnu",
+  dlti.dl_spec = #dlti.dl_spec<i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, "dlti.stack_alignment" = 128 : i64, "dlti.endianness" = "little">
+} {
+  cir.func @const_data_member() -> !Field {
+    %0 = cir.const #cir.data_member<1> : !Field
+    cir.return %0 : !Field
+  }
+  // CHECK:      @const_data_member() -> !s64i
+  // CHECK-NEXT:   %[[RES:.+]] = cir.const #cir.int<4> : !s64i
+  // CHECK-NEXT:   cir.return %[[RES]] : !s64i
+  // CHECK-NEXT: }
+
+  cir.func private @f(%arg0: !cir.ptr<!S>, %arg1: !s32i)
+  cir.func @const_method_nonvirtual() -> !Method {
+    %0 = cir.const #cir.method<@f> : !Method
+    cir.return %0 : !Method
+  }
+  // CHECK:      @const_method_nonvirtual() -> ![[ABI_TY:.+]] {
+  // CHECK-NEXT:   %[[RES:.+]] = cir.const #cir.const_record<{#cir.global_view<@f> : !s64i, #cir.int<0> : !s64i}> : ![[ABI_TY]]
+  // CHECK-NEXT:   cir.return %[[RES]] : ![[ABI_TY]]
+  // CHECK-NEXT: }
+
+  cir.func @const_method_virtual() -> !Method {
+    %0 = cir.const #cir.method<vtable_offset = 8> : !Method
+    cir.return %0 : !Method
+  }
+  // CHECK:      cir.func @const_method_virtual() -> ![[ABI_TY:.+]] {
+  // CHECK-NEXT:   %[[RES:.+]] = cir.const #cir.const_record<{#cir.int<9> : !s64i, #cir.int<0> : !s64i}> : ![[ABI_TY]]
+  // CHECK-NEXT:   cir.return %[[RES]] : ![[ABI_TY]]
+  // CHECK-NEXT: }
+}
diff --git a/clang/test/CIR/Incubator/Transforms/ABILowering/func.cir b/clang/test/CIR/Incubator/Transforms/ABILowering/func.cir
new file mode 100644
index 0000000000000..c8482024ceab3
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/ABILowering/func.cir
@@ -0,0 +1,28 @@
+// RUN: cir-opt --cir-abi-lowering -o %t.cir %s
+// RUN: FileCheck --input-file %t.cir %s
+
+!s32i = !cir.int<s, 32>
+!S = !cir.record<struct "S" {!s32i, !s32i, !s32i}>
+!Field = !cir.data_member<!s32i in !S>
+!Method = !cir.method<!cir.func<(!s32i)> in !S>
+
+module attributes {
+  cir.triple = "x86_64-unknown-linux-gnu",
+  dlti.dl_spec = #dlti.dl_spec<i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, "dlti.stack_alignment" = 128 : i64, "dlti.endianness" = "little">
+} {
+  cir.func @foo(%arg0 : !Field, %arg1: !Method) -> !Field {
+    cir.return %arg0 : !Field
+  }
+  // CHECK:      @foo(%[[ARG0:.+]]: !s64i, %[[ARG1:.+]]: ![[METHOD_ABI_TY:.+]]) -> !s64i
+  // CHECK-NEXT:   cir.return %[[ARG0]] : !s64i
+  // CHECK-NEXT: }
+
+  cir.func @no_args() -> !Field {
+    %0 = cir.const #cir.data_member<1> : !Field
+    cir.return %0 : !Field
+  }
+  // CHECK:      @no_args() -> !s64i
+  // CHECK-NEXT:   %[[RES:.+]] = cir.const #cir.int<4> : !s64i
+  // CHECK-NEXT:   cir.return %[[RES]] : !s64i
+  // CHECK-NEXT: }
+}
diff --git a/clang/test/CIR/Incubator/Transforms/ABILowering/global.cir b/clang/test/CIR/Incubator/Transforms/ABILowering/global.cir
new file mode 100644
index 0000000000000..6cf56057eb7c2
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/ABILowering/global.cir
@@ -0,0 +1,22 @@
+// RUN: cir-opt --cir-abi-lowering -o %t.cir %s
+// RUN: FileCheck --input-file %t.cir %s
+
+!s32i = !cir.int<s, 32>
+!S = !cir.record<struct "S" {!s32i, !s32i, !s32i}>
+!Field = !cir.data_member<!s32i in !S>
+!Method = !cir.method<!cir.func<(!s32i)> in !S>
+
+module attributes {
+  cir.triple = "x86_64-unknown-linux-gnu",
+  dlti.dl_spec = #dlti.dl_spec<i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, "dlti.stack_alignment" = 128 : i64, "dlti.endianness" = "little">
+} {
+  cir.global external @const_data_member = #cir.data_member<1> : !Field
+  // CHECK: cir.global external @const_data_member = #cir.int<4> : !s64i
+
+  cir.func private @f(%arg0: !cir.ptr<!S>, %arg1: !s32i)
+  cir.global external @const_method_nonvirtual = #cir.method<@f> : !Method
+  // CHECK: cir.global external @const_method_nonvirtual = #cir.const_record<{#cir.global_view<@f> : !s64i, #cir.int<0> : !s64i}> : !{{.+}}
+
+  cir.global external @const_method_virtual = #cir.method<vtable_offset = 8> : !Method
+  // CHECK: cir.global external @const_method_virtual = #cir.const_record<{#cir.int<9> : !s64i, #cir.int<0> : !s64i}> : !{{.+}}
+}
diff --git a/clang/test/CIR/Incubator/Transforms/ABILowering/member-ptr.cir b/clang/test/CIR/Incubator/Transforms/ABILowering/member-ptr.cir
new file mode 100644
index 0000000000000..c1b58af53732d
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/ABILowering/member-ptr.cir
@@ -0,0 +1,106 @@
+// RUN: cir-opt --cir-abi-lowering -o %t.cir %s
+// RUN: FileCheck --input-file %t.cir %s
+
+!void = !cir.void
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+!Base = !cir.record<struct "Base" {!s32i, !s32i, !s32i}>
+!Derived = !cir.record<struct "Derived" {!Base, !s32i}>
+!BaseField = !cir.data_member<!s32i in !Base>
+!DerivedField = !cir.data_member<!s32i in !Derived>
+!BaseMethod = !cir.method<!cir.func<(!s32i)> in !Base>
+!DerivedMethod = !cir.method<!cir.func<(!s32i)> in !Derived>
+
+module attributes {
+  cir.triple = "x86_64-unknown-linux-gnu",
+  dlti.dl_spec = #dlti.dl_spec<i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f64 = dense<64> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, "dlti.stack_alignment" = 128 : i64, "dlti.endianness" = "little">
+} {
+  cir.func @get_runtime_member(%arg0: !cir.ptr<!Base>, %arg1: !BaseField) -> !cir.ptr<!s32i> {
+    %0 = cir.get_runtime_member %arg0[%arg1 : !BaseField] : !cir.ptr<!Base> -> !cir.ptr<!s32i>
+    cir.return %0 : !cir.ptr<!s32i>
+  }
+  // CHECK:      cir.func @get_runtime_member(%[[ARG0:.+]]: !cir.ptr<!rec_Base>, %[[ARG1:.+]]: !s64i) -> !cir.ptr<!s32i> {
+  // CHECK-NEXT:   %[[BYTE_PTR:.+]] = cir.cast bitcast %[[ARG0]] : !cir.ptr<!rec_Base> -> !cir.ptr<!s8i>
+  // CHECK-NEXT:   %[[COMPUTED:.+]] = cir.ptr_stride %[[BYTE_PTR]], %[[ARG1]] : (!cir.ptr<!s8i>, !s64i) -> !cir.ptr<!s8i>
+  // CHECK-NEXT:   %[[RESULT:.+]] = cir.cast bitcast %[[COMPUTED]] : !cir.ptr<!s8i> -> !cir.ptr<!s32i>
+  // CHECK-NEXT:   cir.return %[[RESULT]] : !cir.ptr<!s32i>
+  // CHECK-NEXT: }
+
+  cir.func @get_method(%arg0: !cir.ptr<!Base>, %arg1: !BaseMethod) {
+    %0, %1 = cir.get_method %arg1, %arg0 : (!BaseMethod, !cir.ptr<!Base>) -> (!cir.ptr<!cir.func<(!cir.ptr<!void>, !s32i)>>, !cir.ptr<!void>)
+    cir.return
+  }
+  // CHECK:      cir.func @get_method(%[[ARG0:.+]]: !cir.ptr<!rec_Base>, %[[ARG1:.+]]: ![[ABI_TY:.+]]) {
+  // CHECK-NEXT:   %[[MASK:.+]] = cir.const #cir.int<1> : !s64i
+  // CHECK-NEXT:   %[[OFFSET:.+]] = cir.extract_member %[[ARG1]][1] : ![[ABI_TY]] -> !s64i
+  // CHECK-NEXT:   %[[THIS:.+]] = cir.cast bitcast %[[ARG0]] : !cir.ptr<!rec_Base> -> !cir.ptr<!void>
+  // CHECK-NEXT:   %{{.+}} = cir.ptr_stride %[[THIS]], %[[OFFSET]] : (!cir.ptr<!void>, !s64i) -> !cir.ptr<!void>
+  // CHECK-NEXT:   %[[PTR:.+]] = cir.extract_member %[[ARG1]][0] : ![[ABI_TY]] -> !s64i
+  // CHECK-NEXT:   %[[PTR_MASKED:.+]] = cir.binop(and, %[[PTR]], %[[MASK]]) : !s64i
+  // CHECK-NEXT:   %[[IS_VIRT:.+]] = cir.cmp(eq, %[[PTR_MASKED]], %[[MASK]]) : !s64i, !cir.bool
+  // CHECK-NEXT:   cir.brcond %[[IS_VIRT]] ^[[BLK_VIRT:.+]], ^[[BLK_NON_VIRT:.+]]
+  // CHECK-NEXT: ^[[BLK_VIRT]]:
+  // CHECK-NEXT:   %[[VPTR_PTR:.+]] = cir.cast bitcast %[[ARG0]] : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.ptr<!s8i>>
+  // CHECK-NEXT:   %[[VPTR:.+]] = cir.load %[[VPTR_PTR]] : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
+  // CHECK-NEXT:   %[[VPTR_OFFSET:.+]] = cir.binop(sub, %[[PTR]], %[[MASK]]) : !s64i
+  // CHECK-NEXT:   %[[VELEM_PTR:.+]] = cir.ptr_stride %[[VPTR]], %[[VPTR_OFFSET]] : (!cir.ptr<!s8i>, !s64i) -> !cir.ptr<!s8i>
+  // CHECK-NEXT:   %[[VFPTR_PTR:.+]] = cir.cast bitcast %[[VELEM_PTR]] : !cir.ptr<!s8i> -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!void>, !s32i)>>>
+  // CHECK-NEXT:   %[[VFPTR:.+]] = cir.load %[[VFPTR_PTR]] : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!void>, !s32i)>>>, !cir.ptr<!cir.func<(!cir.ptr<!void>, !s32i)>>
+  // CHECK-NEXT:   cir.br ^[[BLK_CONT:.+]](%[[VFPTR]] : !cir.ptr<!cir.func<(!cir.ptr<!void>, !s32i)>>)
+  // CHECK-NEXT: ^[[BLK_NON_VIRT]]:
+  // CHECK-NEXT:   %[[FPTR:.+]] = cir.cast int_to_ptr %[[PTR]] : !s64i -> !cir.ptr<!cir.func<(!cir.ptr<!void>, !s32i)>>
+  // CHECK-NEXT:   cir.br ^[[BLK_CONT]](%[[FPTR]] : !cir.ptr<!cir.func<(!cir.ptr<!void>, !s32i)>>)
+  // CHECK-NEXT: ^[[BLK_CONT]](%{{.+}}: !cir.ptr<!cir.func<(!cir.ptr<!void>, !s32i)>>):
+  // CHECK-NEXT:   cir.return
+  // CHECK-NEXT: }
+
+  cir.func @data_member_derived_to_base(%arg0: !DerivedField) -> !BaseField {
+    %0 = cir.base_data_member %arg0 : !DerivedField [8] -> !BaseField
+    cir.return %0 : !BaseField
+  }
+  // CHECK:      @data_member_derived_to_base(%[[ARG:.+]]: !s64i) -> !s64i {
+  // CHECK-NEXT:   %[[NULL:.+]] = cir.const #cir.int<-1> : !s64i
+  // CHECK-NEXT:   %[[IS_NULL:.+]] = cir.cmp(eq, %[[ARG]], %[[NULL]]) : !s64i, !cir.bool
+  // CHECK-NEXT:   %[[OFFSET:.+]] = cir.const #cir.int<8> : !s64i
+  // CHECK-NEXT:   %[[COMPUTED:.+]] = cir.binop(sub, %[[ARG]], %[[OFFSET]]) : !s64i
+  // CHECK-NEXT:   %[[RES:.+]] = cir.select if %[[IS_NULL]] then %[[NULL]] else %[[COMPUTED]] : (!cir.bool, !s64i, !s64i) -> !s64i
+  // CHECK-NEXT:   cir.return %[[RES]] : !s64i
+  // CHECK-NEXT: }
+
+  cir.func @data_member_base_to_derived(%arg0: !BaseField) -> !DerivedField {
+    %0 = cir.derived_data_member %arg0 : !BaseField [8] -> !DerivedField
+    cir.return %0 : !DerivedField
+  }
+  // CHECK:      @data_member_base_to_derived(%[[ARG:.+]]: !s64i) -> !s64i {
+  // CHECK-NEXT:   %[[NULL:.+]] = cir.const #cir.int<-1> : !s64i
+  // CHECK-NEXT:   %[[IS_NULL:.+]] = cir.cmp(eq, %[[ARG]], %[[NULL]]) : !s64i, !cir.bool
+  // CHECK-NEXT:   %[[OFFSET:.+]] = cir.const #cir.int<8> : !s64i
+  // CHECK-NEXT:   %[[COMPUTED:.+]] = cir.binop(add, %[[ARG]], %[[OFFSET]]) : !s64i
+  // CHECK-NEXT:   %[[RES:.+]] = cir.select if %[[IS_NULL]] then %[[NULL]] else %[[COMPUTED]] : (!cir.bool, !s64i, !s64i) -> !s64i
+  // CHECK-NEXT:   cir.return %[[RES]] : !s64i
+  // CHECK-NEXT: }
+
+  cir.func @method_derived_to_base(%arg0: !DerivedMethod) -> !BaseMethod {
+    %0 = cir.base_method %arg0 : !DerivedMethod [8] -> !BaseMethod
+    cir.return %0 : !BaseMethod
+  }
+  // CHECK:      @method_derived_to_base(%[[ARG:.+]]: ![[ABI_TY:.+]]) -> ![[ABI_TY]] {
+  // CHECK-NEXT:   %[[THIS_OFFSET:.+]] = cir.extract_member %[[ARG]][1] : ![[ABI_TY]] -> !s64i
+  // CHECK-NEXT:   %[[BASE_OFFSET:.+]] = cir.const #cir.int<8> : !s64i
+  // CHECK-NEXT:   %[[COMPUTED:.+]] = cir.binop(sub, %[[THIS_OFFSET]], %[[BASE_OFFSET]]) : !s64i
+  // CHECK-NEXT:   %[[RES:.+]] = cir.insert_member %[[ARG]][1], %[[COMPUTED]] : ![[ABI_TY]], !s64i
+  // CHECK-NEXT:   cir.return %[[RES]] : ![[ABI_TY]]
+  // CHECK-NEXT: }
+
+  cir.func @method_base_to_derived(%arg0: !BaseMethod) -> !DerivedMethod {
+    %0 = cir.derived_method %arg0 : !BaseMethod [8] -> !DerivedMethod
+    cir.return %0 : !DerivedMethod
+  }
+  // CHECK:      @method_base_to_derived(%[[ARG:.+]]: ![[ABI_TY:.+]]) -> ![[ABI_TY]] {
+  // CHECK-NEXT:   %[[THIS_OFFSET:.+]] = cir.extract_member %[[ARG]][1] : ![[ABI_TY]] -> !s64i
+  // CHECK-NEXT:   %[[BASE_OFFSET:.+]] = cir.const #cir.int<8> : !s64i
+  // CHECK-NEXT:   %[[COMPUTED:.+]] = cir.binop(add, %[[THIS_OFFSET]], %[[BASE_OFFSET]]) : !s64i
+  // CHECK-NEXT:   %[[RES:.+]] = cir.insert_member %[[ARG]][1], %[[COMPUTED]] : ![[ABI_TY]], !s64i
+  // CHECK-NEXT:   cir.return %[[RES]] : ![[ABI_TY]]
+  // CHECK-NEXT: }
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Transforms/Inputs/folly-coro.h b/clang/test/CIR/Incubator/Transforms/Inputs/folly-coro.h
new file mode 100644
index 0000000000000..21e4b337eb226
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/Inputs/folly-coro.h
@@ -0,0 +1,44 @@
+#include "std.h"
+
+namespace folly {
+namespace coro {
+
+using std::suspend_always;
+using std::suspend_never;
+using std::coroutine_handle;
+
+using SemiFuture = int;
+
+template<class T>
+struct Task {
+    struct promise_type {
+        Task<T> get_return_object() noexcept;
+        suspend_always initial_suspend() noexcept;
+        suspend_always final_suspend() noexcept;
+        void return_value(T);
+        void unhandled_exception();
+        auto yield_value(Task<T>) noexcept { return final_suspend(); }
+    };
+    bool await_ready() noexcept { return false; }
+    void await_suspend(coroutine_handle<>) noexcept {}
+    T await_resume();
+};
+
+template<>
+struct Task<void> {
+    struct promise_type {
+        Task<void> get_return_object() noexcept;
+        suspend_always initial_suspend() noexcept;
+        suspend_always final_suspend() noexcept;
+        void return_void() noexcept;
+        void unhandled_exception() noexcept;
+        auto yield_value(Task<void>) noexcept { return final_suspend(); }
+    };
+    bool await_ready() noexcept { return false; }
+    void await_suspend(coroutine_handle<>) noexcept {}
+    void await_resume() noexcept {}
+    SemiFuture semi();
+};
+
+} // namespace coro
+} // namespace folly
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Transforms/Inputs/std.h b/clang/test/CIR/Incubator/Transforms/Inputs/std.h
new file mode 100644
index 0000000000000..1bc2b85047845
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/Inputs/std.h
@@ -0,0 +1,29 @@
+namespace std {
+
+template <class Ret, typename... T>
+struct coroutine_traits { using promise_type = typename Ret::promise_type; };
+
+template <class Promise = void>
+struct coroutine_handle {
+  static coroutine_handle from_address(void *) noexcept;
+};
+template <>
+struct coroutine_handle<void> {
+  template <class PromiseType>
+  coroutine_handle(coroutine_handle<PromiseType>) noexcept;
+  static coroutine_handle from_address(void *);
+};
+
+struct suspend_always {
+  bool await_ready() noexcept { return false; }
+  void await_suspend(coroutine_handle<>) noexcept {}
+  void await_resume() noexcept {}
+};
+
+struct suspend_never {
+  bool await_ready() noexcept { return true; }
+  void await_suspend(coroutine_handle<>) noexcept {}
+  void await_resume() noexcept {}
+};
+
+} // namespace std
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Transforms/Inputs/string.h b/clang/test/CIR/Incubator/Transforms/Inputs/string.h
new file mode 100644
index 0000000000000..44693164b1cd7
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/Inputs/string.h
@@ -0,0 +1,11 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef __SIZE_TYPE__ size_t;
+
+size_t strlen(const char *s);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/clang/test/CIR/Incubator/Transforms/builtin-assume.cir b/clang/test/CIR/Incubator/Transforms/builtin-assume.cir
new file mode 100644
index 0000000000000..c4f1317abb2b5
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/builtin-assume.cir
@@ -0,0 +1,38 @@
+// RUN: cir-opt -cir-canonicalize -o %t.cir %s
+// RUN: FileCheck --input-file %t.cir %s
+
+!s32i = !cir.int<s, 32>
+module {
+  // Make sure canonicalizers don't erase assume builtins.
+
+  cir.func @assume(%arg0: !s32i) {
+    %0 = cir.const #cir.int<0> : !s32i
+    %1 = cir.cmp(gt, %arg0, %0) : !s32i, !cir.bool
+    cir.assume %1 : !cir.bool
+    cir.return
+  }
+  //      CHECK: cir.func @assume(%arg0: !s32i) {
+  // CHECK-NEXT:   %0 = cir.const #cir.int<0> : !s32i
+  // CHECK-NEXT:   %1 = cir.cmp(gt, %arg0, %0) : !s32i, !cir.bool
+  // CHECK-NEXT:   cir.assume %1 : !cir.bool
+  // CHECK-NEXT:   cir.return
+  // CHECK-NEXT: }
+
+  cir.func @assume_aligned(%arg0: !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {
+    %0 = cir.assume.aligned %arg0 : !cir.ptr<!s32i>[alignment 8]
+    cir.return %0 : !cir.ptr<!s32i>
+  }
+  //      CHECK: cir.func @assume_aligned(%arg0: !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {
+  // CHECK-NEXT:   %0 = cir.assume.aligned %arg0 : !cir.ptr<!s32i>[alignment 8]
+  // CHECK-NEXT:   cir.return %0 : !cir.ptr<!s32i>
+  // CHECK-NEXT: }
+
+  cir.func @assume_separate_storage(%arg0: !cir.ptr<!cir.void>, %arg1: !cir.ptr<!cir.void>) {
+    cir.assume.separate_storage %arg0, %arg1 : !cir.ptr<!cir.void>
+    cir.return
+  }
+  //      CHECK: cir.func @assume_separate_storage(%arg0: !cir.ptr<!void>, %arg1: !cir.ptr<!void>) {
+  // CHECK-NEXT:   cir.assume.separate_storage %arg0, %arg1 : !cir.ptr<!void>
+  // CHECK-NEXT:   cir.return
+  // CHECK-NEXT: }
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Transforms/complex-fold.cir b/clang/test/CIR/Incubator/Transforms/complex-fold.cir
new file mode 100644
index 0000000000000..f31042b3acc7d
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/complex-fold.cir
@@ -0,0 +1,64 @@
+// RUN: cir-opt --canonicalize -o %t.cir %s
+// RUN: FileCheck --input-file %t.cir %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.func @complex_create_fold() -> !cir.complex<!s32i> {
+    %0 = cir.const #cir.int<1> : !s32i
+    %1 = cir.const #cir.int<2> : !s32i
+    %2 = cir.complex.create %0, %1 : !s32i -> !cir.complex<!s32i>
+    cir.return %2 : !cir.complex<!s32i>
+  }
+
+  // CHECK-LABEL: cir.func @complex_create_fold() -> !cir.complex<!s32i> {
+  //  CHECK-NEXT:   %[[#A:]] = cir.const #cir.complex<#cir.int<1> : !s32i, #cir.int<2> : !s32i> : !cir.complex<!s32i>
+  //  CHECK-NEXT:   cir.return %[[#A]] : !cir.complex<!s32i>
+  //  CHECK-NEXT: }
+
+  cir.func @fold_complex_real() -> !s32i {
+    %0 = cir.const #cir.int<1> : !s32i
+    %1 = cir.const #cir.int<2> : !s32i
+    %2 = cir.complex.create %0, %1 : !s32i -> !cir.complex<!s32i>
+    %3 = cir.complex.real %2 : !cir.complex<!s32i> -> !s32i
+    cir.return %3 : !s32i
+  }
+
+  // CHECK-LABEL: cir.func @fold_complex_real() -> !s32i {
+  //  CHECK-NEXT:   %[[#A:]] = cir.const #cir.int<1> : !s32i
+  //  CHECK-NEXT:   cir.return %[[#A]] : !s32i
+  //  CHECK-NEXT: }
+
+  cir.func @fold_complex_real_from_create_test(%arg0: !s32i, %arg1: !s32i) -> !s32i {
+    %0 = cir.complex.create %arg0, %arg1 : !s32i -> !cir.complex<!s32i>
+    %1 = cir.complex.real %0 : !cir.complex<!s32i> -> !s32i
+    cir.return %1 : !s32i
+  }
+
+  //       CHECK: cir.func @fold_complex_real_from_create_test(%[[ARG_0:.*]]: !s32i, %[[ARG_1:.*]]: !s32i) -> !s32i {
+  //  CHECK-NEXT:   cir.return %[[ARG_0]] : !s32i
+  //  CHECK-NEXT: }
+
+  cir.func @fold_complex_imag() -> !s32i {
+    %0 = cir.const #cir.int<1> : !s32i
+    %1 = cir.const #cir.int<2> : !s32i
+    %2 = cir.complex.create %0, %1 : !s32i -> !cir.complex<!s32i>
+    %3 = cir.complex.imag %2 : !cir.complex<!s32i> -> !s32i
+    cir.return %3 : !s32i
+  }
+
+  // CHECK-LABEL: cir.func @fold_complex_imag() -> !s32i {
+  //  CHECK-NEXT:   %[[#A:]] = cir.const #cir.int<2> : !s32i
+  //  CHECK-NEXT:   cir.return %[[#A]] : !s32i
+  //  CHECK-NEXT: }
+
+  cir.func @fold_complex_imag_from_create_test(%arg0: !s32i, %arg1: !s32i) -> !s32i {
+    %0 = cir.complex.create %arg0, %arg1 : !s32i -> !cir.complex<!s32i>
+    %1 = cir.complex.imag %0 : !cir.complex<!s32i> -> !s32i
+    cir.return %1 : !s32i
+  }
+
+  //       CHECK: cir.func @fold_complex_imag_from_create_test(%[[ARG_0:.*]]: !s32i, %[[ARG_1:.*]]: !s32i) -> !s32i {
+  //  CHECK-NEXT:   cir.return %[[ARG_1]] : !s32i
+  //  CHECK-NEXT: }
+}
diff --git a/clang/test/CIR/Incubator/Transforms/goto_solver.cir b/clang/test/CIR/Incubator/Transforms/goto_solver.cir
new file mode 100644
index 0000000000000..2adae1eb7459f
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/goto_solver.cir
@@ -0,0 +1,63 @@
+// RUN: cir-opt %s -cir-goto-solver -o - | FileCheck %s
+
+!void = !cir.void
+
+cir.func @a(){
+  %0 = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["ptr", init] {alignment = 8 : i64}
+  %1 = cir.blockaddress <@a, "label1"> -> !cir.ptr<!void>
+  cir.store align(8) %1, %0 : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+  cir.br ^bb1
+^bb1:
+  cir.label "label1"
+  cir.br ^bb2
+^bb2:
+  // This label is not referenced by any blockaddressOp, so it should be removed
+  cir.label "label2"
+  cir.return
+}
+
+// CHECK:  cir.func @a()
+// CHECK:   %1 = cir.blockaddress <@a, "label1"> -> !cir.ptr<!void>
+// CHECK: ^bb1:
+// CHECK:   cir.label "label1"
+// CHECK:   cir.br ^bb2
+// CHECK: ^bb2:
+// CHECK-NOT: cir.label "label2"
+
+cir.func @b(){
+  %0 = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["ptr", init] {alignment = 8 : i64}
+  %1 = cir.blockaddress <@b, "label1"> -> !cir.ptr<!void>
+  cir.store align(8) %1, %0 : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+  cir.goto "label2"
+^bb1:
+  cir.label "label1"
+  cir.br ^bb2
+^bb2: 
+  // This label is not referenced by any blockaddressOp, so it should be removed
+  cir.label "label2"
+  cir.return
+}
+
+// CHECK: cir.func @b() {
+// CHECK:   %1 = cir.blockaddress <@b, "label1"> -> !cir.ptr<!void>
+// CHECK:   cir.store align(8) %1, {{.*}} : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+// CHECK:   cir.br ^bb2
+// CHECK: ^bb1:
+// CHECK:   cir.label "label1"
+// CHECK:   cir.br ^bb2
+// CHECK: ^bb2:
+// CHECK-NOT: cir.label "label2"
+
+cir.func @c() {
+  cir.label "label1"
+  %0 = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["ptr", init] {alignment = 8 : i64}
+  %1 = cir.blockaddress <@c, "label1"> -> !cir.ptr<!void>
+  cir.store align(8) %1, %0 : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+  cir.return
+}
+
+// CHECK: cir.func @c
+// CHECK:   cir.label "label1"
+// CHECK:   %1 = cir.blockaddress <@c, "label1"> -> !cir.ptr<!void>
+// CHECK:   cir.store align(8) %1, {{.*}} : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+
diff --git a/clang/test/CIR/Incubator/Transforms/idiom-iter.cpp b/clang/test/CIR/Incubator/Transforms/idiom-iter.cpp
new file mode 100644
index 0000000000000..5591baa04ff63
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/idiom-iter.cpp
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir -I%S/../Inputs -fclangir-idiom-recognizer="remarks=found-calls" -clangir-verify-diagnostics %s -o %t.cir
+
+namespace std {
+template<typename T, unsigned N> struct array {
+  T arr[N];
+  struct iterator {
+    T *p;
+    constexpr explicit iterator(T *p) : p(p) {}
+    constexpr bool operator!=(iterator o) { return p != o.p; }
+    constexpr iterator &operator++() { ++p; return *this; }
+    constexpr T &operator*() { return *p; }
+  };
+  constexpr iterator begin() { return iterator(arr); }
+};
+}
+
+void iter_test()
+{
+  std::array<unsigned char, 3> v2 = {1, 2, 3};
+  (void)v2.begin(); // no remark should be produced.
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Transforms/idiom-recognizer.cpp b/clang/test/CIR/Incubator/Transforms/idiom-recognizer.cpp
new file mode 100644
index 0000000000000..fb00d0611f21f
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/idiom-recognizer.cpp
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-idiom-recognizer -emit-cir -I%S/../Inputs -mmlir --mlir-print-ir-after-all %s -o - 2>&1 | FileCheck %s -check-prefix=PASS_ENABLED
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -emit-cir -I%S/../Inputs -fclangir-idiom-recognizer="remarks=found-calls" -clangir-verify-diagnostics %s -o %t.cir
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -fclangir-idiom-recognizer -emit-cir -I%S/../Inputs -mmlir --mlir-print-ir-before=cir-idiom-recognizer %s -o - 2>&1 | FileCheck %s -check-prefix=BEFORE-IDIOM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -fclangir-idiom-recognizer -emit-cir -I%S/../Inputs -mmlir --mlir-print-ir-after=cir-idiom-recognizer %s -o - 2>&1 | FileCheck %s -check-prefix=AFTER-IDIOM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -fclangir-idiom-recognizer -emit-cir -I%S/../Inputs -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o - 2>&1 | FileCheck %s -check-prefix=AFTER-LOWERING-PREPARE
+
+// PASS_ENABLED:  IR Dump After IdiomRecognizer (cir-idiom-recognizer)
+
+#include "std-cxx.h"
+
+int test_find(unsigned char n = 3)
+{
+    unsigned num_found = 0;
+    std::array<unsigned char, 9> v = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    auto f = std::find(v.begin(), v.end(), n); // expected-remark {{found call to std::find()}}
+                                               // expected-remark at -1 {{found call to begin() iterator}}
+                                               // expected-remark at -2 {{found call to end() iterator}}
+
+    // BEFORE-IDIOM: {{.*}} cir.call @_ZNSt5arrayIhLj9EE5beginEv(
+    // AFTER-IDIOM: {{.*}} cir.std.begin({{.*}}, @_ZNSt5arrayIhLj9EE5beginEv
+    // AFTER-LOWERING-PREPARE: {{.*}} cir.call @_ZNSt5arrayIhLj9EE5beginEv(
+
+    // BEFORE-IDIOM: {{.*}} cir.call @_ZNSt5arrayIhLj9EE3endEv(
+    // AFTER-IDIOM: {{.*}} cir.std.end({{.*}}, @_ZNSt5arrayIhLj9EE3endEv
+    // AFTER-LOWERING-PREPARE: {{.*}} cir.call @_ZNSt5arrayIhLj9EE3endEv(
+
+    // BEFORE-IDIOM: {{.*}} cir.call @_ZSt4findIPhhET_S1_S1_RKT0_(
+    // AFTER-IDIOM: {{.*}} cir.std.find({{.*}}, @_ZSt4findIPhhET_S1_S1_RKT0_
+    // AFTER-LOWERING-PREPARE: {{.*}} cir.call @_ZSt4findIPhhET_S1_S1_RKT0_(
+
+    if (f != v.end()) // expected-remark {{found call to end() iterator}}
+        num_found++;
+    return num_found;
+}
+
+namespace yolo {
+template<typename T, unsigned N> struct array {
+  T arr[N];
+  typedef T value_type;
+  typedef value_type* iterator;
+  constexpr iterator begin() { return iterator(arr); }
+};
+}
+
+void iter_test() {
+  yolo::array<unsigned char, 3> v = {1, 2, 3};
+  (void)v.begin(); // no remark should be produced.
+}
diff --git a/clang/test/CIR/Incubator/Transforms/idiom-string.c b/clang/test/CIR/Incubator/Transforms/idiom-string.c
new file mode 100644
index 0000000000000..9c44712571ca1
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/idiom-string.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wincompatible-library-redeclaration -fclangir -fclangir-idiom-recognizer="remarks=found-calls" -emit-cir -clangir-verify-diagnostics %s -o %t.cir
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wincompatible-library-redeclaration -fclangir -fclangir-idiom-recognizer -emit-cir -mmlir --mlir-print-ir-before=cir-idiom-recognizer %s -o - 2>&1 | FileCheck %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wincompatible-library-redeclaration -fclangir -fclangir-idiom-recognizer -emit-cir -mmlir --mlir-print-ir-after=cir-idiom-recognizer %s -o - 2>&1 | FileCheck %s -check-prefix=RAISED
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wincompatible-library-redeclaration -fclangir -fclangir-idiom-recognizer -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o - 2>&1 | FileCheck %s -check-prefix=LOWERED
+
+#include "Inputs/string.h"
+
+// === CALL RECOGNITION, RAISING AND LOWERING PREPARE ===
+int test_strlen(const char *s) {
+    // CIR:    {{.*}} cir.call @strlen(
+    // RAISED: {{.*}} cir.std.strlen(
+    // LOWERED: {{.*}} cir.call @strlen(
+    return strlen(s); // expected-remark {{found call to strlen()}}
+}
diff --git a/clang/test/CIR/Incubator/Transforms/if.cir b/clang/test/CIR/Incubator/Transforms/if.cir
new file mode 100644
index 0000000000000..c86f087a677ac
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/if.cir
@@ -0,0 +1,48 @@
+// RUN: cir-opt %s -cir-flatten-cfg -o - | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.func @foo(%arg0: !s32i) -> !s32i {
+    %4 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
+    cir.if %4 {
+      %5 = cir.const #cir.int<1> : !s32i
+      cir.return %5 : !s32i
+    } else {
+      %5 = cir.const #cir.int<0> : !s32i
+      cir.return %5 : !s32i
+    }
+    cir.return %arg0 : !s32i
+  }
+//      CHECK: cir.func @foo(%arg0: !s32i) -> !s32i {
+// CHECK-NEXT:   %0 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
+// CHECK-NEXT:   cir.brcond %0 ^bb1, ^bb2
+// CHECK-NEXT: ^bb1:  // pred: ^bb0
+// CHECK-NEXT:   %1 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:   cir.return %1 : !s32i
+// CHECK-NEXT: ^bb2:  // pred: ^bb0
+// CHECK-NEXT:   %2 = cir.const #cir.int<0> : !s32i
+// CHECK-NEXT:   cir.return %2 : !s32i
+// CHECK-NEXT: ^bb3:  // no predecessors
+// CHECK-NEXT:   cir.return %arg0 : !s32i
+// CHECK-NEXT: }
+
+  cir.func @onlyIf(%arg0: !s32i) -> !s32i {
+    %4 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
+    cir.if %4 {
+      %5 = cir.const #cir.int<1> : !s32i
+      cir.return %5 : !s32i
+    }
+    cir.return %arg0 : !s32i
+  }
+//      CHECK: cir.func @onlyIf(%arg0: !s32i) -> !s32i {
+// CHECK-NEXT:   %0 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
+// CHECK-NEXT:   cir.brcond %0 ^bb1, ^bb2
+// CHECK-NEXT: ^bb1:  // pred: ^bb0
+// CHECK-NEXT:   %1 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:   cir.return %1 : !s32i
+// CHECK-NEXT: ^bb2:  // pred: ^bb0
+// CHECK-NEXT:   cir.return %arg0 : !s32i
+// CHECK-NEXT: }
+
+}
diff --git a/clang/test/CIR/Incubator/Transforms/lib-opt-find.cpp b/clang/test/CIR/Incubator/Transforms/lib-opt-find.cpp
new file mode 100644
index 0000000000000..9196dc50c8997
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lib-opt-find.cpp
@@ -0,0 +1,66 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -fclangir -fclangir-idiom-recognizer -fclangir-lib-opt -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+#include "std-cxx.h"
+
+int test1(unsigned char n = 3)
+{
+    // CHECK: test1
+    unsigned num_found = 0;
+    // CHECK: %[[pattern_addr:.*]] = cir.alloca !u8i, !cir.ptr<!u8i>, ["n"
+    std::array<unsigned char, 9> v = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+    auto f = std::find(v.begin(), v.end(), n);
+
+    // CHECK: %[[first:.*]] = cir.call @_ZNSt5arrayIhLj9EE5beginEv
+    // CHECK: %[[last:.*]] = cir.call @_ZNSt5arrayIhLj9EE3endEv
+    // CHECK: %[[cast_to_void:.*]] = cir.cast bitcast %[[first]] : !cir.ptr<!u8i> -> !cir.ptr<!void>
+    // CHECK: %[[load_pattern:.*]] = cir.load{{.*}} %[[pattern_addr]] : !cir.ptr<!u8i>, !u8i
+    // CHECK: %[[pattern:.*]] = cir.cast integral %[[load_pattern:.*]] : !u8i -> !s32i
+
+    // CHECK-NOT: {{.*}} cir.call @_ZSt4findIPhhET_S1_S1_RKT0_(
+    // CHECK: %[[array_size:.*]] = cir.const #cir.int<9> : !u64i
+
+    // CHECK: %[[result_cast:.*]] = cir.libc.memchr(%[[cast_to_void]], %[[pattern]], %[[array_size]])
+    // CHECK: %[[memchr_res:.*]] = cir.cast bitcast %[[result_cast]] : !cir.ptr<!void> -> !cir.ptr<!u8i>
+    // CHECK: %[[nullptr:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!u8i>
+    // CHECK: %[[cmp_res:.*]] = cir.cmp(eq, %[[nullptr]], %[[memchr_res]]) : !cir.ptr<!u8i>, !cir.bool
+    // CHECK: cir.ternary(%[[cmp_res]], true {
+    // CHECK:   cir.yield %[[last]] : !cir.ptr<!u8i>
+    // CHECK: }, false {
+    // CHECK:   cir.yield %[[memchr_res]] : !cir.ptr<!u8i>
+    // CHECK: }) : (!cir.bool) -> !cir.ptr<!u8i>
+
+    if (f != v.end())
+        num_found++;
+
+    return num_found;
+}
+
+unsigned char* test2(unsigned char* first, unsigned char* last, unsigned char v)
+{
+    return std::find(first, last, v);
+    // CHECK: test2
+
+    // CHECK: %[[first_storage:.*]] = cir.alloca !cir.ptr<!u8i>, !cir.ptr<!cir.ptr<!u8i>>, ["first", init]
+    // CHECK: %[[last_storage:.*]] = cir.alloca !cir.ptr<!u8i>, !cir.ptr<!cir.ptr<!u8i>>, ["last", init]
+    // CHECK: %[[pattern_storage:.*]] = cir.alloca !u8i, !cir.ptr<!u8i>, ["v", init]
+    // CHECK: %[[first:.*]] = cir.load{{.*}} %[[first_storage]]
+    // CHECK: %[[last:.*]] = cir.load{{.*}} %[[last_storage]]
+    // CHECK: %[[cast_to_void:.*]] = cir.cast bitcast %[[first]] : !cir.ptr<!u8i> -> !cir.ptr<!void>
+    // CHECK: %[[load_pattern:.*]] = cir.load{{.*}} %[[pattern_storage]] : !cir.ptr<!u8i>, !u8i
+    // CHECK: %[[pattern:.*]] = cir.cast integral %[[load_pattern:.*]] : !u8i -> !s32i
+
+    // CHECK-NOT: {{.*}} cir.call @_ZSt4findIPhhET_S1_S1_RKT0_(
+    // CHECK: %[[array_size:.*]] = cir.ptr_diff %[[last]], %[[first]] : !cir.ptr<!u8i> -> !u64i
+
+    // CHECK: %[[result_cast:.*]] = cir.libc.memchr(%[[cast_to_void]], %[[pattern]], %[[array_size]])
+    // CHECK: %[[memchr_res:.*]] = cir.cast bitcast %[[result_cast]] : !cir.ptr<!void> -> !cir.ptr<!u8i>
+    // CHECK: %[[nullptr:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!u8i>
+    // CHECK: %[[cmp_res:.*]] = cir.cmp(eq, %[[nullptr]], %[[memchr_res]]) : !cir.ptr<!u8i>, !cir.bool
+    // CHECK: cir.ternary(%[[cmp_res]], true {
+    // CHECK:   cir.yield %[[last]] : !cir.ptr<!u8i>
+    // CHECK: }, false {
+    // CHECK:   cir.yield %[[memchr_res]] : !cir.ptr<!u8i>
+    // CHECK: }) : (!cir.bool) -> !cir.ptr<!u8i>
+}
diff --git a/clang/test/CIR/Incubator/Transforms/lib-opt-string.cpp b/clang/test/CIR/Incubator/Transforms/lib-opt-string.cpp
new file mode 100644
index 0000000000000..d34dbd1493826
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lib-opt-string.cpp
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -I%S/Inputs -fclangir -fclangir-idiom-recognizer -fclangir-lib-opt -emit-cir -mmlir --mlir-print-ir-after=cir-lib-opt %s -o /dev/null 2>&1 | FileCheck %s
+#include "string.h"
+
+// Test strlen(str) ==|!=|> 0 --> *str ==|!=|> 0
+int test_strlen_eq_zero(const char *str) {
+// CHECK-LABEL:   cir.func{{.*}} @_Z19test_strlen_eq_zeroPKc(
+// CHECK-SAME:      %[[ARG_STR:.*]]: !cir.ptr<!s8i>
+// CHECK:           %[[VAR_STR:.*]] = cir.alloca !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+// CHECK:           cir.store %[[ARG_STR]], %[[VAR_STR]] : !cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
+// CHECK:           %[[STR:.*]] = cir.load{{.*}} %[[VAR_STR]] : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
+// CHECK:           %[[FIRST_CHAR:.*]] = cir.load{{.*}} %[[STR]] : !cir.ptr<!s8i>, !s8i
+// CHECK:           %[[FIRST_INT:.*]] = cir.cast integral %[[FIRST_CHAR]] : !s8i -> !u64i
+// CHECK:           %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i
+// CHECK:           %[[CMP:.*]] = cir.cmp(eq, %[[FIRST_INT]], %[[ZERO]]) : !u64i, !cir.bool
+// CHECK:         }
+
+    return strlen(str) == 0ULL; // expected-remark "strlen opt: transformed strlen into load"
+}
+
+// Test strlen(str) <|>= len --> memchr(str, 0, len) <|>= len
+int test_strlen_lt_var(const char *str, size_t len) {
+  // CHECK-LABEL:   cir.func{{.*}} @_Z18test_strlen_lt_varPKcm(
+  // CHECK:           %[[STR:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.ptr<!s8i>>, !cir.ptr<!s8i>
+  // CHECK:           %[[LEN:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!u64i>, !u64i
+  // CHECK:           %[[STR_VOID_PTR:.*]] = cir.cast bitcast %[[STR]] : !cir.ptr<!s8i> -> !cir.ptr<!void>
+  // CHECK:           %[[NULL_CHAR:.*]] = cir.const #cir.int<0> : !s8i
+  // CHECK:           %[[NULL_INT:.*]] = cir.cast integral %[[NULL_CHAR]] : !s8i -> !s32i
+  // CHECK:           %[[FOUND_VOID_PTR:.*]] = cir.libc.memchr(%[[STR_VOID_PTR]], %[[NULL_INT]], %[[LEN]])
+  // CHECK:           %[[FOUND_STR_PTR:.*]] = cir.cast bitcast %[[FOUND_VOID_PTR]] : !cir.ptr<!void> -> !cir.ptr<!s8i>
+  // CHECK:           %[[DIST:.*]] = cir.ptr_diff %[[FOUND_STR_PTR]], %[[STR]] : !cir.ptr<!s8i> -> !u64i
+  // CHECK:           %[[CMP:.*]] = cir.cmp(lt, %[[DIST]], %[[LEN]]) : !u64i, !cir.bool
+  // CHECK:         }
+
+    return strlen(str) < len; // expected-remark "strlen opt: transformed strlen into memchr"
+}
+
+// Test strlen(str) >|<=|==|!= len --> memchr(str, 0, len + 1) >|<=|==|!= len
+int test_strlen_eq_var(const char *str, size_t len) {
+  // CHECK-LABEL:   cir.func{{.*}} @_Z18test_strlen_eq_varPKcm(
+  // CHECK:           %[[ONE:.*]] = cir.const #cir.int<1>
+  // CHECK:           %[[LEN_PLUS_ONE:.*]] = cir.binop(add, %{{.*}}, %[[ONE]])
+  // CHECK:           %[[RESULT:.*]] = cir.libc.memchr(%{{.*}}, %{{.*}}, %[[LEN_PLUS_ONE]])
+  // CHECK:         }
+
+    return strlen(str) == len; // expected-remark "strlen opt: transformed strlen into memchr"
+}
+
+// Applicability tests:
+
+// Multiple users, not applicable.
+int test_strlen_multiple_users(const char *str, size_t len1, size_t len2) {
+  // Check that we still have a strlen op.
+  // CHECK-LABEL:   cir.func{{.*}} @_Z26test_strlen_multiple_usersPKcmm(
+  // CHECK:           %[[LEN:.*]] = {{.*}}strlen(
+  // CHECK:         }
+
+  size_t len = strlen(str);
+  return len1 < len && len < len2; // expected-remark "strlen opt: result of strlen has more than one use"
+}
+
+// Non-comparison user, not applicable.
+int test_strlen_non_cmp_users(const char *str) {
+  // Check that we still have a strlen op.
+  // CHECK-LABEL:   cir.func{{.*}} @_Z25test_strlen_non_cmp_usersPKc(
+  // CHECK:           %[[LEN:.*]] = {{.*}}strlen(
+  // CHECK:         }
+
+  return strlen(str); // expected-remark "strlen opt: could not find cir.cmp user of strlen result"
+}
+
+// Memory operation blocks move.
+int test_strlen_store_between_def_and_use(const char *str, size_t *ptr) {
+  // Check that we still have either a strlen op, or a call to strlen.
+  // CHECK-LABEL:   cir.func{{.*}} @_Z37test_strlen_store_between_def_and_usePKcPm(
+  // CHECK:           %[[LEN:.*]] = {{.*}}strlen
+  // CHECK:         }
+
+  size_t len = strlen(str);
+  *ptr = 10;
+  return len < *ptr; // expected-remark "strlen opt: could not move max length before strlen"
+}
+
+// Can't adjust value being compared.
+int test_strlen_cant_adjust(const char *str) {
+  // Check that we still have either a strlen op, or a call to strlen.
+  // CHECK-LABEL:   cir.func{{.*}} @_Z23test_strlen_cant_adjustPKc(
+  // CHECK:           %[[LEN:.*]] = {{.*}}strlen
+  // CHECK:         }
+
+  return strlen(str) < 10.0; // expected-remark "strlen opt: could not adjust the max value"
+}
diff --git a/clang/test/CIR/Incubator/Transforms/lib-opt.cpp b/clang/test/CIR/Incubator/Transforms/lib-opt.cpp
new file mode 100644
index 0000000000000..17895e567645e
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lib-opt.cpp
@@ -0,0 +1,3 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-idiom-recognizer -fclangir-lib-opt -emit-cir -mmlir --mlir-print-ir-after-all %s -o - 2>&1 | FileCheck %s -check-prefix=CIR
+
+// CIR: IR Dump After LibOpt (cir-lib-opt)
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Transforms/lifetime-check-agg.cpp b/clang/test/CIR/Incubator/Transforms/lifetime-check-agg.cpp
new file mode 100644
index 0000000000000..c2567d3a8f38f
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lifetime-check-agg.cpp
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -fclangir-lifetime-check="history=all;remarks=all" -clangir-verify-diagnostics -emit-cir %s -o %t.cir
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir-analysis-only -fclangir-lifetime-check="history=all;remarks=all" %s -clangir-verify-diagnostics -emit-obj -o /dev/null
+
+typedef enum SType {
+  INFO_ENUM_0 = 9,
+  INFO_ENUM_1 = 2020,
+} SType;
+
+typedef struct InfoRaw {
+    SType type;
+    const void* __attribute__((__may_alias__)) next;
+    unsigned int fa;
+    unsigned f;
+    unsigned s;
+    unsigned w;
+    unsigned h;
+    unsigned g;
+    unsigned a;
+} InfoRaw;
+
+typedef unsigned long long FlagsPriv;
+typedef struct InfoPriv {
+    SType type;
+    void* __attribute__((__may_alias__)) next;
+    FlagsPriv flags;
+} InfoPriv;
+
+static const FlagsPriv PrivBit = 0x00000001;
+
+void escape_info(InfoRaw *info);
+typedef SType ( *FnPtr)(unsigned s, const InfoRaw* i);
+struct X {
+  struct entries {
+    FnPtr wildfn = nullptr;
+  };
+  static entries e;
+};
+
+void exploded_fields(bool cond) {
+  {
+    InfoRaw info = {INFO_ENUM_0}; // expected-note {{invalidated here}}
+    if (cond) {
+      InfoPriv privTmp = {INFO_ENUM_1};
+      privTmp.flags = PrivBit;
+      info.next = &privTmp;
+    } // expected-note {{pointee 'privTmp' invalidated at end of scope}}
+
+    // If the 'if' above is taken, info.next is invalidated at the end of the scope, otherwise
+    // it's also invalid because it was initialized with 'nullptr'. This could be a noisy
+    // check if calls like `escape_info` are used to further initialize `info`.
+
+    escape_info(&info); // expected-remark {{pset => { invalid, nullptr }}}
+                        // expected-warning@-1 {{passing aggregate containing invalid pointer member 'info.next'}}
+    X::e.wildfn(0, &info); // expected-remark {{pset => { invalid, nullptr }}}
+                           // expected-warning@-1 {{passing aggregate containing invalid pointer member 'info.next'}}
+  }
+}
+
+void exploded_fields1(bool cond, unsigned t) {
+  {
+    InfoRaw info = {INFO_ENUM_0, &t};
+    if (cond) {
+      InfoPriv privTmp = {INFO_ENUM_1};
+      privTmp.flags = PrivBit;
+      info.next = &privTmp;
+    }
+
+    // A warning is not emitted here, lack of context for inferring
+    // anything about `cond` would make it too noisy given `info.next`
+    // wasn't null initialized.
+
+    escape_info(&info); // expected-remark {{pset => { t }}}
+  }
+}
diff --git a/clang/test/CIR/Incubator/Transforms/lifetime-check-coro-task.cpp b/clang/test/CIR/Incubator/Transforms/lifetime-check-coro-task.cpp
new file mode 100644
index 0000000000000..cf101b7904919
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lifetime-check-coro-task.cpp
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -I%S/Inputs -fclangir -fclangir-lifetime-check="history=all;remarks=all;history_limit=1" -clangir-verify-diagnostics -emit-cir %s -o %t.cir
+
+#include "folly-coro.h"
+
+folly::coro::Task<int> go(int const& val);
+folly::coro::Task<int> go1() {
+  auto task = go(1); // expected-note {{coroutine bound to resource with expired lifetime}}
+                     // expected-note@-1 {{at the end of scope or full-expression}}
+  co_return co_await task; // expected-remark {{pset => { task, invalid }}}
+                           // expected-warning@-1 {{use of coroutine 'task' with dangling reference}}
+}
+
+folly::coro::Task<int> go1_lambda() {
+  auto task = [i = 3]() -> folly::coro::Task<int> { // expected-note {{coroutine bound to lambda with expired lifetime}}
+    co_return i;
+  }(); // expected-note {{at the end of scope or full-expression}}
+  co_return co_await task; // expected-remark {{pset => { task, invalid }}}
+                           // expected-warning@-1 {{use of coroutine 'task' with dangling reference}}
+}
+
+folly::coro::Task<int> go2_lambda() {
+  auto task = []() -> folly::coro::Task<int> { // expected-note {{coroutine bound to lambda with expired lifetime}}
+    co_return 3;
+  }(); // expected-note {{at the end of scope or full-expression}}
+  co_return co_await task; // expected-remark {{pset => { task, invalid }}}
+                           // expected-warning@-1 {{use of coroutine 'task' with dangling reference}}
+}
+
+folly::coro::Task<int> go3_lambda() {
+  auto* fn = +[](int const& i) -> folly::coro::Task<int> { co_return i; };
+  auto task = fn(3); // expected-note {{coroutine bound to resource with expired lifetime}}
+                     // expected-note@-1 {{at the end of scope or full-expression}}
+  co_return co_await task; // expected-remark {{pset => { task, invalid }}}
+                           // expected-warning@-1 {{use of coroutine 'task' with dangling reference}}
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Transforms/lifetime-check-lambda.cpp b/clang/test/CIR/Incubator/Transforms/lifetime-check-lambda.cpp
new file mode 100644
index 0000000000000..617e18edf4995
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lifetime-check-lambda.cpp
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -I%S/Inputs -Wno-return-stack-address -fclangir -fclangir-lifetime-check="history=all;history_limit=1" -clangir-verify-diagnostics -emit-cir %s -o %t.cir
+
+// Check also implements:
+// EXP61-CPP. A lambda object must not outlive any of its reference captured objects
+
+// This can be diagnosed by clang with -Wreturn-stack-address
+auto g() {
+  int i = 12; // expected-note {{declared here but invalid after enclosing function ends}}
+  return [&] { // expected-warning {{returned lambda captures local variable}}
+    i += 100;
+    return i;
+  };
+}
+
+// This cannot be diagnosed by -Wreturn-stack-address
+auto g2() {
+  int i = 12; // expected-note {{declared here but invalid after enclosing function ends}}
+  auto lam = [&] {
+    i += 100;
+    return i;
+  };
+  return lam; // expected-warning {{returned lambda captures local variable}}
+}
+
+auto g3(int val) {
+  auto outer = [val] {
+    int i = val; // expected-note {{declared here but invalid after enclosing lambda ends}}
+    auto inner = [&] {
+      i += 30;
+      return i;
+    };
+    return inner; // expected-warning {{returned lambda captures local variable}}
+  };
+  return outer();
+}
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Transforms/lifetime-check-owner.cpp b/clang/test/CIR/Incubator/Transforms/lifetime-check-owner.cpp
new file mode 100644
index 0000000000000..089bc48860361
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lifetime-check-owner.cpp
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-lifetime-check="history=all;remarks=all;history_limit=1" -clangir-verify-diagnostics -emit-cir %s -o %t.cir
+
+struct [[gsl::Owner(int)]] MyIntOwner { // expected-remark {{pset => { fn_arg:0 }}}
+  int val;
+  MyIntOwner(int v) : val(v) {}
+  void changeInt(int i);
+  int &operator*();
+  int read() const;
+};
+
+struct [[gsl::Pointer(int)]] MyIntPointer { // expected-remark {{pset => { fn_arg:0 }}}
+  int *ptr;
+  MyIntPointer(int *p = nullptr) : ptr(p) {}
+  MyIntPointer(const MyIntOwner &);
+  int &operator*();
+  MyIntOwner toOwner();
+  int read() { return *ptr; }
+};
+
+void yolo() {
+  MyIntPointer p;
+  {
+    MyIntOwner o(1);
+    p = o;
+    *p = 3; // expected-remark {{pset => { o__1' }}}
+  }       // expected-note {{pointee 'o' invalidated at end of scope}}
+  *p = 4; // expected-warning {{use of invalid pointer 'p'}}
+  // expected-remark@-1 {{pset => { invalid }}}
+}
+
+void yolo2() {
+  MyIntPointer p;
+  MyIntOwner o(1);
+  p = o;
+  (void)o.read();
+  (void)p.read(); // expected-remark {{pset => { o__1' }}}
+  o.changeInt(42); // expected-note {{invalidated by non-const use of owner type}}
+  (void)p.read(); // expected-warning {{use of invalid pointer 'p'}}
+  // expected-remark@-1 {{pset => { invalid }}}
+  p = o;
+  (void)p.read(); // expected-remark {{pset => { o__2' }}}
+  o.changeInt(33); // expected-note {{invalidated by non-const use of owner type}}
+  (void)p.read(); // expected-warning {{use of invalid pointer 'p'}}
+  // expected-remark@-1 {{pset => { invalid }}}
+}
+
+void yolo3() {
+  MyIntPointer p, q;
+  MyIntOwner o(1);
+  p = o;
+  q = o;
+  (void)q.read(); // expected-remark {{pset => { o__1' }}}
+  (void)p.read(); // expected-remark {{pset => { o__1' }}}
+  o.changeInt(42); // expected-note {{invalidated by non-const use of owner type}}
+  (void)p.read(); // expected-warning {{use of invalid pointer 'p'}}
+  // expected-remark@-1 {{pset => { invalid }}}
+  (void)q.read(); // expected-warning {{use of invalid pointer 'q'}}
+  // expected-remark@-1 {{pset => { invalid }}}
+}
+
+void yolo4() {
+  MyIntOwner o0(1);
+  MyIntOwner o1(2);
+  MyIntPointer p{o0}, q{o1};
+  p.read(); // expected-remark {{pset => { o0__1' }}}
+  q.read(); // expected-remark {{pset => { o1__1' }}}
+  o0 = o1; // expected-note {{invalidated by non-const use of owner type}}
+  p.read(); // expected-warning {{use of invalid pointer 'p'}}
+  // expected-remark@-1 {{pset => { invalid }}}
+  q.read(); // expected-remark {{pset => { o1__1' }}}
+}
diff --git a/clang/test/CIR/Incubator/Transforms/lifetime-check-range-for-vector.cpp b/clang/test/CIR/Incubator/Transforms/lifetime-check-range-for-vector.cpp
new file mode 100644
index 0000000000000..fc51c5ce18322
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lifetime-check-range-for-vector.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -mconstructor-aliases -fclangir -fclangir-lifetime-check="history=all" -fclangir-skip-system-headers -clangir-verify-diagnostics -emit-cir %s -o %t.cir
+
+// XFAIL: *
+
+#include "std-cxx.h"
+
+// expected-no-diagnostics
+
+typedef enum SType {
+  INFO_ENUM_0 = 9,
+  INFO_ENUM_1 = 2020,
+} SType;
+
+typedef struct InfoRaw {
+    SType type;
+    const void* __attribute__((__may_alias__)) next;
+    unsigned u;
+} InfoRaw;
+
+void swappy(unsigned c) {
+  std::vector<InfoRaw> images(c);
+  for (auto& image : images) {
+    image = {INFO_ENUM_1};
+  }
+
+  std::vector<InfoRaw> images2(c);
+  for (unsigned i = 0; i < c; i++) {
+    images2[i] = {INFO_ENUM_1};
+  }
+}
diff --git a/clang/test/CIR/Incubator/Transforms/lifetime-check-remarks.cpp b/clang/test/CIR/Incubator/Transforms/lifetime-check-remarks.cpp
new file mode 100644
index 0000000000000..83cef25c54dae
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lifetime-check-remarks.cpp
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-lifetime-check="remarks=pset-invalid" -clangir-verify-diagnostics -emit-cir %s -o %t.cir
+
+int *p0() {
+  int *p = nullptr;
+  {
+    int x = 0;
+    p = &x;
+    *p = 42;
+  }
+  *p = 42; // expected-warning {{use of invalid pointer 'p'}}
+  // expected-remark@-1 {{pset => { invalid }}}
+  return p;
+}
+
+int *p1(bool b = true) {
+  int *p = nullptr;
+  if (b) {
+    int x = 0;
+    p = &x;
+    *p = 42;
+  }
+  *p = 42; // expected-warning {{use of invalid pointer 'p'}}
+  // expected-remark@-1 {{pset => { invalid, nullptr }}}
+  return p;
+}
+
+void p2(int b) {
+  int *p = nullptr;
+  switch (int x = 0; b) {
+  case 1:
+    p = &x;
+  case 2:
+    *p = 42; // expected-warning {{use of invalid pointer 'p'}}
+    // expected-remark@-1 {{pset => { nullptr }}}
+    break;
+  }
+  *p = 42; // expected-warning {{use of invalid pointer 'p'}}
+  // expected-remark@-1 {{pset => { nullptr, invalid }}}
+}
diff --git a/clang/test/CIR/Incubator/Transforms/lifetime-check-smart-pointer-after-move.cpp b/clang/test/CIR/Incubator/Transforms/lifetime-check-smart-pointer-after-move.cpp
new file mode 100644
index 0000000000000..417eb0753835d
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lifetime-check-smart-pointer-after-move.cpp
@@ -0,0 +1,239 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -fclangir-lifetime-check="history=invalid,null" -clangir-verify-diagnostics -emit-cir %s -o %t.cir
+
+namespace std {
+template <typename T>
+T&& move(T& t) {
+  return static_cast<T&&>(t);
+}
+
+// Minimal unique_ptr implementation for testing
+template <typename T>
+class [[gsl::Owner(T)]] unique_ptr {
+  T* ptr;
+
+public:
+  unique_ptr() : ptr(nullptr) {}
+  explicit unique_ptr(T* p) : ptr(p) {}
+
+  // Move constructor
+  unique_ptr(unique_ptr&& other) : ptr(other.ptr) {
+    other.ptr = nullptr;
+  }
+
+  // Move assignment
+  unique_ptr& operator=(unique_ptr&& other) {
+    if (this != &other) {
+      delete ptr;
+      ptr = other.ptr;
+      other.ptr = nullptr;
+    }
+    return *this;
+  }
+
+  // Deleted copy operations
+  unique_ptr(const unique_ptr&) = delete;
+  unique_ptr& operator=(const unique_ptr&) = delete;
+
+  ~unique_ptr() { delete ptr; }
+
+  // Safe operations (allowed after move)
+  T* get() const { return ptr; }
+  T* release() { T* p = ptr; ptr = nullptr; return p; }
+  void reset(T* p = nullptr) { delete ptr; ptr = p; }
+  explicit operator bool() const { return ptr != nullptr; }
+
+  // Unsafe operations (should warn if used after move)
+  T& operator*() const { return *ptr; }
+  T* operator->() const { return ptr; }
+};
+
+// Minimal shared_ptr implementation for testing
+template <typename T>
+class [[gsl::Owner(T)]] shared_ptr {
+  T* ptr;
+
+public:
+  shared_ptr() : ptr(nullptr) {}
+  explicit shared_ptr(T* p) : ptr(p) {}
+
+  // Move constructor
+  shared_ptr(shared_ptr&& other) : ptr(other.ptr) {
+    other.ptr = nullptr;
+  }
+
+  // Move assignment
+  shared_ptr& operator=(shared_ptr&& other) {
+    if (this != &other) {
+      delete ptr;
+      ptr = other.ptr;
+      other.ptr = nullptr;
+    }
+    return *this;
+  }
+
+  ~shared_ptr() { delete ptr; }
+
+  // Safe operations (allowed after move)
+  T* get() const { return ptr; }
+  void reset(T* p = nullptr) { delete ptr; ptr = p; }
+  explicit operator bool() const { return ptr != nullptr; }
+
+  // Unsafe operations (should warn if used after move)
+  T& operator*() const { return *ptr; }
+  T* operator->() const { return ptr; }
+};
+
+} // namespace std
+
+struct Data {
+  int value;
+  void process();
+};
+
+// Test 1: Safe operations after move (unique_ptr)
+void test_unique_ptr_safe_operations() {
+  std::unique_ptr<int> p(new int(42));
+  std::unique_ptr<int> q = std::move(p);
+
+  // Safe operations - should NOT warn
+  int* raw = p.get();        // OK - get() is safe
+  if (p) {                    // OK - bool conversion is safe
+    // Not reached
+  }
+  p.reset();                  // OK - reset() is safe
+}
+
+// Test 2: Unsafe operations after move (unique_ptr)
+void test_unique_ptr_unsafe_operations() {
+  std::unique_ptr<int> p(new int(42));
+  std::unique_ptr<int> q = std::move(p); // expected-note {{moved here via std::move or rvalue reference}}
+
+  // Unsafe operations - should warn
+  int x = *p; // expected-warning {{use of invalid pointer 'p'}}
+}
+
+// Test 3: Unsafe arrow operator after move (unique_ptr)
+void test_unique_ptr_arrow_after_move() {
+  std::unique_ptr<Data> p(new Data());
+  std::unique_ptr<Data> q = std::move(p); // expected-note {{moved here via std::move or rvalue reference}}
+
+  // Unsafe operation - should warn
+  p->process(); // expected-warning {{use of invalid pointer 'p'}}
+}
+
+// Test 4: Reinit after move (unique_ptr)
+// TODO: Implement reset() as reinitializing operation
+/*
+void test_unique_ptr_reinit() {
+  std::unique_ptr<int> p(new int(42));
+  std::unique_ptr<int> q = std::move(p);
+
+  p.reset(new int(10)); // Reinitialize
+  int x = *p; // OK - reinitialized
+}
+*/
+
+// Test 5: Safe operations after move (shared_ptr)
+void test_shared_ptr_safe_operations() {
+  std::shared_ptr<int> p(new int(42));
+  std::shared_ptr<int> q = std::move(p);
+
+  // Safe operations - should NOT warn
+  int* raw = p.get();         // OK - get() is safe
+  if (p) {                     // OK - bool conversion is safe
+    // Not reached
+  }
+  p.reset();                   // OK - reset() is safe
+}
+
+// Test 6: Unsafe operations after move (shared_ptr)
+void test_shared_ptr_unsafe_operations() {
+  std::shared_ptr<int> p(new int(42));
+  std::shared_ptr<int> q = std::move(p); // expected-note {{moved here via std::move or rvalue reference}}
+
+  // Unsafe operations - should warn
+  int x = *p; // expected-warning {{use of invalid pointer 'p'}}
+}
+
+// Test 7: Unsafe arrow operator after move (shared_ptr)
+void test_shared_ptr_arrow_after_move() {
+  std::shared_ptr<Data> p(new Data());
+  std::shared_ptr<Data> q = std::move(p); // expected-note {{moved here via std::move or rvalue reference}}
+
+  // Unsafe operation - should warn
+  p->process(); // expected-warning {{use of invalid pointer 'p'}}
+}
+
+// Test 8: Move via function parameter (unique_ptr)
+void consume_unique_ptr(std::unique_ptr<int>&& ptr) {}
+void consume_two_unique_ptrs(std::unique_ptr<int>&& ptr1, std::unique_ptr<int>&& ptr2) {}
+
+void test_unique_ptr_move_via_param() {
+  std::unique_ptr<int> p(new int(42));
+  consume_unique_ptr(std::move(p));
+
+  // Safe after move
+  if (p) {  // OK - bool conversion
+    // Not reached
+  }
+}
+
+// Test 9: Move via function parameter with unsafe use (unique_ptr)
+void test_unique_ptr_move_param_unsafe() {
+  std::unique_ptr<int> p(new int(42));
+  consume_unique_ptr(std::move(p)); // expected-note {{moved here via std::move or rvalue reference}}
+
+  // Unsafe after move
+  int x = *p; // expected-warning {{use of invalid pointer 'p'}}
+}
+
+// Test 10: Multiple safe operations after move
+void test_multiple_safe_ops() {
+  std::unique_ptr<int> p(new int(42));
+  std::unique_ptr<int> q = std::move(p);
+
+  // Multiple safe operations in sequence
+  int* r1 = p.get();  // OK
+  int* r2 = p.get();  // OK
+  if (p) {}           // OK
+  if (!p) {}          // OK
+  p.reset();          // OK
+}
+
+// Test 11: Safe then unsafe
+void test_safe_then_unsafe() {
+  std::unique_ptr<int> p(new int(42));
+  std::unique_ptr<int> q = std::move(p); // expected-note {{moved here via std::move or rvalue reference}}
+
+  int* raw = p.get();  // OK - safe operation
+  int x = *p;           // expected-warning {{use of invalid pointer 'p'}}
+}
+
+// Test 12: Move in conditional
+void test_move_in_conditional(bool cond) {
+  std::unique_ptr<int> p(new int(42));
+  if (cond) {
+    std::unique_ptr<int> q = std::move(p); // expected-note {{moved here via std::move or rvalue reference}}
+  }
+  int* raw = p.get();  // OK - get() is safe even after conditional move
+  int x = *p;           // expected-warning {{use of invalid pointer 'p'}}
+}
+
+// Test 13: Release after move
+void test_release_after_move() {
+  std::unique_ptr<int> p(new int(42));
+  std::unique_ptr<int> q = std::move(p);
+
+  int* raw = p.release(); // OK - release() is safe
+}
+
+// Test 14: Multiple owner arguments with rvalue references
+// Regression test for emittedDiagnostics guard bug
+void test_multi_arg_owner_move() {
+  std::unique_ptr<int> x(new int(1));
+  std::unique_ptr<int> y(new int(2));
+  consume_unique_ptr(std::move(x)); // expected-note {{moved here via std::move or rvalue reference}}
+  consume_two_unique_ptrs(std::move(x), std::move(y)); // expected-warning {{use of invalid pointer 'x'}}
+                                                        // expected-note@-1 {{moved here via std::move or rvalue reference}}
+  int use_y = *y; // expected-warning {{use of invalid pointer 'y'}}
+}
diff --git a/clang/test/CIR/Incubator/Transforms/lifetime-check-string.cpp b/clang/test/CIR/Incubator/Transforms/lifetime-check-string.cpp
new file mode 100644
index 0000000000000..4e5f780aaeee2
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lifetime-check-string.cpp
@@ -0,0 +1,87 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -fclangir-lifetime-check="history=all;remarks=all" -clangir-verify-diagnostics -emit-cir %s -o %t.cir
+
+int strlen(char const *);
+
+struct [[gsl::Owner(char *)]] String { // expected-remark {{pset => { fn_arg:0 }}}
+  long size;
+  long capacity;
+  const char *storage;
+  char operator[](int);
+  String() : size{0}, capacity{0} {}
+  String(char const *s) : size{strlen(s)}, capacity{size}, storage{s} {}
+};
+
+struct [[gsl::Pointer(int)]] StringView { // expected-remark {{pset => { fn_arg:0 }}}
+  long size;
+  const char *storage;
+  char operator[](int);
+  StringView(const String &s) : size{s.size}, storage{s.storage} {}
+  StringView() : size{0}, storage{nullptr} {}
+  int getSize() const;
+};
+
+void sv0() {
+  StringView sv;
+  String name = "abcdefghijklmnop";
+  sv = name;
+  (void)sv.getSize(); // expected-remark {{pset => { name__1' }}}
+  name = "frobozz"; // expected-note {{invalidated by non-const use of owner type}}
+  (void)sv.getSize(); // expected-warning {{use of invalid pointer 'sv'}}
+  // expected-remark@-1 {{pset => { invalid }}}
+  sv = name;
+  (void)sv.getSize(); // expected-remark {{pset => { name__2' }}}
+}
+
+void sv1() {
+  StringView sv, sv_other;
+  String name = "abcdefghijklmnop";
+  sv = name;
+  sv_other = sv;
+  (void)sv.getSize();  // expected-remark {{pset => { name__1' }}}
+  (void)sv_other.getSize();  // expected-remark {{pset => { name__1' }}}
+  name = "frobozz"; // expected-note {{invalidated by non-const use of owner type}}
+  (void)sv.getSize(); // expected-warning {{use of invalid pointer 'sv'}}
+  // expected-remark@-1 {{pset => { invalid }}}
+  (void)sv_other.getSize(); // expected-warning {{use of invalid pointer 'sv_other'}}
+  // expected-remark@-1 {{pset => { invalid }}}
+  sv = name;
+  (void)sv.getSize(); // expected-remark {{pset => { name__2' }}}
+}
+
+void sv2() {
+  StringView sv;
+  String name = "abcdefghijklmnop";
+  sv = name;
+  char read0 = sv[0]; // expected-remark {{pset => { name__1' }}}
+  name = "frobozz"; // expected-note {{invalidated by non-const use of owner type}}
+  char read1 = sv[0]; // expected-warning {{use of invalid pointer 'sv'}}
+  // expected-remark@-1 {{pset => { invalid }}}
+  sv = name;
+  char read2 = sv[0]; // expected-remark {{pset => { name__2' }}}
+  char read3 = name[1]; // expected-note {{invalidated by non-const use of owner type}}
+  char read4 = sv[1]; // expected-warning {{use of invalid pointer 'sv'}}
+  // expected-remark@-1 {{pset => { invalid }}}
+}
+
+class Stream {
+ public:
+  Stream& operator<<(char);
+  Stream& operator<<(const StringView &);
+  // FIXME: conservative for now, but do not invalidate const Owners?
+  Stream& operator<<(const String &);
+};
+
+void sv3() {
+  Stream cout;
+  StringView sv;
+  String name = "abcdefghijklmnop";
+  sv = name;
+  cout << sv; // expected-remark {{pset => { name__1' }}}
+  name = "frobozz"; // expected-note {{invalidated by non-const use of owner type}}
+  cout << sv[2]; // expected-warning {{use of invalid pointer 'sv'}}
+  sv = name; // expected-remark@-1 {{pset => { invalid }}}
+  cout << sv; // expected-remark {{pset => { name__2' }}}
+  cout << name; // expected-note {{invalidated by non-const use of owner type}}
+  cout << sv; // expected-warning {{passing invalid pointer 'sv'}}
+  // expected-remark@-1 {{pset => { invalid }}}
+}
diff --git a/clang/test/CIR/Incubator/Transforms/lifetime-check-use-after-move.cpp b/clang/test/CIR/Incubator/Transforms/lifetime-check-use-after-move.cpp
new file mode 100644
index 0000000000000..f9401e3d2fc23
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lifetime-check-use-after-move.cpp
@@ -0,0 +1,157 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -fclangir-lifetime-check="history=invalid,null" -clangir-verify-diagnostics -emit-cir %s -o %t.cir
+
+namespace std {
+template <typename T>
+T&& move(T& t) {
+  return static_cast<T&&>(t);
+}
+}
+
+void consume_int(int&&);
+void consume_double(double&&);
+void consume_float(float&&);
+
+// Test 1: Basic int move
+void test_int_basic() {
+  int a = 10;
+  consume_int(std::move(a)); // expected-note {{moved here via std::move or rvalue reference}}
+  int b = a; // expected-warning {{use of moved-from value 'a'}}
+}
+
+// Test 2: Multiple types
+void test_double() {
+  double d = 3.14;
+  consume_double(std::move(d)); // expected-note {{moved here via std::move or rvalue reference}}
+  double e = d; // expected-warning {{use of moved-from value 'd'}}
+}
+
+void test_float() {
+  float f = 1.5f;
+  consume_float(std::move(f)); // expected-note {{moved here via std::move or rvalue reference}}
+  float g = f; // expected-warning {{use of moved-from value 'f'}}
+}
+
+// Test 4: Negative cases - NOT moves
+void take_lvalue(int&);
+void take_value(int);
+
+void test_lvalue_ref() {
+  int a = 10;
+  take_lvalue(a); // Not a move
+  int b = a; // OK
+}
+
+void test_by_value() {
+  int a = 10;
+  take_value(a); // Not a move (copies value)
+  int b = a; // OK
+}
+
+// Test 5: Use in expressions
+void test_use_in_expr() {
+  int a = 10;
+  consume_int(std::move(a)); // expected-note {{moved here via std::move or rvalue reference}}
+  int b = a + 5; // expected-warning {{use of moved-from value 'a'}}
+}
+
+int test_use_in_return() {
+  int a = 10;
+  consume_int(std::move(a)); // expected-note {{moved here via std::move or rvalue reference}}
+  return a; // expected-warning {{use of moved-from value 'a'}}
+}
+
+// Test 6: Multiple uses after move
+void test_multiple_uses() {
+  int a = 10;
+  consume_int(std::move(a)); // expected-note {{moved here via std::move or rvalue reference}}
+  int b = a; // expected-warning {{use of moved-from value 'a'}}
+  int c = a; // expected-warning {{use of moved-from value 'a'}}
+}
+
+// Test 7: Move in conditional
+void test_move_in_if(bool cond) {
+  int a = 10;
+  if (cond) {
+    consume_int(std::move(a)); // expected-note {{moved here via std::move or rvalue reference}}
+  }
+  int b = a; // expected-warning {{use of moved-from value 'a'}}
+}
+
+// Test 8: Move with different primitive types
+void consume_char(char&&);
+void consume_bool(bool&&);
+
+void test_char() {
+  char c = 'x';
+  consume_char(std::move(c)); // expected-note {{moved here via std::move or rvalue reference}}
+  char d = c; // expected-warning {{use of moved-from value 'c'}}
+}
+
+void test_bool() {
+  bool b = true;
+  consume_bool(std::move(b)); // expected-note {{moved here via std::move or rvalue reference}}
+  bool c = b; // expected-warning {{use of moved-from value 'b'}}
+}
+
+// Test 8: Conditional move
+void test_conditional_move(bool cond) {
+  int a = 10;
+  if (cond) {
+    consume_int(std::move(a)); // expected-note {{moved here via std::move or rvalue reference}}
+  }
+  int b = a; // expected-warning {{use of moved-from value 'a'}}
+}
+
+// Test 10: Move-after-move
+void test_move_after_move() {
+  int a = 10;
+  consume_int(std::move(a)); // expected-note {{moved here via std::move or rvalue reference}}
+  consume_int(std::move(a)); // expected-warning {{use of moved-from value 'a'}}
+}
+
+// Test 16: Function parameter move
+void test_parameter_move(int a) {
+  consume_int(std::move(a)); // expected-note {{moved here via std::move or rvalue reference}}
+  int b = a; // expected-warning {{use of moved-from value 'a'}}
+}
+
+// Test 17: Loop with conditional move
+void test_loop_with_move() {
+  int a = 10;
+  for (int i = 0; i < 3; i++) {
+    if (i == 1) {
+      consume_int(std::move(a)); // expected-note {{moved here via std::move or rvalue reference}}
+                                  // expected-warning@-1 {{use of moved-from value 'a'}}
+    }
+    if (i == 2) {
+      int b = a; // expected-warning {{use of moved-from value 'a'}}
+    }
+  }
+}
+
+// Test 15: Switch with fallthrough
+void test_switch_fallthrough(int cond) {
+  int a = 10;
+  switch (cond) {
+  case 1:
+    consume_int(std::move(a)); // expected-note {{moved here via std::move or rvalue reference}}
+  case 2: // fallthrough
+    int b = a; // expected-warning {{use of moved-from value 'a'}}
+    break;
+  }
+}
+
+// Test 21: Move in declaration
+void test_move_in_declaration() {
+  int a = 10;
+  int b(std::move(a)); // expected-note {{moved here via std::move or rvalue reference}}
+  int c = a; // expected-warning {{use of moved-from value 'a'}}
+}
+
+// Test 22: Warn at every use location (consistent with invalid pointer behavior)
+void test_warn_at_every_use() {
+  int a = 10;
+  consume_int(std::move(a)); // expected-note {{moved here via std::move or rvalue reference}}
+  int b = a; // expected-warning {{use of moved-from value 'a'}}
+  int c = a; // expected-warning {{use of moved-from value 'a'}}
+}
diff --git a/clang/test/CIR/Incubator/Transforms/lifetime-check.cpp b/clang/test/CIR/Incubator/Transforms/lifetime-check.cpp
new file mode 100644
index 0000000000000..017de9f6495d3
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lifetime-check.cpp
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-lifetime-check="history=invalid,null" -clangir-verify-diagnostics -emit-cir %s -o %t.cir
+
+int *p0() {
+  int *p = nullptr;
+  {
+    int x = 0;
+    p = &x;
+    *p = 42;
+  }        // expected-note {{pointee 'x' invalidated at end of scope}}
+  *p = 42; // expected-warning {{use of invalid pointer 'p'}}
+  return p;
+}
+
+int *p1(bool b = true) {
+  int *p = nullptr; // expected-note {{invalidated here}}
+  if (b) {
+    int x = 0;
+    p = &x;
+    *p = 42;
+  }        // expected-note {{pointee 'x' invalidated at end of scope}}
+  *p = 42; // expected-warning {{use of invalid pointer 'p'}}
+  return p;
+}
+
+void p2() {
+  int *p = nullptr; // expected-note {{invalidated here}}
+  *p = 42;          // expected-warning {{use of invalid pointer 'p'}}
+}
+
+void p3() {
+  int *p;
+  p = nullptr; // expected-note {{invalidated here}}
+  *p = 42;     // expected-warning {{use of invalid pointer 'p'}}
+}
+
+void p4() {
+  int *p;  // expected-note {{uninitialized here}}
+  *p = 42; // expected-warning {{use of invalid pointer 'p'}}
+}
+
+void p5() {
+  int *p = nullptr;
+  {
+    int a[10];
+    p = &a[0];
+  }        // expected-note {{pointee 'a' invalidated at end of scope}}
+  *p = 42; // expected-warning {{use of invalid pointer 'p'}}
+}
diff --git a/clang/test/CIR/Incubator/Transforms/lifetime-fn-args.cpp b/clang/test/CIR/Incubator/Transforms/lifetime-fn-args.cpp
new file mode 100644
index 0000000000000..eea6ae863d3c8
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lifetime-fn-args.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -fclangir-lifetime-check="history=all;remarks=all" -clangir-verify-diagnostics -emit-cir %s -o %t.cir
+
+struct A {
+  void* ctx;
+  void setInfo(void** ctxPtr);
+};
+
+void A::setInfo(void** ctxPtr) {
+  if (ctxPtr != nullptr) {
+    *ctxPtr = ctx; // expected-remark {{pset => { fn_arg:1 }}}
+  }
+}
diff --git a/clang/test/CIR/Incubator/Transforms/lifetime-invalid-option.cpp b/clang/test/CIR/Incubator/Transforms/lifetime-invalid-option.cpp
new file mode 100644
index 0000000000000..70f419b7c2743
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lifetime-invalid-option.cpp
@@ -0,0 +1,7 @@
+// RUN: not %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-lifetime-check="yolo=invalid,null" -emit-cir %s -o - 2>&1 | FileCheck %s
+// RUN: not %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-idiom-recognizer="idiom=invalid" -emit-cir %s -o - 2>&1 | FileCheck %s --check-prefix=IDIOM
+// RUN: not %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-idiom-recognizer -fclangir-lib-opt="libopt=invalid" -emit-cir %s -o - 2>&1 | FileCheck %s --check-prefix=LIBOPT
+
+// CHECK: clangir pass option 'yolo=invalid,null' not recognized
+// IDIOM: clangir pass option 'idiom=invalid' not recognized
+// LIBOPT: clangir pass option 'libopt=invalid' not recognized
diff --git a/clang/test/CIR/Incubator/Transforms/lifetime-loop-valid.cpp b/clang/test/CIR/Incubator/Transforms/lifetime-loop-valid.cpp
new file mode 100644
index 0000000000000..e7ee7aca7cf38
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lifetime-loop-valid.cpp
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-lifetime-check="history=invalid,null;remarks=pset-always" -clangir-verify-diagnostics -emit-cir %s -o %t.cir
+
+// Loops that do not change psets
+
+// p1179r1: 2.4.9.1
+// No diagnostic needed, pset(p) = {a} before and after the loop
+void valid0(bool b, int j) {
+  int a[10];
+  int *p = &a[0];
+  while (j) {
+    if (b) {
+      p = &a[j];
+    }
+    j = j - 1;
+  }
+  *p = 12; // expected-remark {{pset => { a }}}
+}
+
+// p1179r1: 2.4.9.2
+void valid1(bool b, int j) {
+  int a[4], c[5];
+  int *p = &a[0];
+  while (j) {
+    if (b) {
+      p = &c[j];
+    }
+    j = j - 1;
+  }
+  *p = 0; // expected-remark {{pset => { a, c }}}
+
+  while (j) {
+    if (b) {
+      p = &c[j];
+    }
+    j = j - 1;
+  }
+  *p = 0; // expected-remark {{pset => { a, c }}}
+}
diff --git a/clang/test/CIR/Incubator/Transforms/lifetime-loop.cpp b/clang/test/CIR/Incubator/Transforms/lifetime-loop.cpp
new file mode 100644
index 0000000000000..cf58ddf48f73b
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lifetime-loop.cpp
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-lifetime-check="history=invalid,null;remarks=pset-invalid" -clangir-verify-diagnostics -emit-cir %s -o %t.cir
+
+void loop_basic_for() {
+  int *p = nullptr; // expected-note {{invalidated here}}
+  for (int i = 0; i < 10; i = i + 1) {
+    int x = 0;
+    p = &x;
+    *p = 42;
+  }        // expected-note {{pointee 'x' invalidated at end of scope}}
+  *p = 42; // expected-warning {{use of invalid pointer 'p'}}
+           // expected-remark at -1 {{pset => { nullptr, invalid }}}
+}
+
+void loop_basic_while() {
+  int *p = nullptr; // expected-note {{invalidated here}}
+  int i = 0;
+  while (i < 10) {
+    int x = 0;
+    p = &x;
+    *p = 42;
+    i = i + 1;
+  }        // expected-note {{pointee 'x' invalidated at end of scope}}
+  *p = 42; // expected-warning {{use of invalid pointer 'p'}}
+           // expected-remark at -1 {{pset => { nullptr, invalid }}}
+}
+
+void loop_basic_dowhile() {
+  int *p = nullptr; // expected-note {{invalidated here}}
+  int i = 0;
+  do {
+    int x = 0;
+    p = &x;
+    *p = 42;
+    i = i + 1;
+  } while (i < 10); // expected-note {{pointee 'x' invalidated at end of scope}}
+  *p = 42;          // expected-warning {{use of invalid pointer 'p'}}
+                    // expected-remark at -1 {{pset => { nullptr, invalid }}}
+}
+
+// p1179r1: 2.4.9.3
+void loop0(bool b, int j) {
+  int a[4], c[4];
+  int *p = &a[0];
+  while (j) {
+    // This access is invalidated after the first iteration
+    *p = 42;     // expected-warning {{use of invalid pointer 'p'}}
+                 // expected-remark at -1 {{pset => { c, nullptr }}}
+    p = nullptr; // expected-note {{invalidated here}}
+    if (b) {
+      p = &c[j];
+    }
+    j = j - 1;
+  }
+  *p = 0; // expected-warning {{use of invalid pointer 'p'}}
+          // expected-remark at -1 {{pset => { a, c, nullptr }}}
+}
diff --git a/clang/test/CIR/Incubator/Transforms/lifetime-null-passing.cpp b/clang/test/CIR/Incubator/Transforms/lifetime-null-passing.cpp
new file mode 100644
index 0000000000000..05c97f8df3b0c
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lifetime-null-passing.cpp
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -fclangir-lifetime-check="history=all" -clangir-verify-diagnostics -emit-cir %s -o %t.cir
+
+class _j {};
+typedef _j* jobj;
+
+typedef enum SType {
+  INFO_ENUM_0 = 9,
+  INFO_ENUM_1 = 2020,
+} SType;
+
+typedef SType ( *FnPtr2)(unsigned session, jobj* surface);
+
+struct X {
+  struct entries {
+    FnPtr2 wildfn = nullptr;
+  };
+  static entries e;
+};
+
+void nullpassing() {
+  jobj o = nullptr;
+  X::e.wildfn(0, &o);
+}
diff --git a/clang/test/CIR/Incubator/Transforms/lifetime-switch.cpp b/clang/test/CIR/Incubator/Transforms/lifetime-switch.cpp
new file mode 100644
index 0000000000000..ca56b95f71a0a
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lifetime-switch.cpp
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-lifetime-check="history=invalid,null" -clangir-verify-diagnostics -emit-cir %s -o %t.cir
+
+void s0(int b) {
+  int *p = nullptr;
+  switch (b) {
+  default: {
+    int x = 0;
+    p = &x;
+    *p = 42;
+  } // expected-note {{pointee 'x' invalidated at end of scope}}
+  }
+  *p = 42; // expected-warning {{use of invalid pointer 'p'}}
+}
+
+void s1(int b) {
+  int *p = nullptr;
+  switch (b) {
+  default:
+    int x = 0;
+    p = &x;
+    *p = 42;
+  }        // expected-note {{pointee 'x' invalidated at end of scope}}
+  *p = 42; // expected-warning {{use of invalid pointer 'p'}}
+}
+
+void s2(int b) {
+  int *p = nullptr;
+  switch (int x = 0; b) {
+  default:
+    p = &x;
+    *p = 42;
+  }        // expected-note {{pointee 'x' invalidated at end of scope}}
+  *p = 42; // expected-warning {{use of invalid pointer 'p'}}
+}
+
+void s3(int b) {
+  int *p = nullptr; // expected-note {{invalidated here}}
+  switch (int x = 0; b) {
+  case 1:
+    p = &x;
+  case 2:
+    *p = 42; // expected-warning {{use of invalid pointer 'p'}}
+    break;
+  }        // expected-note {{pointee 'x' invalidated at end of scope}}
+  *p = 42; // expected-warning {{use of invalid pointer 'p'}}
+}
diff --git a/clang/test/CIR/Incubator/Transforms/lifetime-this.cpp b/clang/test/CIR/Incubator/Transforms/lifetime-this.cpp
new file mode 100644
index 0000000000000..78eb7ef3e4ebf
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/lifetime-this.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -I%S/../Inputs -mconstructor-aliases -fclangir -fclangir-lifetime-check="history=all;remarks=all" -fclangir-skip-system-headers -clangir-verify-diagnostics -emit-cir %s -o %t.cir
+
+#include "std-cxx.h"
+
+struct S {
+  S(int, int, const S* s);
+  void f(int a, int b);
+};
+
+void S::f(int a, int b) {
+  std::shared_ptr<S> l = std::make_shared<S>(a, b, this); // expected-remark {{pset => { this }}}
+}
diff --git a/clang/test/CIR/Incubator/Transforms/live-object-analysis.cir b/clang/test/CIR/Incubator/Transforms/live-object-analysis.cir
new file mode 100644
index 0000000000000..327077ec99e2a
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/live-object-analysis.cir
@@ -0,0 +1,158 @@
+// RUN: cir-opt --pass-pipeline='builtin.module(cir.func(cir-live-object-diagnostics))' --verify-diagnostics %s -o /dev/null
+
+!s32i = !cir.int<s, 32>
+#fn_attr = #cir<extra({nothrow = #cir.nothrow, uwtable = #cir.uwtable<async>})>
+module {
+  cir.func dso_local @test_alloca() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["local"] {alignment = 4 : i64}
+    %1 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["ptr"] {alignment = 8 : i64}
+    cir.store align(8) %0, %1 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+    %2 = cir.const #cir.int<1> : !s32i
+    %3 = cir.load deref align(8) %1 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+    // expected-remark at above {{last use of ptr}}
+    cir.store align(4) %2, %3 : !s32i, !cir.ptr<!s32i>
+    // expected-remark at above {{last use of local}}
+    cir.return
+  }
+  cir.func dso_local @test_arg(%arg0: !s32i) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["arg", init] {alignment = 4 : i64}
+    %1 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["ptr", init] {alignment = 8 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    cir.store align(8) %0, %1 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+    %2 = cir.const #cir.int<1> : !s32i
+    %3 = cir.load deref align(8) %1 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+    // expected-remark at above {{last use of ptr}}
+    cir.store align(4) %2, %3 : !s32i, !cir.ptr<!s32i>
+    // expected-remark at above {{last use of arg}}
+    cir.return
+  }
+  cir.func dso_local @test_unknown(%arg0: !cir.ptr<!s32i>) {
+    %0 = cir.const #cir.int<1> : !s32i
+    cir.store align(4) %0, %arg0 : !s32i, !cir.ptr<!s32i>
+    cir.return
+  }
+  cir.func dso_local @test_if_else() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a"] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["b"] {alignment = 4 : i64}
+    %2 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["ptr"] {alignment = 8 : i64}
+    cir.scope {
+      %5 = cir.load align(4) %0 : !cir.ptr<!s32i>, !s32i
+      %6 = cir.load align(4) %1 : !cir.ptr<!s32i>, !s32i
+      %7 = cir.cmp(lt, %5, %6) : !s32i, !cir.bool
+      cir.if %7 {
+        cir.store align(8) %0, %2 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+      } else {
+        cir.store align(8) %1, %2 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+      }
+    }
+    %3 = cir.const #cir.int<1> : !s32i
+    %4 = cir.load deref align(8) %2 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+    // expected-remark at above {{last use of ptr}}
+    cir.store align(4) %3, %4 : !s32i, !cir.ptr<!s32i>
+    // expected-remark at above {{last use of a}}
+    // expected-remark at above {{last use of b}}
+    cir.return
+  }
+  cir.func dso_local @test_loop() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a"] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["b"] {alignment = 4 : i64}
+    %2 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["ptr", init] {alignment = 8 : i64}
+    cir.store align(8) %0, %2 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+    cir.scope {
+      cir.while {
+        %3 = cir.load deref align(8) %2 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+        %4 = cir.load align(4) %3 : !cir.ptr<!s32i>, !s32i
+        %5 = cir.const #cir.int<0> : !s32i
+        %6 = cir.cmp(gt, %4, %5) : !s32i, !cir.bool
+        cir.condition(%6)
+      } do {
+        cir.scope {
+          cir.store align(8) %1, %2 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+        }
+        cir.yield
+      }
+    }
+    cir.return
+  }
+  cir.func dso_local @test_ternary(%arg0: !s32i) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["arg", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["local"] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    %2 = cir.load align(4) %0 : !cir.ptr<!s32i>, !s32i
+    %3 = cir.cast int_to_bool %2 : !s32i -> !cir.bool
+    %4 = cir.ternary(%3, true {
+      cir.yield %0 : !cir.ptr<!s32i>
+    }, false {
+      cir.yield %1 : !cir.ptr<!s32i>
+    }) : (!cir.bool) -> !cir.ptr<!s32i>
+    %5 = cir.const #cir.int<1> : !s32i
+    cir.store align(4) %5, %4 : !s32i, !cir.ptr<!s32i>
+    // expected-remark at above {{last use of arg}}
+    // expected-remark at above {{last use of local}}
+    cir.return
+  }
+  cir.func no_proto dso_local @test_goto() -> !s32i extra(#fn_attr) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a"] {alignment = 4 : i64}
+    %1 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["local_ptr", init] {alignment = 8 : i64}
+    cir.store align(8) %0, %1 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+    cir.goto "SKIP"
+  ^bb1(%out: !s32i):  // 2 preds: ^bb2, ^bb3
+    cir.return %out : !s32i
+  ^bb2:  // no predecessors
+    %2 = cir.load deref align(8) %1 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+    // expected-remark at above {{last use of local_ptr}}
+    %3 = cir.load align(4) %2 : !cir.ptr<!s32i>, !s32i
+    // expected-remark at above {{last use of a}}
+    cir.br ^bb1(%3: !s32i)
+  ^bb3:  // no predecessors
+    cir.label "SKIP"
+    %4 = cir.load deref align(8) %1 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+    // expected-remark at above {{last use of local_ptr}}
+    %5 = cir.load align(4) %4 : !cir.ptr<!s32i>, !s32i
+    // expected-remark at above {{last use of a}}
+    cir.br ^bb1(%5: !s32i)
+  }
+  cir.func private @all_effects(!cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>) -> !cir.ptr<!s32i>
+  cir.func dso_local @test_call(%arg0: !cir.ptr<!s32i>) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["local"] {alignment = 4 : i64}
+    %1 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["ptr", init] {alignment = 8 : i64}
+    cir.store align(8) %0, %1 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+    %2 = cir.call @all_effects(%1, %arg0) : (!cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>) -> !cir.ptr<!s32i>
+    %3 = cir.const #cir.int<1> : !s32i
+    %4 = cir.load deref align(8) %1 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+    cir.store align(4) %3, %4 : !s32i, !cir.ptr<!s32i>
+    %5 = cir.const #cir.int<2> : !s32i
+    cir.store align(4) %5, %2 : !s32i, !cir.ptr<!s32i>
+    cir.return
+  }
+  cir.func private @read_effects(!cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>) -> !cir.ptr<!s32i>
+  cir.func dso_local @test_call_pure(%arg0: !cir.ptr<!s32i>) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["local"] {alignment = 4 : i64}
+    %1 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["ptr", init] {alignment = 8 : i64}
+    cir.store align(8) %0, %1 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+    %2 = cir.call @read_effects(%1, %arg0) : (!cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>) -> !cir.ptr<!s32i> side_effect(pure)
+    %3 = cir.const #cir.int<1> : !s32i
+    %4 = cir.load deref align(8) %1 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+    cir.store align(4) %3, %4 : !s32i, !cir.ptr<!s32i>
+    %5 = cir.const #cir.int<2> : !s32i
+    cir.store align(4) %5, %2 : !s32i, !cir.ptr<!s32i>
+    // expected-remark at above {{last use of ptr}}
+    // expected-remark at above {{last use of local}}
+    cir.return
+  }
+  cir.func private @no_effects(!cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>) -> !cir.ptr<!s32i>
+  cir.func dso_local @test_call_const(%arg0: !cir.ptr<!s32i>) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["local"] {alignment = 4 : i64}
+    %1 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["ptr", init] {alignment = 8 : i64}
+    cir.store align(8) %0, %1 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+    %2 = cir.call @no_effects(%1, %arg0) : (!cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>) -> !cir.ptr<!s32i> side_effect(const)
+    %3 = cir.const #cir.int<1> : !s32i
+    %4 = cir.load deref align(8) %1 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+    // expected-remark at above {{last use of ptr}}
+    cir.store align(4) %3, %4 : !s32i, !cir.ptr<!s32i>
+    // expected-remark at above {{last use of local}}
+    %5 = cir.const #cir.int<2> : !s32i
+    cir.store align(4) %5, %2 : !s32i, !cir.ptr<!s32i>
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Transforms/live-object-analysis.cpp b/clang/test/CIR/Incubator/Transforms/live-object-analysis.cpp
new file mode 100644
index 0000000000000..19742fc225b19
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/live-object-analysis.cpp
@@ -0,0 +1,72 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | cir-opt  --pass-pipeline='builtin.module(cir.func(cir-live-object-diagnostics))' --verify-diagnostics -o /dev/null
+
+struct Node {
+  Node() = default;
+  Node(Node &&) = default;
+  // expected-remark at above {{last use of this}}
+  Node(const Node &) = default;
+  // expected-remark at above {{last use of this}}
+  Node &operator=(Node &&) = default;
+  // expected-remark at above {{last use of this}}
+  // expected-remark at above {{last use of __retval}}
+  Node &operator=(const Node &) = default;
+  // expected-remark at above {{last use of this}}
+  // expected-remark at above {{last use of __retval}}
+
+  int val;
+};
+
+int test_copy_ctor() {
+  Node orig;
+  Node copy(orig);
+  // expected-remark at above {{last use of orig}}
+
+  return copy.val;
+  // expected-remark at above {{last use of copy}}
+  // expected-remark at above {{last use of __retval}}
+}
+
+int test_move_ctor() {
+  Node orig;
+  Node move((Node &&)orig);
+  // expected-remark at above {{last use of orig}}
+
+  return move.val;
+  // expected-remark at above {{last use of move}}
+  // expected-remark at above {{last use of __retval}}
+}
+
+int test_copy_assign() {
+  Node orig, copy;
+  copy = orig;
+  // expected-remark at above {{last use of orig}}
+
+  return copy.val;
+  // expected-remark at above {{last use of copy}}
+  // expected-remark at above {{last use of __retval}}
+}
+
+int test_move_assign() {
+  Node orig, move;
+  move = (Node &&)orig;
+  // expected-remark at above {{last use of orig}}
+
+  return move.val;
+  // expected-remark at above {{last use of move}}
+  // expected-remark at above {{last use of __retval}}
+}
+
+int test_move_chain() {
+  Node first, second, third, fourth;
+
+  second = first;
+  // expected-remark at above {{last use of first}}
+  third = second;
+  // expected-remark at above {{last use of second}}
+  fourth = third;
+  // expected-remark at above {{last use of third}}
+
+  return fourth.val;
+  // expected-remark at above {{last use of fourth}}
+  // expected-remark at above {{last use of __retval}}
+}
diff --git a/clang/test/CIR/Incubator/Transforms/loop.cir b/clang/test/CIR/Incubator/Transforms/loop.cir
new file mode 100644
index 0000000000000..8204216b6f523
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/loop.cir
@@ -0,0 +1,122 @@
+// RUN: cir-opt %s -cir-flatten-cfg -o - | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+
+  cir.func @testFor(%arg0 : !cir.bool) {
+    cir.for : cond {
+      cir.condition(%arg0)
+    } body {
+      cir.yield
+    } step {
+      cir.yield
+    }
+    cir.return
+  }
+// CHECK:  cir.func @testFor(%arg0: !cir.bool) {
+// CHECK:    cir.br ^bb[[#COND:]]
+// CHECK:  ^bb[[#COND]]:
+// CHECK:    cir.brcond %arg0 ^bb[[#BODY:]], ^bb[[#EXIT:]]
+// CHECK:  ^bb[[#BODY]]:
+// CHECK:    cir.br ^bb[[#STEP:]]
+// CHECK:  ^bb[[#STEP]]:
+// CHECK:    cir.br ^bb[[#COND:]]
+// CHECK:  ^bb[[#EXIT]]:
+// CHECK:    cir.return
+// CHECK:  }
+
+  // Test while cir.loop operation lowering.
+  cir.func @testWhile(%arg0 : !cir.bool) {
+    cir.while {
+      cir.condition(%arg0)
+    } do {
+      cir.yield
+    }
+    cir.return
+  }
+// CHECK:  cir.func @testWhile(%arg0: !cir.bool) {
+// CHECK:    cir.br ^bb[[#COND:]]
+// CHECK:  ^bb[[#COND]]:
+// CHECK:    cir.brcond %arg0 ^bb[[#BODY:]], ^bb[[#EXIT:]]
+// CHECK:  ^bb[[#BODY]]:
+// CHECK:    cir.br ^bb[[#COND:]]
+// CHECK:  ^bb[[#EXIT]]:
+// CHECK:    cir.return
+// CHECK:  }
+
+  // Test do-while cir.loop operation lowering.
+  cir.func @testDoWhile(%arg0 : !cir.bool) {
+    cir.do {
+      cir.yield
+    } while {
+      cir.condition(%arg0)
+    }
+    cir.return
+  }
+// CHECK:  cir.func @testDoWhile(%arg0: !cir.bool) {
+// CHECK:    cir.br ^bb[[#BODY:]]
+// CHECK:  ^bb[[#COND:]]:
+// CHECK:    cir.brcond %arg0 ^bb[[#BODY:]], ^bb[[#EXIT:]]
+// CHECK:  ^bb[[#BODY]]:
+// CHECK:    cir.br ^bb[[#COND:]]
+// CHECK:  ^bb[[#EXIT]]:
+// CHECK:    cir.return
+// CHECK:  }
+
+  // Test corner case:
+  // while (1) {
+  //     break;
+  // }
+  cir.func @testWhileWithBreakTerminatedBody(%arg0 : !cir.bool) {
+    cir.while {
+      cir.condition(%arg0)
+    } do {
+      cir.break
+    }
+    cir.return
+  }
+// CHECK:  cir.func @testWhileWithBreakTerminatedBody(%arg0: !cir.bool) {
+// CHECK:    cir.br ^bb[[#COND:]]
+// CHECK:  ^bb[[#COND]]:
+// CHECK:    cir.brcond %arg0 ^bb[[#BODY:]], ^bb[[#EXIT:]]
+// CHECK:  ^bb[[#BODY]]:
+// CHECK:    cir.br ^bb[[#EXIT]]
+// CHECK:  ^bb[[#EXIT]]:
+// CHECK:    cir.return
+// CHECK:  }
+
+  // Test C-only corner case - ensure lowering does not fail:
+  // for (;;) {
+  //     break;
+  // }
+  cir.func @forWithBreakTerminatedScopeInBody(%arg0 : !cir.bool) {
+      cir.for : cond {
+        cir.condition(%arg0)
+      } body {
+        cir.scope { // FIXME(cir): Redundant scope emitted during C codegen.
+          cir.break
+        }
+        cir.yield
+      } step {
+        cir.yield
+      }
+    cir.return
+  }
+// CHECK:  cir.func @forWithBreakTerminatedScopeInBody(%arg0: !cir.bool) {
+// CHECK:    cir.br ^bb[[#COND:]]
+// CHECK:  ^bb[[#COND]]:
+// CHECK:    cir.brcond %arg0 ^bb[[#BODY:]], ^bb[[#EXIT:]]
+// CHECK:  ^bb[[#BODY]]:
+// CHECK:    cir.br ^bb[[#EX_SCOPE_IN:]]
+// CHECK:  ^bb[[#EX_SCOPE_IN]]:
+// CHECK:    cir.br ^bb[[#EXIT:]]
+// CHECK:  ^bb[[#EX_SCOPE_EXIT:]]:
+// CHECK:    cir.br ^bb[[#STEP:]]
+// CHECK:  ^bb[[#STEP]]:
+// CHECK:    cir.br ^bb[[#COND:]]
+// CHECK:  ^bb[[#EXIT]]:
+// CHECK:    cir.return
+// CHECK:  }
+
+}  
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Transforms/mem2reg.c b/clang/test/CIR/Incubator/Transforms/mem2reg.c
new file mode 100644
index 0000000000000..c145b45f16142
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/mem2reg.c
@@ -0,0 +1,191 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=BEFORE
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -fclangir-mem2reg %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=MEM2REG
+
+int return_42() {
+  int y = 42;
+  return y;
+}
+
+// BEFORE: cir.func {{.*@return_42}}
+// BEFORE:   %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// BEFORE:   %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {alignment = 4 : i64}
+// BEFORE:   %2 = cir.const #cir.int<42> : !s32i
+// BEFORE:   cir.store{{.*}} %2, %1 : !s32i, !cir.ptr<!s32i>
+// BEFORE:   %3 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// BEFORE:   cir.store{{.*}} %3, %0 : !s32i, !cir.ptr<!s32i>
+// BEFORE:   %4 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// BEFORE:   cir.return %4 : !s32i
+
+// MEM2REG:  cir.func {{.*@return_42()}}
+// MEM2REG:    %0 = cir.const #cir.int<42> : !s32i
+// MEM2REG:    cir.return %0 : !s32i
+
+void alloca_in_loop(int* ar, int n) {
+  for (int i = 0; i < n; ++i) {
+    int a = 4;
+    ar[i] = a;
+  }
+}
+
+// BEFORE:  cir.func {{.*@alloca_in_loop}}
+// BEFORE:    %0 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["ar", init] {alignment = 8 : i64}
+// BEFORE:    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["n", init] {alignment = 4 : i64}
+// BEFORE:    cir.store{{.*}} %arg0, %0 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// BEFORE:    cir.store{{.*}} %arg1, %1 : !s32i, !cir.ptr<!s32i>
+// BEFORE:    cir.scope {
+// BEFORE:      %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+// BEFORE:      %3 = cir.const #cir.int<0> : !s32i
+// BEFORE:      cir.store{{.*}} %3, %2 : !s32i, !cir.ptr<!s32i>
+// BEFORE:      cir.for : cond {
+// BEFORE:        %4 = cir.load{{.*}} %2 : !cir.ptr<!s32i>, !s32i
+// BEFORE:        %5 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// BEFORE:        %6 = cir.cmp(lt, %4, %5) : !s32i, !cir.bool
+// BEFORE:        cir.condition(%6)
+// BEFORE:      } body {
+// BEFORE:        cir.scope {
+// BEFORE:          %4 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+// BEFORE:          %5 = cir.const #cir.int<4> : !s32i
+// BEFORE:          cir.store{{.*}} %5, %4 : !s32i, !cir.ptr<!s32i>
+// BEFORE:          %6 = cir.load{{.*}} %4 : !cir.ptr<!s32i>, !s32i
+// BEFORE:          %7 = cir.load{{.*}} %0 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// BEFORE:          %8 = cir.load{{.*}} %2 : !cir.ptr<!s32i>, !s32i
+// BEFORE:          %9 = cir.ptr_stride %7, %8 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+// BEFORE:          cir.store{{.*}} %6, %9 : !s32i, !cir.ptr<!s32i>
+// BEFORE:        }
+// BEFORE:        cir.yield
+// BEFORE:      } step {
+// BEFORE:        %4 = cir.load{{.*}} %2 : !cir.ptr<!s32i>, !s32i
+// BEFORE:        %5 = cir.unary(inc, %4) nsw : !s32i, !s32i
+// BEFORE:        cir.store{{.*}} %5, %2 : !s32i, !cir.ptr<!s32i>
+// BEFORE:        cir.yield
+// BEFORE:      }
+// BEFORE:    }
+// BEFORE:    cir.return
+
+// MEM2REG:  cir.func {{.*@alloca_in_loop}}
+// MEM2REG:    cir.br ^bb1
+// MEM2REG:  ^bb1:  // pred: ^bb0
+// MEM2REG:    %0 = cir.const #cir.int<0> : !s32i
+// MEM2REG:    cir.br ^bb2(%0 : !s32i)
+// MEM2REG:  ^bb2(%1: !s32i{{.*}}):  // 2 preds: ^bb1, ^bb6
+// MEM2REG:    %2 = cir.cmp(lt, %1, %arg1) : !s32i, !cir.bool
+// MEM2REG:    cir.brcond %2 ^bb3, ^bb7
+// MEM2REG:  ^bb3:  // pred: ^bb2
+// MEM2REG:    cir.br ^bb4
+// MEM2REG:  ^bb4:  // pred: ^bb3
+// MEM2REG:    %3 = cir.const #cir.int<4> : !s32i
+// MEM2REG:    %4 = cir.ptr_stride %arg0, %1 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+// MEM2REG:    cir.store{{.*}} %3, %4 : !s32i, !cir.ptr<!s32i>
+// MEM2REG:    cir.br ^bb5
+// MEM2REG:  ^bb5:  // pred: ^bb4
+// MEM2REG:    cir.br ^bb6
+// MEM2REG:  ^bb6:  // pred: ^bb5
+// MEM2REG:    %5 = cir.unary(inc, %1) nsw : !s32i, !s32i
+// MEM2REG:    cir.br ^bb2(%5 : !s32i)
+// MEM2REG:  ^bb7:  // pred: ^bb2
+// MEM2REG:    cir.br ^bb8
+// MEM2REG:  ^bb8:  // pred: ^bb7
+// MEM2REG:    cir.return
+
+
+int alloca_in_ifelse(int x) {
+  int y = 0;
+  if (x > 42) {
+    int z = 2;
+    y = x * z;
+  } else  {
+    int z = 3;
+    y = x * z;
+  }
+
+  y = y + 1;
+  return y;
+}
+
+// BEFORE:  cir.func {{.*@alloca_in_ifelse}}
+// BEFORE:    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+// BEFORE:    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// BEFORE:    %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {alignment = 4 : i64}
+// BEFORE:    cir.store{{.*}} %arg0, %0 : !s32i, !cir.ptr<!s32i>
+// BEFORE:    %3 = cir.const #cir.int<0> : !s32i
+// BEFORE:    cir.store{{.*}} %3, %2 : !s32i, !cir.ptr<!s32i>
+// BEFORE:    cir.scope {
+// BEFORE:      %9 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// BEFORE:      %10 = cir.const #cir.int<42> : !s32i
+// BEFORE:      %11 = cir.cmp(gt, %9, %10) : !s32i, !cir.bool
+// BEFORE:      cir.if %11 {
+// BEFORE:        %12 = cir.alloca !s32i, !cir.ptr<!s32i>, ["z", init] {alignment = 4 : i64}
+// BEFORE:        %13 = cir.const #cir.int<2> : !s32i
+// BEFORE:        cir.store{{.*}} %13, %12 : !s32i, !cir.ptr<!s32i>
+// BEFORE:        %14 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// BEFORE:        %15 = cir.load{{.*}} %12 : !cir.ptr<!s32i>, !s32i
+// BEFORE:        %16 = cir.binop(mul, %14, %15) nsw : !s32i
+// BEFORE:        cir.store{{.*}} %16, %2 : !s32i, !cir.ptr<!s32i>
+// BEFORE:      } else {
+// BEFORE:        %12 = cir.alloca !s32i, !cir.ptr<!s32i>, ["z", init] {alignment = 4 : i64}
+// BEFORE:        %13 = cir.const #cir.int<3> : !s32i
+// BEFORE:        cir.store{{.*}} %13, %12 : !s32i, !cir.ptr<!s32i>
+// BEFORE:        %14 = cir.load{{.*}} %0 : !cir.ptr<!s32i>, !s32i
+// BEFORE:        %15 = cir.load{{.*}} %12 : !cir.ptr<!s32i>, !s32i
+// BEFORE:        %16 = cir.binop(mul, %14, %15) nsw : !s32i
+// BEFORE:        cir.store{{.*}} %16, %2 : !s32i, !cir.ptr<!s32i>
+// BEFORE:     }
+// BEFORE:    }
+// BEFORE:    %4 = cir.load{{.*}} %2 : !cir.ptr<!s32i>, !s32i
+// BEFORE:    %5 = cir.const #cir.int<1> : !s32i
+// BEFORE:    %6 = cir.binop(add, %4, %5) nsw : !s32i
+// BEFORE:    cir.store{{.*}} %6, %2 : !s32i, !cir.ptr<!s32i>
+// BEFORE:    %7 = cir.load{{.*}} %2 : !cir.ptr<!s32i>, !s32i
+// BEFORE:    cir.store{{.*}} %7, %1 : !s32i, !cir.ptr<!s32i>
+// BEFORE:    %8 = cir.load{{.*}} %1 : !cir.ptr<!s32i>, !s32i
+// BEFORE:    cir.return %8 : !s32i
+
+// MEM2REG:  cir.func {{.*@alloca_in_ifelse}}
+// MEM2REG:    %0 = cir.const #cir.int<0> : !s32i
+// MEM2REG:    cir.br ^bb1
+// MEM2REG:  ^bb1:  // pred: ^bb0
+// MEM2REG:    %1 = cir.const #cir.int<42> : !s32i
+// MEM2REG:    %2 = cir.cmp(gt, %arg0, %1) : !s32i, !cir.bool
+// MEM2REG:    cir.brcond %2 ^bb2, ^bb3
+// MEM2REG:  ^bb2:  // pred: ^bb1
+// MEM2REG:    %3 = cir.const #cir.int<2> : !s32i
+// MEM2REG:    %4 = cir.binop(mul, %arg0, %3) nsw : !s32i
+// MEM2REG:    cir.br ^bb4(%4 : !s32i)
+// MEM2REG:  ^bb3:  // pred: ^bb1
+// MEM2REG:    %5 = cir.const #cir.int<3> : !s32i
+// MEM2REG:    %6 = cir.binop(mul, %arg0, %5) nsw : !s32i
+// MEM2REG:    cir.br ^bb4(%6 : !s32i)
+// MEM2REG:  ^bb4(%7: !s32i{{.*}}):  // 2 preds: ^bb2, ^bb3
+// MEM2REG:    cir.br ^bb5
+// MEM2REG:  ^bb5:  // pred: ^bb4
+// MEM2REG:    %8 = cir.const #cir.int<1> : !s32i
+// MEM2REG:    %9 = cir.binop(add, %7, %8) nsw : !s32i
+// MEM2REG:    cir.return %9 : !s32i
+// MEM2REG:  }
+
+
+
+
+typedef __SIZE_TYPE__ size_t;
+void *alloca(size_t size);
+
+void test_bitcast(size_t n) {
+  int *c1 = alloca(n);
+}
+
+// BEFORE:  cir.func {{.*@test_bitcast}}
+// BEFORE:    %0 = cir.alloca !u64i, !cir.ptr<!u64i>, ["n", init] {alignment = 8 : i64}
+// BEFORE:    %1 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["c1", init] {alignment = 8 : i64}
+// BEFORE:    cir.store{{.*}} %arg0, %0 : !u64i, !cir.ptr<!u64i>
+// BEFORE:    %2 = cir.load{{.*}} %0 : !cir.ptr<!u64i>, !u64i
+// BEFORE:    %3 = cir.alloca !u8i, !cir.ptr<!u8i>, %2 : !u64i, ["bi_alloca"] {alignment = 16 : i64}
+// BEFORE:    %4 = cir.cast bitcast %3 : !cir.ptr<!u8i> -> !cir.ptr<!void>
+// BEFORE:    %5 = cir.cast bitcast %4 : !cir.ptr<!void> -> !cir.ptr<!s32i>
+// BEFORE:    cir.store{{.*}} %5, %1 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>
+// BEFORE:    cir.return
+
+// MEM2REG:  cir.func {{.*@test_bitcast}}
+// MEM2REG:    cir.return
+// MEM2REG:  }
diff --git a/clang/test/CIR/Incubator/Transforms/mem2reg.cir b/clang/test/CIR/Incubator/Transforms/mem2reg.cir
new file mode 100644
index 0000000000000..dca55d3c30684
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/mem2reg.cir
@@ -0,0 +1,31 @@
+// RUN: cir-opt %s -cir-flatten-cfg -mem2reg -o - | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+!u64i = !cir.int<u, 64>
+!u8i = !cir.int<u, 8>
+!void = !cir.void
+
+module {
+
+  //  ====   Simple case
+  // C code
+  // int return_42() {
+  //   int y = 42;
+  //   return y;  
+  // }
+  cir.func @return_42() -> !s32i {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {alignment = 4 : i64}
+    %2 = cir.const #cir.int<42> : !s32i
+    cir.store %2, %1 : !s32i, !cir.ptr<!s32i> 
+    %3 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    cir.store %3, %0 : !s32i, !cir.ptr<!s32i>
+    %4 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    cir.return %4 : !s32i
+  }
+  // CHECK:  cir.func @return_42() -> !s32i {
+  // CHECK:    %0 = cir.const #cir.int<42> : !s32i
+  // CHECK:    cir.return %0 : !s32i
+  // CHECK:  }
+
+} //module
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/Transforms/merge-cleanups.cir b/clang/test/CIR/Incubator/Transforms/merge-cleanups.cir
new file mode 100644
index 0000000000000..3e7fee9964e05
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/merge-cleanups.cir
@@ -0,0 +1,150 @@
+// RUN: cir-opt %s -cir-canonicalize -o %t.out.cir
+// RUN: FileCheck --input-file=%t.out.cir %s
+
+#false = #cir.bool<false> : !cir.bool
+#true = #cir.bool<true> : !cir.bool
+!s32i = !cir.int<s, 32>
+module  {
+  cir.func @sw1(%arg0: !s32i, %arg1: !s32i) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["c", init] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    cir.store %arg1, %1 : !s32i, !cir.ptr<!s32i>
+    cir.scope {
+      %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["b", init] {alignment = 4 : i64}
+      %3 = cir.const #cir.int<1> : !s32i
+      cir.store %3, %2 : !s32i, !cir.ptr<!s32i>
+      %4 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+      cir.switch (%4 : !s32i) {
+      cir.case (equal, [#cir.int<0> : !s32i])  {
+        %5 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+        %6 = cir.const #cir.int<1> : !s32i
+        %7 = cir.binop(add, %5, %6) : !s32i
+        cir.store %7, %2 : !s32i, !cir.ptr<!s32i>
+        cir.br ^bb1
+      ^bb1:  // pred: ^bb0
+        cir.return
+      }
+      cir.case (equal, [#cir.int<1> : !s32i])  {
+        cir.scope {
+          cir.scope {
+            %5 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+            %6 = cir.const #cir.int<3> : !s32i
+            %7 = cir.cmp(eq, %5, %6) : !s32i, !cir.bool
+            cir.if %7 {
+              cir.br ^bb1
+            ^bb1:  // pred: ^bb0
+              cir.return
+            }
+          }
+          cir.break
+        }
+        cir.yield
+      }
+      cir.case (equal, [#cir.int<2> : !s32i])  {
+        cir.scope {
+          %5 = cir.alloca !s32i, !cir.ptr<!s32i>, ["yolo", init] {alignment = 4 : i64}
+          %6 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+          %7 = cir.const #cir.int<1> : !s32i
+          %8 = cir.binop(add, %6, %7) : !s32i
+          cir.store %8, %2 : !s32i, !cir.ptr<!s32i>
+          %9 = cir.const #cir.int<100> : !s32i
+          cir.store %9, %5 : !s32i, !cir.ptr<!s32i>
+          cir.br ^bb1
+        ^bb1:  // pred: ^bb0
+          cir.return
+        }
+        cir.yield
+      }
+      cir.yield
+      }
+    }
+    cir.return
+  }
+
+// CHECK: cir.switch (%4 : !s32i) {
+// CHECK-NEXT:   cir.case(equal, [#cir.int<0> : !s32i])  {
+// CHECK-NEXT:     %5 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:     %6 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:     %7 = cir.binop(add, %5, %6) : !s32i
+// CHECK-NEXT:     cir.store %7, %2 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:     cir.return
+// CHECK-NEXT:   }
+// CHECK-NEXT:   cir.case(equal, [#cir.int<1> : !s32i])  {
+// CHECK-NEXT:     cir.scope {
+// CHECK-NEXT:       cir.scope {
+// CHECK-NEXT:         %5 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:         %6 = cir.const #cir.int<3> : !s32i
+// CHECK-NEXT:         %7 = cir.cmp(eq, %5, %6) : !s32i, !cir.bool
+// CHECK-NEXT:         cir.if %7 {
+// CHECK-NEXT:           cir.return
+// CHECK-NEXT:         }
+// CHECK-NEXT:       }
+// CHECK-NEXT:       cir.break
+// CHECK-NEXT:     }
+// CHECK-NEXT:     cir.yield
+// CHECK-NEXT:   }
+// CHECK-NEXT:   cir.case(equal, [#cir.int<2> : !s32i])  {
+// CHECK-NEXT:     cir.scope {
+// CHECK-NEXT:       %5 = cir.alloca !s32i, !cir.ptr<!s32i>, ["yolo", init] {alignment = 4 : i64}
+// CHECK-NEXT:       %6 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+// CHECK-NEXT:       %7 = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT:       %8 = cir.binop(add, %6, %7) : !s32i
+// CHECK-NEXT:       cir.store %8, %2 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:       %9 = cir.const #cir.int<100> : !s32i
+// CHECK-NEXT:       cir.store %9, %5 : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT:       cir.return
+// CHECK-NEXT:     }
+// CHECK-NEXT:     cir.yield
+// CHECK-NEXT:   }
+// CHECK-NEXT:   cir.yield
+// CHECK-NEXT: }
+
+  // Should remove empty scopes.
+  cir.func @removeEmptyScope() {
+    cir.scope {
+    }
+    cir.return
+  }
+  //      CHECK: cir.func @removeEmptyScope
+  // CHECK-NEXT: cir.return
+
+  // Should remove empty switch-case statements.
+  cir.func @removeEmptySwitch(%arg0: !s32i) {
+  //      CHECK: cir.func @removeEmptySwitch
+    cir.switch (%arg0 : !s32i) {
+      cir.yield
+    }
+    // CHECK-NOT: cir.switch
+    cir.return
+    // CHECK: cir.return
+  }
+
+  // Should remove redundant bitcasts.
+  // CHECK-LABEL: @ptrbitcastfold
+  //       CHECK:  %[[ARG0:.+]]: !cir.ptr<!s32i>
+  //       CHECK:  cir.return %[[ARG0]] : !cir.ptr<!s32i>
+  cir.func @ptrbitcastfold(%arg0: !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {
+    %0 = cir.cast bitcast %arg0 : !cir.ptr<!s32i> -> !cir.ptr<!s32i>
+    cir.return %0 : !cir.ptr<!s32i>
+  }
+
+  // Should remove redundant address space casts.
+  // CHECK-LABEL: @addrspacecastfold
+  //       CHECK:  %[[ARG0:.+]]: !cir.ptr<!s32i, target_address_space(2)>
+  //       CHECK:  cir.return %[[ARG0]] : !cir.ptr<!s32i, target_address_space(2)>
+  cir.func @addrspacecastfold(%arg0: !cir.ptr<!s32i, target_address_space(2)>) -> !cir.ptr<!s32i, target_address_space(2)> {
+    %0 = cir.cast address_space %arg0 : !cir.ptr<!s32i, target_address_space(2)> -> !cir.ptr<!s32i, target_address_space(2)>
+    cir.return %0 : !cir.ptr<!s32i, target_address_space(2)>
+  }
+
+  // Should remove a scope with only a yield.
+  cir.func @removeBlockWithScopeYield(%arg0: !s32i) {
+    cir.scope {
+      cir.yield
+    }
+    cir.return
+  }
+  //      CHECK: cir.func @removeBlockWithScopeYield
+  // CHECK-NEXT: cir.return
+}
diff --git a/clang/test/CIR/Incubator/Transforms/move-opt.cpp b/clang/test/CIR/Incubator/Transforms/move-opt.cpp
new file mode 100644
index 0000000000000..a9d492c2ff1a0
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/move-opt.cpp
@@ -0,0 +1,159 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fclangir-move-opt -emit-cir -clangir-verify-diagnostics -mmlir --mlir-print-ir-after=cir-move-opt %s -o /dev/null 2>&1 | FileCheck %s
+
+namespace std {
+
+template<typename T> struct remove_reference       { typedef T type; };
+template<typename T> struct remove_reference<T &>  { typedef T type; };
+template<typename T> struct remove_reference<T &&> { typedef T type; };
+
+template<typename T>
+typename remove_reference<T>::type &&move(T &&t) noexcept;
+
+} // std namespace
+
+struct A {
+  A() = default;
+  A(A &&) = default;
+  A(const A &) = default;
+  // expected-remark@above {{move opt: copied object may be unknown}}
+  A &operator=(A &&) = default;
+  A &operator=(const A &) = default;
+
+  int i;
+};
+
+void test_ctor() {
+  // CHECK-LABEL:   cir.func {{.*}}dso_local @_Z9test_ctorv()
+  // CHECK:           %[[SRC:.*]] = cir.alloca !rec_A
+  // CHECK:           %[[DST:.*]] = cir.alloca !rec_A
+  // CHECK:           cir.call @_ZN1AC2EOS_(%[[DST]], %[[SRC]])
+  // CHECK:           cir.return
+  // CHECK:         }
+
+  A a;
+  A b(a); // expected-remark {{move opt: transformed copy into move}}
+}
+
+void test_assign() {
+  // CHECK-LABEL:   cir.func {{.*}}dso_local @_Z11test_assignv()
+  // CHECK:           %[[SRC:.*]] = cir.alloca !rec_A
+  // CHECK:           %[[DST:.*]] = cir.alloca !rec_A
+  // CHECK:           %[[RES:.*]] = cir.call @_ZN1AaSEOS_(%[[DST]], %[[SRC]])
+  // CHECK:           cir.return
+  // CHECK:         }
+
+  A a, b;
+  b = a; // expected-remark {{move opt: transformed copy into move}}
+}
+
+void test_may_be_unknown(A *unknown) {
+  // CHECK-LABEL:   cir.func {{.*}}dso_local @_Z19test_may_be_unknownP1A(
+  // CHECK:           cir.call @_ZN1AaSERKS_(%{{.*}}, %{{.*}})
+  // CHECK:         }
+
+  A a, b;
+  A *maybe_a = unknown ? unknown : &a;
+  b = *maybe_a; // expected-remark {{move opt: copied object may be unknown}}
+}
+
+void test_escapes_by_ptr(A **han) {
+  // CHECK-LABEL:   cir.func {{.*}}dso_local @_Z19test_escapes_by_ptrPP1A(
+  // CHECK:           cir.call @_ZN1AaSERKS_(%{{.*}}, %{{.*}})
+  // CHECK:         }
+
+  A a, b;
+  *han = &a;
+  b = a; // expected-remark {{move opt: copied object may have escaped}}
+}
+
+void escape_hatch(A *);
+void test_escapes_by_call() {
+  // CHECK-LABEL:   cir.func {{.*}}dso_local @_Z20test_escapes_by_callv()
+  // CHECK:           cir.call @_ZN1AaSERKS_(%{{.*}}, %{{.*}})
+  // CHECK:         }
+
+  A a, b;
+  escape_hatch(&a);
+  b = a; // expected-remark {{move opt: copied object may have escaped}}
+}
+
+int test_live_after_use() {
+  // CHECK-LABEL:   cir.func {{.*}}dso_local @_Z19test_live_after_usev()
+  // CHECK:           %[[VAL_3:.*]] = cir.call @_ZN1AaSERKS_(%{{.*}}, %{{.*}})
+  // CHECK:         }
+
+  A a, b;
+  b = a; // expected-remark {{move opt: copied object is alive after use}}
+  a.i = 10;
+  return a.i;
+}
+
+void test_move_after_copy_ctor() {
+  // CHECK-LABEL:   cir.func {{.*}}dso_local @_Z25test_move_after_copy_ctorv()
+  // CHECK:           %[[VAR_A:.*]] = cir.alloca !rec_A
+  // CHECK:           %[[VAR_B:.*]] = cir.alloca !rec_A
+  // CHECK:           %[[VAR_C:.*]] = cir.alloca !rec_A
+  // CHECK:           cir.call @_ZN1AC1ERKS_(%[[VAR_B]], %[[VAR_A]])
+  // CHECK:           cir.call @_ZN1AC1EOS_(%[[VAR_C]], %[[VAR_A]])
+  // CHECK:         }
+
+  A a;
+  A b(a); // expected-remark {{move opt: copied object is alive after use}}
+  A c(std::move(a));
+}
+
+void test_move_after_copy_assign() {
+  // CHECK-LABEL:   cir.func {{.*}}dso_local @_Z27test_move_after_copy_assignv()
+  // CHECK:           %[[VAR_A:.*]] = cir.alloca !rec_A
+  // CHECK:           %[[VAR_B:.*]] = cir.alloca !rec_A
+  // CHECK:           %[[VAR_C:.*]] = cir.alloca !rec_A
+  // CHECK:           %[[VAL_3:.*]] = cir.call @_ZN1AaSERKS_(%[[VAR_B]], %[[VAR_A]])
+  // CHECK:           %[[VAL_4:.*]] = cir.call @_ZN1AaSEOS_(%[[VAR_C]], %[[VAR_A]])
+  // CHECK:           cir.return
+  // CHECK:         }
+
+  A a, b, c;
+  b = a; // expected-remark {{move opt: copied object is alive after use}}
+  c = std::move(a);
+}
+
+void all_effects(A *, A *);
+void test_live_after_use_call(A *unknown) {
+  // CHECK-LABEL:   cir.func {{.*}}dso_local @_Z24test_live_after_use_callP1A(
+  // CHECK:           cir.call @_ZN1AaSERKS_(
+  // CHECK:         }
+
+  A a, b;
+  b = a; // expected-remark {{move opt: copied object may have escaped}}
+  all_effects(&a, unknown);
+}
+
+__attribute__((pure))
+int read_effect(A *, A *);
+void test_live_after_use_pure_call(A *unknown) {
+  // CHECK-LABEL:   cir.func {{.*}}dso_local @_Z29test_live_after_use_pure_callP1A(
+  // CHECK:           cir.call @_ZN1AaSERKS_(
+  // CHECK:         }
+
+  A a, b;
+  b = a; // expected-remark {{move opt: copied object is alive after use}}
+  read_effect(&a, unknown);
+}
+
+__attribute__((const))
+int no_effect(A *, A *);
+void test_live_after_use_const_call(A *unknown) {
+  // CHECK-LABEL:   cir.func {{.*}}dso_local @_Z30test_live_after_use_const_callP1A(
+  // CHECK:           cir.call @_ZN1AaSEOS_(
+  // CHECK:         }
+
+  A a, b;
+  b = a; // expected-remark {{move opt: transformed copy into move}}
+  no_effect(&a, unknown);
+}
+
+void test_alias(bool cond) {
+  A a, b, c;
+  A *ptr = cond ? &a : &b;
+  c = *ptr; // expected-remark {{move opt: transformed copy into move}}
+}
diff --git a/clang/test/CIR/Incubator/Transforms/points-to-analysis.cir b/clang/test/CIR/Incubator/Transforms/points-to-analysis.cir
new file mode 100644
index 0000000000000..972f26f9eb31d
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/points-to-analysis.cir
@@ -0,0 +1,124 @@
+// RUN: cir-opt --cir-points-to-diagnostics --verify-diagnostics %s -o /dev/null
+
+!s32i = !cir.int<s, 32>
+#fn_attr = #cir<extra({nothrow = #cir.nothrow, uwtable = #cir.uwtable<async>})>
+module {
+cir.func dso_local @test_alloca() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["local"] {alignment = 4 : i64}
+    %1 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["ptr_to_local"] {alignment = 8 : i64}
+    cir.store align(8) %0, %1 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>> // expected-remark {{store { ptr_to_local }}}
+    %2 = cir.const #cir.int<1> : !s32i
+    %3 = cir.load deref align(8) %1 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i> // expected-remark {{load { ptr_to_local }}}
+    cir.store align(4) %2, %3 : !s32i, !cir.ptr<!s32i> // expected-remark {{store { local }}}
+    cir.return
+  }
+  cir.func dso_local @test_arg(%arg0: !s32i) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["arg", init] {alignment = 4 : i64}
+    %1 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["ptr_to_arg", init] {alignment = 8 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i> // expected-remark {{store { arg }}}
+    cir.store align(8) %0, %1 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>> // expected-remark {{store { ptr_to_arg }}}
+    %2 = cir.const #cir.int<1> : !s32i
+    %3 = cir.load deref align(8) %1 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i> // expected-remark {{load { ptr_to_arg }}}
+    cir.store align(4) %2, %3 : !s32i, !cir.ptr<!s32i> // expected-remark {{store { arg }}}
+    cir.return
+  }
+  cir.func dso_local @test_unknown(%arg0: !cir.ptr<!s32i>) {
+    %0 = cir.const #cir.int<1> : !s32i
+    cir.store align(4) %0, %arg0 : !s32i, !cir.ptr<!s32i> // expected-remark {{store { :unknown: }}}
+    cir.return
+  }
+  cir.func dso_local @test_if_else() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a"] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["b"] {alignment = 4 : i64}
+    %2 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["ptr_to_a_or_b"] {alignment = 8 : i64}
+    cir.scope {
+      %5 = cir.load align(4) %0 : !cir.ptr<!s32i>, !s32i // expected-remark {{load { a }}}
+      %6 = cir.load align(4) %1 : !cir.ptr<!s32i>, !s32i // expected-remark {{load { b }}}
+      %7 = cir.cmp(lt, %5, %6) : !s32i, !cir.bool
+      cir.if %7 {
+        cir.store align(8) %0, %2 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>> // expected-remark {{store { ptr_to_a_or_b }}}
+      } else {
+        cir.store align(8) %1, %2 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>> // expected-remark {{store { ptr_to_a_or_b }}}
+      }
+    }
+    %3 = cir.const #cir.int<1> : !s32i
+    %4 = cir.load deref align(8) %2 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i> // expected-remark {{load { ptr_to_a_or_b }}}
+    cir.store align(4) %3, %4 : !s32i, !cir.ptr<!s32i> // expected-remark {{store { a, b }}}
+    cir.return
+  }
+  cir.func dso_local @test_loop() {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a"] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["b"] {alignment = 4 : i64}
+    %2 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["ptr_to_a_or_b", init] {alignment = 8 : i64}
+    cir.store align(8) %0, %2 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>> // expected-remark {{store { ptr_to_a_or_b }}}
+    cir.scope {
+      cir.while {
+        %3 = cir.load deref align(8) %2 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i> // expected-remark {{load { ptr_to_a_or_b }}}
+        %4 = cir.load align(4) %3 : !cir.ptr<!s32i>, !s32i // expected-remark {{load { a, b }}}
+        %5 = cir.const #cir.int<0> : !s32i
+        %6 = cir.cmp(gt, %4, %5) : !s32i, !cir.bool
+        cir.condition(%6)
+      } do {
+        cir.scope {
+          cir.store align(8) %1, %2 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>> // expected-remark {{store { ptr_to_a_or_b }}}
+        }
+        cir.yield
+      }
+    }
+    cir.return
+  }
+  cir.func dso_local @test_ternary(%arg0: !s32i) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["arg", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["local"] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i> // expected-remark {{store { arg }}}
+    %2 = cir.load align(4) %0 : !cir.ptr<!s32i>, !s32i // expected-remark {{load { arg }}}
+    %3 = cir.cast int_to_bool %2 : !s32i -> !cir.bool
+    %4 = cir.ternary(%3, true {
+      cir.yield %0 : !cir.ptr<!s32i>
+    }, false {
+      cir.yield %1 : !cir.ptr<!s32i>
+    }) : (!cir.bool) -> !cir.ptr<!s32i>
+    %5 = cir.const #cir.int<1> : !s32i
+    cir.store align(4) %5, %4 : !s32i, !cir.ptr<!s32i> // expected-remark {{store { arg, local }}}
+    cir.return
+  }
+  cir.func private @all_effects(!cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>) -> !cir.ptr<!s32i>
+  cir.func dso_local @test_call(%arg0: !cir.ptr<!s32i>) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["local"] {alignment = 4 : i64}
+    %1 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["ptr_to_local", init] {alignment = 8 : i64}
+    cir.store align(8) %0, %1 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>> // expected-remark {{store { ptr_to_local }}}
+    %2 = cir.call @all_effects(%1, %arg0) : (!cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>) -> !cir.ptr<!s32i>
+    %3 = cir.const #cir.int<1> : !s32i
+    %4 = cir.load deref align(8) %1 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i> // expected-remark {{load { ptr_to_local }}}
+    cir.store align(4) %3, %4 : !s32i, !cir.ptr<!s32i> // expected-remark {{store { :unknown:, local, ptr_to_local }}}
+    %5 = cir.const #cir.int<2> : !s32i
+    cir.store align(4) %5, %2 : !s32i, !cir.ptr<!s32i> // expected-remark {{store { :unknown:, local, ptr_to_local }}}
+    cir.return
+  }
+  cir.func private @read_effects(!cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>) -> !cir.ptr<!s32i>
+  cir.func dso_local @test_call_pure(%arg0: !cir.ptr<!s32i>) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["local"] {alignment = 4 : i64}
+    %1 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["ptr_to_local", init] {alignment = 8 : i64}
+    cir.store align(8) %0, %1 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>> // expected-remark {{store { ptr_to_local }}}
+    %2 = cir.call @read_effects(%1, %arg0) : (!cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>) -> !cir.ptr<!s32i> side_effect(pure)
+    %3 = cir.const #cir.int<1> : !s32i
+    %4 = cir.load deref align(8) %1 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i> // expected-remark {{load { ptr_to_local }}}
+    cir.store align(4) %3, %4 : !s32i, !cir.ptr<!s32i> // expected-remark {{store { local }}}
+    %5 = cir.const #cir.int<2> : !s32i
+    cir.store align(4) %5, %2 : !s32i, !cir.ptr<!s32i> // expected-remark {{store { :unknown:, local, ptr_to_local }}}
+    cir.return
+  }
+  cir.func private @no_effects(!cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>) -> !cir.ptr<!s32i>
+  cir.func dso_local @test_call_const(%arg0: !cir.ptr<!s32i>) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["local"] {alignment = 4 : i64}
+    %1 = cir.alloca !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>>, ["ptr_to_local", init] {alignment = 8 : i64}
+    cir.store align(8) %0, %1 : !cir.ptr<!s32i>, !cir.ptr<!cir.ptr<!s32i>> // expected-remark {{store { ptr_to_local }}}
+    %2 = cir.call @no_effects(%1, %arg0) : (!cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>) -> !cir.ptr<!s32i> side_effect(const)
+    %3 = cir.const #cir.int<1> : !s32i
+    %4 = cir.load deref align(8) %1 : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i> // expected-remark {{load { ptr_to_local }}}
+    cir.store align(4) %3, %4 : !s32i, !cir.ptr<!s32i> // expected-remark {{store { local }}}
+    %5 = cir.const #cir.int<2> : !s32i
+    cir.store align(4) %5, %2 : !s32i, !cir.ptr<!s32i> // expected-remark {{store {  }}}
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Transforms/points-to-analysis.cpp b/clang/test/CIR/Incubator/Transforms/points-to-analysis.cpp
new file mode 100644
index 0000000000000..14c7ab1e10b08
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/points-to-analysis.cpp
@@ -0,0 +1,55 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | cir-opt --cir-points-to-diagnostics --verify-diagnostics -o /dev/null
+
+struct Node {
+  // expected-remark at above {{load { this }}}
+  // expected-remark at above {{store { this }}}
+  // expected-remark at above {{load {  }}}
+  // expected-remark at above {{store {  }}}
+  // expected-remark at above {{load { :unknown: }}}
+  // expected-remark at above {{store { :unknown: }}}
+  // expected-remark at above {{load { __retval }}}
+  // expected-remark at above {{store { __retval }}}
+
+  int val;
+  // expected-remark@above {{store { :unknown: }}}
+};
+
+int test_copy_ctor() {
+  Node orig;
+  Node copy(orig);
+
+  return copy.val;
+  // expected-remark@above {{load { copy }}}
+  // expected-remark@above {{load { __retval }}}
+  // expected-remark@above {{store { __retval }}}
+}
+
+int test_move_ctor() {
+  Node orig;
+  Node move((Node &&)orig);
+
+  return move.val;
+  // expected-remark@above {{load { move }}}
+  // expected-remark@above {{load { __retval }}}
+  // expected-remark@above {{store { __retval }}}
+}
+
+int test_copy_assign() {
+  Node orig, copy;
+  copy = orig;
+
+  return copy.val;
+  // expected-remark@above {{load { copy }}}
+  // expected-remark@above {{load { __retval }}}
+  // expected-remark@above {{store { __retval }}}
+}
+
+int test_move_assign() {
+  Node orig, move;
+  move = (Node &&)orig;
+
+  return move.val;
+  // expected-remark@above {{load { move }}}
+  // expected-remark@above {{load { __retval }}}
+  // expected-remark@above {{store { __retval }}}
+}
diff --git a/clang/test/CIR/Incubator/Transforms/scf-prepare.cir b/clang/test/CIR/Incubator/Transforms/scf-prepare.cir
new file mode 100644
index 0000000000000..b4eb9633ba57e
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/scf-prepare.cir
@@ -0,0 +1,206 @@
+// RUN: cir-opt %s -cir-mlir-scf-prepare -o - | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.global "private" external @a : !cir.array<!s32i x 100>
+
+  // for (int i = l; u > i; ++i)
+  //   a[i] = 3;
+  //
+  // Check that the loop boundary been hoisted out of loop and the comparison
+  // been transferred from gt to lt.
+  cir.func @variableLoopBound(%arg0: !s32i, %arg1: !s32i) {
+    // CHECK: %[[BOUND:.*]] = cir.load %[[BOUND_ADDR:.*]] : !cir.ptr<!s32i>, !s32i
+    // CHECK: cir.for : cond {
+    // CHECK:   %[[IV:.*]] = cir.load %[[IV_ADDR:.*]] : !cir.ptr<!s32i>, !s32i
+    // CHECK:   %[[COND:.*]] = cir.cmp(lt, %[[IV]], %4) : !s32i, !cir.bool
+
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["l", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["u", init] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    cir.store %arg1, %1 : !s32i, !cir.ptr<!s32i>
+    cir.scope {
+      %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+      %3 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+      cir.store %3, %2 : !s32i, !cir.ptr<!s32i>
+      cir.for : cond {
+        %4 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+        %5 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+        %6 = cir.cmp(gt, %4, %5) : !s32i, !cir.bool
+        cir.condition(%6)
+      } body {
+        %4 = cir.const #cir.int<3> : !s32i
+        %5 = cir.get_global @a : !cir.ptr<!cir.array<!s32i x 100>>
+        %6 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+        %7 = cir.cast array_to_ptrdecay %5 : !cir.ptr<!cir.array<!s32i x 100>> -> !cir.ptr<!s32i>
+        %8 = cir.ptr_stride %7, %6 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+        cir.store %4, %8 : !s32i, !cir.ptr<!s32i>
+        cir.yield
+      } step {
+        %4 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+        %5 = cir.unary(inc, %4) : !s32i, !s32i
+        cir.store %5, %2 : !s32i, !cir.ptr<!s32i>
+        cir.yield
+      }
+    }
+    cir.return
+  }
+
+  // for (int i = 0; 50 >= i; ++i)
+  //   a[i] = 3;
+  //
+  // Check that the loop boundary been hoisted out of loop and the comparison
+  // been transferred from ge to le.
+  cir.func @constantLoopBound() {
+    // CHECK: %[[BOUND:.*]] = cir.const #cir.int<50> : !s32i
+    // CHECK: cir.for : cond {
+    // CHECK:   %[[IV:.*]] = cir.load %[[IV_ADDR:.*]] : !cir.ptr<!s32i>, !s32i
+    // CHECK:   %[[COND:.*]] = cir.cmp(le, %[[IV]], %[[BOUND]]) : !s32i, !cir.bool
+
+    cir.scope {
+      %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+      %1 = cir.const #cir.int<0> : !s32i
+      cir.store %1, %0 : !s32i, !cir.ptr<!s32i>
+      cir.for : cond {
+        %2 = cir.const #cir.int<50> : !s32i
+        %3 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %4 = cir.cmp(ge, %2, %3) : !s32i, !cir.bool
+        cir.condition(%4)
+      } body {
+        %2 = cir.const #cir.int<3> : !s32i
+        %3 = cir.get_global @a : !cir.ptr<!cir.array<!s32i x 100>>
+        %4 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %5 = cir.cast array_to_ptrdecay %3 : !cir.ptr<!cir.array<!s32i x 100>> -> !cir.ptr<!s32i>
+        %6 = cir.ptr_stride %5, %4 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+        cir.store %2, %6 : !s32i, !cir.ptr<!s32i>
+        cir.yield
+      } step {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.unary(inc, %2) : !s32i, !s32i
+        cir.store %3, %0 : !s32i, !cir.ptr<!s32i>
+        cir.yield
+      }
+    }
+    cir.return
+  }
+
+  // for (int i = l; u > i; ++i) {
+  //   --u;
+  //   a[i] = 3;
+  // }
+  //
+  // Check that the loop boundary not been hoisted because it's not loop
+  // invariant and the loop comparison been transferred from gt to lt.
+  cir.func @variableLoopBoundNotLoopInvariant(%arg0: !s32i, %arg1: !s32i) {
+    // CHECK: cir.store %[[IV_INIT:.*]], %[[IV_ADDR:.*]] : !s32i, !cir.ptr<!s32i>
+    // CHECK: cir.for : cond {
+    // CHECK:   %[[BOUND:.*]] = cir.load %[[BOUND_ADDR:.*]] : !cir.ptr<!s32i>, !s32i
+    // CHECK:   %[[IV:.*]] = cir.load %[[IV_ADDR:.*]] : !cir.ptr<!s32i>, !s32i
+    // CHECK:   %[[COND:.*]] = cir.cmp(lt, %[[IV]], %[[BOUND]]) : !s32i, !cir.bool
+
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["l", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["u", init] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    cir.store %arg1, %1 : !s32i, !cir.ptr<!s32i>
+    cir.scope {
+      %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+      %3 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+      cir.store %3, %2 : !s32i, !cir.ptr<!s32i>
+      cir.for : cond {
+        %4 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+        %5 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+        %6 = cir.cmp(gt, %4, %5) : !s32i, !cir.bool
+        cir.condition(%6)
+      } body {
+        cir.scope {
+          %4 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+          %5 = cir.unary(dec, %4) : !s32i, !s32i
+          cir.store %5, %1 : !s32i, !cir.ptr<!s32i>
+          %6 = cir.const #cir.int<3> : !s32i
+          %7 = cir.get_global @a : !cir.ptr<!cir.array<!s32i x 100>>
+          %8 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+          %9 = cir.cast array_to_ptrdecay %7 : !cir.ptr<!cir.array<!s32i x 100>> -> !cir.ptr<!s32i>
+          %10 = cir.ptr_stride %9, %8 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+          cir.store %6, %10 : !s32i, !cir.ptr<!s32i>
+        }
+        cir.yield
+      } step {
+        %4 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+        %5 = cir.unary(inc, %4) : !s32i, !s32i
+        cir.store %5, %2 : !s32i, !cir.ptr<!s32i>
+        cir.yield
+      }
+    }
+    cir.return
+  }
+
+  // for (int i = 0; i < 100 - 1; ++i) {}
+  //
+  // Check that the loop upper bound operations(100 - 1) will be hoisted out
+  // of loop.
+  cir.func @loopInvariantBinOp() {
+    // CHECK: %[[C100:.*]] = cir.const #cir.int<100> : !s32i
+    // CHECK: %[[C1:.*]] = cir.const #cir.int<1> : !s32i
+    // CHECK: %[[UPPER_BOUND:.*]] = cir.binop(sub, %[[C100]], %[[C1]]) nsw : !s32i
+    // CHECK: cir.for : cond {
+
+    cir.scope {
+      %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+      %1 = cir.const #cir.int<0> : !s32i
+      cir.store %1, %0 : !s32i, !cir.ptr<!s32i>
+      cir.for : cond {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.const #cir.int<100> : !s32i
+        %4 = cir.const #cir.int<1> : !s32i
+        %5 = cir.binop(sub, %3, %4) nsw : !s32i
+        %6 = cir.cmp(lt, %2, %5) : !s32i, !cir.bool
+        cir.condition(%6)
+      } body {
+        cir.scope {
+        }
+        cir.yield
+      } step {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.unary(inc, %2) : !s32i, !s32i
+        cir.store %3, %0 : !s32i, !cir.ptr<!s32i>
+        cir.yield
+      }
+    }
+    cir.return
+  }
+
+  // It's a hand-writing test case to check that the operation has block
+  // argument as operand won't be hoisted out of loop.
+  // Note that the current codegen will store the argument first and then
+  // load the value to user. This test case is manually created to check
+  // that the hoisting pass won't break when encounter block argument.
+  cir.func @loopInvariantBinOp_blockArg(%arg0: !s32i) {
+    // CHECK: cir.for : cond {
+    // CHECK: %[[C100:.*]] = cir.const #cir.int<100> : !s32i
+    // CHECK: %[[UPPER_BOUND:.*]] = cir.binop(sub, %[[C100]], %arg0) nsw : !s32i
+
+    cir.scope {
+      %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] {alignment = 4 : i64}
+      %1 = cir.const #cir.int<0> : !s32i
+      cir.store %1, %0 : !s32i, !cir.ptr<!s32i>
+      cir.for : cond {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.const #cir.int<100> : !s32i
+        %5 = cir.binop(sub, %3, %arg0) nsw : !s32i
+        %6 = cir.cmp(lt, %2, %5) : !s32i, !cir.bool
+        cir.condition(%6)
+      } body {
+        cir.scope {
+        }
+        cir.yield
+      } step {
+        %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+        %3 = cir.unary(inc, %2) : !s32i, !s32i
+        cir.store %3, %0 : !s32i, !cir.ptr<!s32i>
+        cir.yield
+      }
+    }
+    cir.return
+  }
+}
diff --git a/clang/test/CIR/Incubator/Transforms/scope.cir b/clang/test/CIR/Incubator/Transforms/scope.cir
new file mode 100644
index 0000000000000..2d14784c33f87
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/scope.cir
@@ -0,0 +1,60 @@
+// RUN: cir-opt %s -cir-flatten-cfg -o - | FileCheck %s
+
+!u32i = !cir.int<u, 32>
+
+module {
+  cir.func @foo() {
+    cir.scope {
+      %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["a", init] {alignment = 4 : i64}
+      %1 = cir.const #cir.int<4> : !u32i
+      cir.store %1, %0 : !u32i, !cir.ptr<!u32i>
+    }
+    cir.return
+  }
+// CHECK:  cir.func @foo() {
+// CHECK:    cir.br ^bb1
+// CHECK:  ^bb1:  // pred: ^bb0
+// CHECK:    %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["a", init] {alignment = 4 : i64}
+// CHECK:    %1 = cir.const #cir.int<4> : !u32i
+// CHECK:    cir.store %1, %0 : !u32i, !cir.ptr<!u32i>
+// CHECK:    cir.br ^bb2
+// CHECK:  ^bb2:  // pred: ^bb1
+// CHECK:    cir.return
+// CHECK:  }
+
+  // Should drop empty scopes.
+  cir.func @empty_scope() {
+    cir.scope {
+    }
+    cir.return
+  }
+// CHECK:  cir.func @empty_scope() {
+// CHECK:    cir.return
+// CHECK:  }
+
+  cir.func @scope_with_return() -> !u32i {
+    %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["__retval"] {alignment = 4 : i64}
+    cir.scope {
+      %2 = cir.const #cir.int<0> : !u32i
+      cir.store %2, %0 : !u32i, !cir.ptr<!u32i>
+      %3 = cir.load %0 : !cir.ptr<!u32i>, !u32i
+      cir.return %3 : !u32i
+    }
+    %1 = cir.load %0 : !cir.ptr<!u32i>, !u32i
+    cir.return %1 : !u32i
+  }
+
+// CHECK:  cir.func @scope_with_return() -> !u32i {
+// CHECK:    %0 = cir.alloca !u32i, !cir.ptr<!u32i>, ["__retval"] {alignment = 4 : i64}
+// CHECK:    cir.br ^bb1
+// CHECK:  ^bb1:  // pred: ^bb0
+// CHECK:    %1 = cir.const #cir.int<0> : !u32i
+// CHECK:    cir.store %1, %0 : !u32i, !cir.ptr<!u32i>
+// CHECK:    %2 = cir.load %0 : !cir.ptr<!u32i>, !u32i
+// CHECK:    cir.return %2 : !u32i
+// CHECK:  ^bb2:  // no predecessors
+// CHECK:    %3 = cir.load %0 : !cir.ptr<!u32i>, !u32i
+// CHECK:    cir.return %3 : !u32i
+// CHECK:  }
+
+}
diff --git a/clang/test/CIR/Incubator/Transforms/select.cir b/clang/test/CIR/Incubator/Transforms/select.cir
new file mode 100644
index 0000000000000..29a5d1ed1ddeb
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/select.cir
@@ -0,0 +1,60 @@
+// RUN: cir-opt -cir-canonicalize -cir-simplify -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.func @fold_true(%arg0 : !s32i, %arg1 : !s32i) -> !s32i {
+    %0 = cir.const #cir.bool<true> : !cir.bool
+    %1 = cir.select if %0 then %arg0 else %arg1 : (!cir.bool, !s32i, !s32i) -> !s32i
+    cir.return %1 : !s32i
+  }
+
+  //      CHECK: cir.func @fold_true(%[[ARG0:.+]]: !s32i, %[[ARG1:.+]]: !s32i) -> !s32i {
+  // CHECK-NEXT:   cir.return %[[ARG0]] : !s32i
+  // CHECK-NEXT: }
+
+  cir.func @fold_false(%arg0 : !s32i, %arg1 : !s32i) -> !s32i {
+    %0 = cir.const #cir.bool<false> : !cir.bool
+    %1 = cir.select if %0 then %arg0 else %arg1 : (!cir.bool, !s32i, !s32i) -> !s32i
+    cir.return %1 : !s32i
+  }
+
+  //      CHECK: cir.func @fold_false(%[[ARG0:.+]]: !s32i, %[[ARG1:.+]]: !s32i) -> !s32i {
+  // CHECK-NEXT:   cir.return %[[ARG1]] : !s32i
+  // CHECK-NEXT: }
+
+  cir.func @fold_to_const(%arg0 : !cir.bool) -> !s32i {
+    %0 = cir.const #cir.int<42> : !s32i
+    %1 = cir.select if %arg0 then %0 else %0 : (!cir.bool, !s32i, !s32i) -> !s32i
+    cir.return %1 : !s32i
+  }
+
+  //      CHECK: cir.func @fold_to_const(%{{.+}}: !cir.bool) -> !s32i {
+  // CHECK-NEXT:   %[[#A:]] = cir.const #cir.int<42> : !s32i
+  // CHECK-NEXT:   cir.return %[[#A]] : !s32i
+  // CHECK-NEXT: }
+
+  cir.func @simplify_1(%arg0 : !cir.bool) -> !cir.bool {
+    %0 = cir.const #cir.bool<true> : !cir.bool
+    %1 = cir.const #cir.bool<false> : !cir.bool
+    %2 = cir.select if %arg0 then %0 else %1 : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
+    cir.return %2 : !cir.bool
+  }
+
+  //      CHECK: cir.func @simplify_1(%[[ARG0:.+]]: !cir.bool) -> !cir.bool {
+  // CHECK-NEXT:   cir.return %[[ARG0]] : !cir.bool
+  // CHECK-NEXT: }
+
+  cir.func @simplify_2(%arg0 : !cir.bool) -> !cir.bool {
+    %0 = cir.const #cir.bool<false> : !cir.bool
+    %1 = cir.const #cir.bool<true> : !cir.bool
+    %2 = cir.select if %arg0 then %0 else %1 : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
+    cir.return %2 : !cir.bool
+  }
+
+  //      CHECK: cir.func @simplify_2(%[[ARG0:.+]]: !cir.bool) -> !cir.bool {
+  // CHECK-NEXT:   %[[#A:]] = cir.unary(not, %[[ARG0]]) : !cir.bool, !cir.bool
+  // CHECK-NEXT:   cir.return %[[#A]] : !cir.bool
+  // CHECK-NEXT: }
+}
diff --git a/clang/test/CIR/Incubator/Transforms/setjmp-longjmp-lower.c b/clang/test/CIR/Incubator/Transforms/setjmp-longjmp-lower.c
new file mode 100644
index 0000000000000..33b4643eb0818
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/setjmp-longjmp-lower.c
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o - 2>&1 | FileCheck %s -check-prefix=BEFORE-LOWERING-PREPARE
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare %s -o - 2>&1 | FileCheck %s -check-prefix=AFTER-LOWERING-PREPARE
+void test_setjmp(void *env) {
+  // BEFORE-LOWERING-PREPARE-LABEL: test_setjmp
+  // BEFORE-LOWERING-PREPARE-SAME: [[ENV:%.*]]: 
+  // BEFORE-LOWERING-PREPARE-NEXT: [[ENV_ALLOCA:%[0-9]+]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>,
+  // BEFORE-LOWERING-PREPARE-NEXT: cir.store [[ENV]], [[ENV_ALLOCA]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+  // BEFORE-LOWERING-PREPARE-NEXT: [[ENV_LOAD:%[0-9]+]] = cir.load align(8) [[ENV_ALLOCA]]
+  // BEFORE-LOWERING-PREPARE-NEXT: [[CAST:%[0-9]+]] = cir.cast bitcast [[ENV_LOAD]] : !cir.ptr<!void> -> !cir.ptr<!cir.ptr<!void>>
+  // BEFORE-LOWERING-PREPARE-NEXT: [[ZERO:%[0-9]+]] = cir.const #cir.int<0>
+  // BEFORE-LOWERING-PREPARE-NEXT: [[FA:%[0-9]+]] = cir.frame_address([[ZERO]])
+  // BEFORE-LOWERING-PREPARE-NEXT: cir.store [[FA]], [[CAST]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+  // BEFORE-LOWERING-PREPARE-NEXT: [[SS:%[0-9]+]] = cir.stack_save
+  // BEFORE-LOWERING-PREPARE-NEXT: [[TWO:%[0-9]+]] = cir.const #cir.int<2>
+  // BEFORE-LOWERING-PREPARE-NEXT: [[GEP:%[0-9]+]] = cir.ptr_stride [[CAST]], [[TWO]] : (!cir.ptr<!cir.ptr<!void>>, !s32i) -> !cir.ptr<!cir.ptr<!void>>
+  // BEFORE-LOWERING-PREPARE-NEXT: cir.store [[SS]], [[GEP]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+  // BEFORE-LOWERING-PREPARE-NEXT: [[SJ:%[0-9]+]] = cir.eh.setjmp builtin [[CAST]] : (!cir.ptr<!cir.ptr<!void>>) -> !s32i
+
+  // AFTER-LOWERING-PREPARE-LABEL: test_setjmp
+  // AFTER-LOWERING-PREPARE-SAME: [[ENV:%.*]]: 
+  // AFTER-LOWERING-PREPARE-NEXT: [[ENV_ALLOCA:%[0-9]+]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>,
+  // AFTER-LOWERING-PREPARE-NEXT: cir.store [[ENV]], [[ENV_ALLOCA]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+  // AFTER-LOWERING-PREPARE-NEXT: [[ENV_LOAD:%[0-9]+]] = cir.load align(8) [[ENV_ALLOCA]]
+  // AFTER-LOWERING-PREPARE-NEXT: [[CAST:%[0-9]+]] = cir.cast bitcast [[ENV_LOAD]] : !cir.ptr<!void> -> !cir.ptr<!cir.ptr<!void>>
+  // AFTER-LOWERING-PREPARE-NEXT: [[ZERO:%[0-9]+]] = cir.const #cir.int<0>
+  // AFTER-LOWERING-PREPARE-NEXT: [[FA:%[0-9]+]] = cir.frame_address([[ZERO]])
+  // AFTER-LOWERING-PREPARE-NEXT: cir.store [[FA]], [[CAST]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+  // AFTER-LOWERING-PREPARE-NEXT: [[SS:%[0-9]+]] = cir.stack_save
+  // AFTER-LOWERING-PREPARE-NEXT: [[TWO:%[0-9]+]] = cir.const #cir.int<2>
+  // AFTER-LOWERING-PREPARE-NEXT: [[GEP:%[0-9]+]] = cir.ptr_stride [[CAST]], [[TWO]] : (!cir.ptr<!cir.ptr<!void>>, !s32i) -> !cir.ptr<!cir.ptr<!void>>
+  // AFTER-LOWERING-PREPARE-NEXT: cir.store [[SS]], [[GEP]] : !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>
+  // AFTER-LOWERING-PREPARE-NEXT: [[SJ:%[0-9]+]] = cir.eh.setjmp builtin [[CAST]] : (!cir.ptr<!cir.ptr<!void>>) -> !s32i
+  __builtin_setjmp(env);
+}
+
+extern int _setjmp(void *env);
+void test_setjmp2(void *env) {
+  // BEFORE-LOWERING-PREPARE-LABEL: test_setjmp2
+  // BEFORE-LOWERING-PREPARE-SAME: [[ENV:%.*]]:
+  // BEFORE-LOWERING-PREPARE-NEXT: [[ENV_ALLOCA:%.*]] = cir.alloca
+  // BEFORE-LOWERING-PREPARE-NEXT: cir.store [[ENV]], [[ENV_ALLOCA]]
+  // BEFORE-LOWERING-PREPARE-NEXT: [[ENV_LOAD:%.*]] = cir.load align(8) [[ENV_ALLOCA]]
+  // BEFORE-LOWERING-PREPARE-NEXT: [[CAST:%.*]] = cir.cast bitcast [[ENV_LOAD]]
+  // BEFORE-LOWERING-PREPARE-NEXT: cir.eh.setjmp [[CAST]] : (!cir.ptr<!cir.ptr<!void>>) -> !s32i
+
+  // AFTER-LOWERING-PREPARE-LABEL: test_setjmp2
+  // AFTER-LOWERING-PREPARE-SAME: [[ENV:%.*]]:
+  // AFTER-LOWERING-PREPARE-NEXT: [[ENV_ALLOCA:%.*]] = cir.alloca
+  // AFTER-LOWERING-PREPARE-NEXT: cir.store [[ENV]], [[ENV_ALLOCA]]
+  // AFTER-LOWERING-PREPARE-NEXT: [[ENV_LOAD:%.*]] = cir.load align(8) [[ENV_ALLOCA]]
+  // AFTER-LOWERING-PREPARE-NEXT: [[CAST:%.*]] = cir.cast bitcast [[ENV_LOAD]]
+  // AFTER-LOWERING-PREPARE-NEXT: cir.eh.setjmp [[CAST]] : (!cir.ptr<!cir.ptr<!void>>) -> !s32i
+  _setjmp (env);
+}
+void test_longjmp(void *env) {
+  // BEFORE-LOWERING-PREPARE-LABEL: test_longjmp
+  // BEFORE-LOWERING-PREPARE-SAME: [[ENV:%.*]]:
+  // BEFORE-LOWERING-PREPARE-NEXT: [[ENV_ALLOCA:%.*]] = cir.alloca
+  // BEFORE-LOWERING-PREPARE-NEXT: cir.store [[ENV]], [[ENV_ALLOCA]]
+  // BEFORE-LOWERING-PREPARE-NEXT: [[ENV_LOAD:%.*]] = cir.load align(8) [[ENV_ALLOCA]]
+  // BEFORE-LOWERING-PREPARE-NEXT: [[CAST:%.*]] = cir.cast bitcast [[ENV_LOAD]]
+  // BEFORE-LOWERING-PREPARE-NEXT: cir.eh.longjmp [[CAST]] : !cir.ptr<!cir.ptr<!void>>
+  // BEFORE-LOWERING-PREPARE-NEXT: cir.unreachable
+
+  // AFTER-LOWERING-PREPARE-LABEL: test_longjmp
+  // AFTER-LOWERING-PREPARE-SAME: [[ENV:%.*]]:
+  // AFTER-LOWERING-PREPARE-NEXT: [[ENV_ALLOCA:%.*]] = cir.alloca
+  // AFTER-LOWERING-PREPARE-NEXT: cir.store [[ENV]], [[ENV_ALLOCA]]
+  // AFTER-LOWERING-PREPARE-NEXT: [[ENV_LOAD:%.*]] = cir.load align(8) [[ENV_ALLOCA]]
+  // AFTER-LOWERING-PREPARE-NEXT: [[CAST:%.*]] = cir.cast bitcast [[ENV_LOAD]]
+  // AFTER-LOWERING-PREPARE-NEXT: cir.eh.longjmp [[CAST]] : !cir.ptr<!cir.ptr<!void>>
+  // AFTER-LOWERING-PREPARE-NEXT: cir.unreachable
+  __builtin_longjmp(env, 1);
+}
diff --git a/clang/test/CIR/Incubator/Transforms/simpl.c b/clang/test/CIR/Incubator/Transforms/simpl.c
new file mode 100644
index 0000000000000..b41c2b319ef72
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/simpl.c
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize %s -o %t1.cir 2>&1 | FileCheck -check-prefix=BEFORE %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-canonicalize %s -o %t2.cir 2>&1 | FileCheck -check-prefix=AFTER %s
+
+
+#define CHECK_PTR(ptr)  \
+  do {                   \
+    if (__builtin_expect((!!((ptr) == 0)), 0))\
+      return -42; \
+  } while(0)
+
+int foo(int* ptr) {
+  CHECK_PTR(ptr);
+
+  (*ptr)++;
+  return 0;
+}
+
+// BEFORE:  cir.func {{.*@foo}}
+// BEFORE:  [[X0:%.*]] = cir.load {{.*}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// BEFORE:  [[X1:%.*]] = cir.const #cir.ptr<null> : !cir.ptr<!s32i>
+// BEFORE:  [[X2:%.*]] = cir.cmp(eq, [[X0]], [[X1]]) : !cir.ptr<!s32i>, !cir.bool
+// BEFORE:  [[BOOL_TO_INT:%.*]] = cir.cast bool_to_int [[X2]] : !cir.bool -> !s32i
+// BEFORE:  [[X3:%.*]] = cir.cast int_to_bool [[BOOL_TO_INT]] : !s32i -> !cir.bool
+// BEFORE:  [[X4:%.*]] = cir.unary(not, [[X3]]) : !cir.bool, !cir.bool
+// BEFORE:  [[X5:%.*]] = cir.cast bool_to_int [[X4]] : !cir.bool -> !s32i
+// BEFORE:  [[X6:%.*]] = cir.cast int_to_bool [[X5]] : !s32i -> !cir.bool
+// BEFORE:  [[X7:%.*]] = cir.unary(not, [[X6]]) : !cir.bool, !cir.bool
+// BEFORE:  [[X8:%.*]] = cir.cast bool_to_int [[X7]] : !cir.bool -> !s32i
+// BEFORE:  [[X9:%.*]] = cir.cast integral [[X8]] : !s32i -> !s64i
+// BEFORE:  [[X10:%.*]] = cir.const #cir.int<0> : !s32i
+// BEFORE:  [[X11:%.*]] = cir.cast integral [[X10]] : !s32i -> !s64i
+// BEFORE:  [[X12:%.*]] = cir.cast int_to_bool [[X9]] : !s64i -> !cir.bool
+// BEFORE:  cir.if [[X12]]
+
+// AFTER:   [[X0:%.*]] = cir.load {{.*}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+// AFTER:   [[X1:%.*]] = cir.const #cir.ptr<null> : !cir.ptr<!s32i>
+// AFTER:   [[X2:%.*]] = cir.cmp(eq, [[X0]], [[X1]]) : !cir.ptr<!s32i>, !cir.bool
+// AFTER:   cir.if [[X2]]
diff --git a/clang/test/CIR/Incubator/Transforms/simpl.cir b/clang/test/CIR/Incubator/Transforms/simpl.cir
new file mode 100644
index 0000000000000..34c27daeba345
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/simpl.cir
@@ -0,0 +1,55 @@
+// RUN: cir-opt %s -cir-canonicalize -o - | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+module {
+  cir.func @unary_not(%arg0: !cir.bool) -> !cir.bool {
+    %0 = cir.unary(not, %arg0) : !cir.bool, !cir.bool
+    %1 = cir.unary(not, %0) : !cir.bool, !cir.bool
+    cir.return %1 : !cir.bool
+  }
+  // CHECK:  cir.func @unary_not(%arg0: !cir.bool) -> !cir.bool
+  // CHECK:     cir.return %arg0 : !cir.bool
+
+  cir.func @cast1(%arg0: !cir.bool) -> !cir.bool {
+    %0 = cir.cast bool_to_int %arg0 : !cir.bool -> !s32i
+    %1 = cir.cast int_to_bool %0 : !s32i -> !cir.bool
+    cir.return %1 : !cir.bool
+  }
+  // CHECK:  cir.func @cast1(%arg0: !cir.bool) -> !cir.bool
+  // CHECK:     cir.return %arg0 : !cir.bool
+
+  cir.func @cast2(%arg0: !s32i) -> !cir.bool {
+    %0 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
+    %1 = cir.cast bool_to_int %0 : !cir.bool -> !s32i
+    %2 = cir.cast integral %1 : !s32i -> !s64i
+    %3 = cir.cast int_to_bool %2 : !s64i -> !cir.bool
+    cir.return %3 : !cir.bool
+  }
+  // CHECK:  cir.func @cast2(%arg0: !s32i) -> !cir.bool
+  // CHECK:    %0 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
+  // CHECK:    cir.return %0 : !cir.bool
+
+  cir.func @no_cast(%arg0: !s32i) -> !s64i {
+    %0 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
+    %1 = cir.cast bool_to_int %0 : !cir.bool -> !s32i
+    %2 = cir.cast integral %1 : !s32i -> !s64i
+    cir.return %2 : !s64i
+  }
+  // CHECK:  cir.func @no_cast(%arg0: !s32i) -> !s64i
+  // CHECK:    %0 = cir.cast int_to_bool %arg0 : !s32i -> !cir.bool
+  // CHECK:    %1 = cir.cast bool_to_int %0 : !cir.bool -> !s32i
+  // CHECK:    %2 = cir.cast integral %1 : !s32i -> !s64i
+  // CHECK:    cir.return %2 : !s64i
+
+  cir.func @get_element(%arg0: !cir.ptr<!cir.array<!s32i x 5>>, %arg1: !s32i) -> !s32i {
+    %0 = cir.cast array_to_ptrdecay %arg0 : !cir.ptr<!cir.array<!s32i x 5>> -> !cir.ptr<!s32i>
+    %1 = cir.ptr_stride %0, %arg1 : (!cir.ptr<!s32i>, !s32i) -> !cir.ptr<!s32i>
+    %2 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    cir.return %2 : !s32i
+  }
+  // CHECK: cir.func @get_element(%arg0: !cir.ptr<!cir.array<!s32i x 5>>, %arg1: !s32i) -> !s32i
+  // CHECK:   %0 = cir.get_element %arg0[%arg1] : (!cir.ptr<!cir.array<!s32i x 5>>, !s32i) -> !cir.ptr<!s32i>
+  // CHECK:   %1 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+  // CHECK:   cir.return %1 : !s32i
+}
diff --git a/clang/test/CIR/Incubator/Transforms/switch.cir b/clang/test/CIR/Incubator/Transforms/switch.cir
new file mode 100644
index 0000000000000..f88aea2c20bf4
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/switch.cir
@@ -0,0 +1,278 @@
+// RUN: cir-opt %s -cir-flatten-cfg -o - | FileCheck %s
+
+!s8i = !cir.int<s, 8>
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+
+module {
+  cir.func @shouldFlatSwitchWithDefault(%arg0: !s8i) {
+    cir.switch (%arg0 : !s8i) {
+    cir.case (equal, [#cir.int<1> : !s8i]) {
+      cir.break
+    }
+    cir.case (default, []) {
+      cir.break
+    }
+    cir.yield
+    }
+    cir.return
+  }
+// CHECK:  cir.func @shouldFlatSwitchWithDefault(%arg0: !s8i) {
+// CHECK:    cir.switch.flat %arg0 : !s8i, ^bb[[#DEFAULT:]] [
+// CHECK:      1: ^bb[[#CASE1:]]
+// CHECK:    ]
+// CHECK:  ^bb[[#CASE1]]:
+// CHECK:    cir.br ^bb[[#EXIT:]]
+// CHECK:  ^bb[[#DEFAULT]]:
+// CHECK:    cir.br ^bb[[#EXIT]]
+// CHECK:  ^bb[[#EXIT]]:
+// CHECK:    cir.return
+// CHECK:  }
+
+  cir.func @shouldFlatSwitchWithoutDefault(%arg0: !s32i) {
+    cir.switch (%arg0 : !s32i) {
+    cir.case (equal, [#cir.int<1> : !s32i]) {
+      cir.break
+    }
+    cir.yield
+    }
+    cir.return
+  }
+// CHECK:  cir.func @shouldFlatSwitchWithoutDefault(%arg0: !s32i) {
+// CHECK:    cir.switch.flat %arg0 : !s32i, ^bb[[#EXIT:]] [
+// CHECK:      1: ^bb[[#CASE1:]]
+// CHECK:    ]
+// CHECK:  ^bb[[#CASE1]]:
+// CHECK:    cir.br ^bb[[#EXIT]]
+// CHECK:  ^bb[[#EXIT]]:
+// CHECK:    cir.return
+// CHECK:  }
+
+
+  cir.func @shouldFlatSwitchWithImplicitFallthrough(%arg0: !s64i) {
+    cir.switch (%arg0 : !s64i) {
+    cir.case (anyof, [#cir.int<1> : !s64i, #cir.int<2> : !s64i]) {
+      cir.break
+    }
+    cir.yield
+    }
+    cir.return
+  }
+// CHECK:  cir.func @shouldFlatSwitchWithImplicitFallthrough(%arg0: !s64i) {
+// CHECK:    cir.switch.flat %arg0 : !s64i, ^bb[[#EXIT:]] [
+// CHECK:      1: ^bb[[#CASE1N2:]],
+// CHECK:      2: ^bb[[#CASE1N2]]
+// CHECK:    ]
+// CHECK:  ^bb[[#CASE1N2]]:
+// CHECK:    cir.br ^bb[[#EXIT]]
+// CHECK:  ^bb[[#EXIT]]:
+// CHECK:    cir.return
+// CHECK:  }
+
+
+
+  cir.func @shouldFlatSwitchWithExplicitFallthrough(%arg0: !s64i) {
+      cir.switch (%arg0 : !s64i) {
+      cir.case (equal, [#cir.int<1> : !s64i]) { // case 1 has its own region
+        cir.yield // fallthrough to case 2
+      }
+      cir.case (equal, [#cir.int<2> : !s64i]) {
+        cir.break
+      }
+      cir.yield
+      }
+    cir.return
+  }
+// CHECK:  cir.func @shouldFlatSwitchWithExplicitFallthrough(%arg0: !s64i) {
+// CHECK:    cir.switch.flat %arg0 : !s64i, ^bb[[#EXIT:]] [
+// CHECK:      1: ^bb[[#CASE1:]],
+// CHECK:      2: ^bb[[#CASE2:]]
+// CHECK:    ]
+// CHECK:  ^bb[[#CASE1]]:
+// CHECK:    cir.br ^bb[[#CASE2]]
+// CHECK:  ^bb[[#CASE2]]:
+// CHECK:    cir.br ^bb[[#EXIT]]
+// CHECK:  ^bb[[#EXIT]]:
+// CHECK:    cir.return
+// CHECK:  }
+
+  cir.func @shouldFlatSwitchWithFallthroughToExit(%arg0: !s64i) {
+      cir.switch (%arg0 : !s64i) {
+      cir.case (equal, [#cir.int<1> : !s64i]) {
+        cir.yield // fallthrough to exit
+      }
+      cir.yield
+      }
+    cir.return
+  }
+// CHECK:  cir.func @shouldFlatSwitchWithFallthroughToExit(%arg0: !s64i) {
+// CHECK:    cir.switch.flat %arg0 : !s64i, ^bb[[#EXIT:]] [
+// CHECK:      1: ^bb[[#CASE1:]]
+// CHECK:    ]
+// CHECK:  ^bb[[#CASE1]]:
+// CHECK:    cir.br ^bb[[#EXIT]]
+// CHECK:  ^bb[[#EXIT]]:
+// CHECK:    cir.return
+// CHECK:  }
+
+  cir.func @shouldDropEmptySwitch(%arg0: !s64i) {
+    cir.switch (%arg0 : !s64i) {
+      cir.yield
+    }
+    // CHECK-NOT: llvm.switch
+    cir.return
+  }
+// CHECK:      cir.func @shouldDropEmptySwitch(%arg0: !s64i)
+// CHECK-NOT:    cir.switch.flat
+
+
+  cir.func @shouldFlatMultiBlockCase(%arg0: !s32i) {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    cir.scope {
+      %1 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+      cir.switch (%1 : !s32i) {
+      cir.case (equal, [#cir.int<3> : !s32i]) {
+        cir.return
+      ^bb1:  // no predecessors
+        cir.break
+      }
+      cir.yield
+      }
+    }
+    cir.return
+  }
+
+// CHECK:  cir.func @shouldFlatMultiBlockCase(%arg0: !s32i) {
+// CHECK:     %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] {alignment = 4 : i64}
+// CHECK:     cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK:     cir.br ^bb1
+// CHECK:   ^bb1:  // pred: ^bb0
+// CHECK:     %1 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+// CHECK:     cir.switch.flat %1 : !s32i, ^bb[[#DEFAULT:]] [
+// CHECK:       3: ^bb[[#BB1:]]
+// CHECK:     ]
+// CHECK:   ^bb[[#BB1]]:
+// CHECK:     cir.return
+// CHECK:   ^bb[[#DEFAULT]]:
+// CHECK:     cir.br ^bb[[#RET_BB:]]
+// CHECK:   ^bb[[#RET_BB]]:  // pred: ^bb[[#DEFAULT]]
+// CHECK:     cir.return
+// CHECK:   }
+
+
+  cir.func @shouldFlatNestedBreak(%arg0: !s32i, %arg1: !s32i) -> !s32i {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {alignment = 4 : i64}
+    %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    cir.store %arg1, %1 : !s32i, !cir.ptr<!s32i>
+    cir.scope {
+      %5 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+      cir.switch (%5 : !s32i) {
+      cir.case (equal, [#cir.int<0> : !s32i]) {
+        cir.scope {
+          %6 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+          %7 = cir.const #cir.int<0> : !s32i
+          %8 = cir.cmp(ge, %6, %7) : !s32i, !cir.bool
+          cir.if %8 {
+            cir.break
+          }
+        }
+        cir.break
+      }
+      cir.yield
+      }
+    }
+    %3 = cir.const #cir.int<3> : !s32i
+    cir.store %3, %2 : !s32i, !cir.ptr<!s32i>
+    %4 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+    cir.return %4 : !s32i
+  }
+// CHECK:  cir.func @shouldFlatNestedBreak(%arg0: !s32i, %arg1: !s32i) -> !s32i {
+// CHECK:    cir.switch.flat %3 : !s32i, ^bb[[#DEFAULT_BB:]] [
+// CHECK:      0: ^bb[[#BB1:]]
+// CHECK:    ]
+// CHECK:  ^bb[[#BB1]]:
+// CHECK:    cir.br ^bb[[#COND_BB:]]
+// CHECK:  ^bb[[#COND_BB]]:
+// CHECK:    cir.brcond {{%.*}} ^bb[[#TRUE_BB:]], ^bb[[#FALSE_BB:]]
+// CHECK:  ^bb[[#TRUE_BB]]:
+// CHECK:    cir.br ^bb[[#DEFAULT_BB]]
+// CHECK:  ^bb[[#FALSE_BB]]:
+// CHECK:    cir.br ^bb[[#PRED_BB:]]
+// CHECK:  ^bb[[#PRED_BB]]:
+// CHECK:    cir.br ^bb[[#DEFAULT_BB]]
+// CHECK:  ^bb[[#DEFAULT_BB]]:
+// CHECK:    cir.br ^bb[[#RET_BB:]]
+// CHECK:  ^bb[[#RET_BB]]:
+// CHECK:    cir.return
+// CHECK:  }
+
+
+  cir.func @flatCaseRange(%arg0: !s32i) -> !s32i {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+    %2 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    %3 = cir.const #cir.int<0> : !s32i
+    cir.store %3, %2 : !s32i, !cir.ptr<!s32i>
+    cir.scope {
+      %6 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+      cir.switch (%6 : !s32i) {
+      cir.case (equal, [#cir.int<-100> : !s32i]) {
+        %7 = cir.const #cir.int<1> : !s32i
+        cir.store %7, %2 : !s32i, !cir.ptr<!s32i>
+        cir.break
+      }
+      cir.case (range, [#cir.int<1> : !s32i, #cir.int<100> : !s32i]) {
+        %7 = cir.const #cir.int<2> : !s32i
+        cir.store %7, %2 : !s32i, !cir.ptr<!s32i>
+        cir.break
+      }
+      cir.case (default, []) {
+        %7 = cir.const #cir.int<3> : !s32i
+        cir.store %7, %2 : !s32i, !cir.ptr<!s32i>
+        cir.break
+      }
+      cir.yield
+      }
+    }
+    %4 = cir.load %2 : !cir.ptr<!s32i>, !s32i
+    cir.store %4, %1 : !s32i, !cir.ptr<!s32i>
+    %5 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    cir.return %5 : !s32i
+  }
+//      CHECK:  cir.func @flatCaseRange(%arg0: !s32i) -> !s32i {
+//      CHECK:    cir.switch.flat %[[X:[0-9]+]] : !s32i, ^[[JUDGE_RANGE:bb[0-9]+]] [
+// CHECK-NEXT:      -100: ^[[CASE_EQUAL:bb[0-9]+]]
+// CHECK-NEXT:    ]
+// CHECK-NEXT:  ^[[UNREACHABLE_BB:.+]]:   // no predecessors
+// CHECK-NEXT:    cir.br ^[[CASE_EQUAL]]
+// CHECK-NEXT:  ^[[CASE_EQUAL]]:
+// CHECK-NEXT:    cir.int<1>
+// CHECK-NEXT:    cir.store
+// CHECK-NEXT:    cir.br ^[[EPILOG:bb[0-9]+]]
+// CHECK-NEXT:  ^[[CASE_RANGE:bb[0-9]+]]:
+// CHECK-NEXT:    cir.int<2>
+// CHECK-NEXT:    cir.store
+// CHECK-NEXT:    cir.br ^[[EPILOG]]
+// CHECK-NEXT:  ^[[JUDGE_RANGE]]:
+// CHECK-NEXT:     %[[RANGE:[0-9]+]] = cir.const #cir.int<99>
+// CHECK-NEXT:     %[[LOWER_BOUND:[0-9]+]] = cir.const #cir.int<1>
+// CHECK-NEXT:     %[[DIFF:[0-9]+]] = cir.binop(sub, %[[X]], %[[LOWER_BOUND]])
+// CHECK-NEXT:     %[[U_DIFF:[0-9]+]] = cir.cast integral %[[DIFF]] : !s32i -> !u32i
+// CHECK-NEXT:     %[[U_RANGE:[0-9]+]] = cir.cast integral %[[RANGE]] : !s32i -> !u32i
+// CHECK-NEXT:     %[[CMP_RESULT:[0-9]+]] = cir.cmp(le, %[[U_DIFF]], %[[U_RANGE]])
+// CHECK-NEXT:     cir.brcond %[[CMP_RESULT]] ^[[CASE_RANGE]], ^[[CASE_DEFAULT:bb[0-9]+]]
+// CHECK-NEXT:  ^[[CASE_DEFAULT]]:
+// CHECK-NEXT:    cir.int<3>
+// CHECK-NEXT:    cir.store
+// CHECK-NEXT:    cir.br ^[[EPILOG]]
+// CHECK-NEXT:  ^[[EPILOG]]:
+// CHECK-NEXT:    cir.br ^[[EPILOG_END:bb[0-9]+]]
+// CHECK-NEXT:  ^[[EPILOG_END]]:
+//      CHECK:    cir.return
+//      CHECK:  }
+
+}
diff --git a/clang/test/CIR/Incubator/Transforms/ternary-fold.cir b/clang/test/CIR/Incubator/Transforms/ternary-fold.cir
new file mode 100644
index 0000000000000..617a4e864b01f
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/ternary-fold.cir
@@ -0,0 +1,60 @@
+// RUN: cir-opt -cir-canonicalize -cir-simplify -o %t.cir %s
+// RUN: FileCheck --input-file=%t.cir %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.func @fold_ternary(%arg0: !s32i, %arg1: !s32i) -> !s32i {
+    %0 = cir.const #cir.bool<false> : !cir.bool
+    %1 = cir.ternary (%0, true {
+      cir.yield %arg0 : !s32i
+    }, false {
+      cir.yield %arg1 : !s32i
+    }) : (!cir.bool) -> !s32i
+    cir.return %1 : !s32i
+  }
+
+  //      CHECK: cir.func @fold_ternary(%{{.+}}: !s32i, %[[ARG:.+]]: !s32i) -> !s32i {
+  // CHECK-NEXT:   cir.return %[[ARG]] : !s32i
+  // CHECK-NEXT: }
+
+  cir.func @simplify_ternary(%arg0 : !cir.bool, %arg1 : !s32i) -> !s32i {
+    %0 = cir.ternary (%arg0, true {
+      %1 = cir.const #cir.int<42> : !s32i
+      cir.yield %1 : !s32i
+    }, false {
+      cir.yield %arg1 : !s32i
+    }) : (!cir.bool) -> !s32i
+    cir.return %0 : !s32i
+  }
+
+  //      CHECK: cir.func @simplify_ternary(%[[ARG0:.+]]: !cir.bool, %[[ARG1:.+]]: !s32i) -> !s32i {
+  // CHECK-NEXT:   %[[#A:]] = cir.const #cir.int<42> : !s32i
+  // CHECK-NEXT:   %[[#B:]] = cir.select if %[[ARG0]] then %[[#A]] else %[[ARG1]] : (!cir.bool, !s32i, !s32i) -> !s32i
+  // CHECK-NEXT:   cir.return %[[#B]] : !s32i
+  // CHECK-NEXT: }
+
+  cir.func @non_simplifiable_ternary(%arg0 : !cir.bool) -> !s32i {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init]
+    %1 = cir.ternary (%arg0, true {
+      %2 = cir.const #cir.int<42> : !s32i
+      cir.yield %2 : !s32i
+    }, false {
+      %3 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+      cir.yield %3 : !s32i
+    }) : (!cir.bool) -> !s32i
+    cir.return %1 : !s32i
+  }
+
+  //      CHECK: cir.func @non_simplifiable_ternary(%[[ARG0:.+]]: !cir.bool) -> !s32i {
+  // CHECK-NEXT:   %[[#A:]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init]
+  // CHECK-NEXT:   %[[#B:]] = cir.ternary(%[[ARG0]], true {
+  // CHECK-NEXT:     %[[#C:]] = cir.const #cir.int<42> : !s32i
+  // CHECK-NEXT:     cir.yield %[[#C]] : !s32i
+  // CHECK-NEXT:   }, false {
+  // CHECK-NEXT:     %[[#D:]] = cir.load{{.*}} %[[#A]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT:     cir.yield %[[#D]] : !s32i
+  // CHECK-NEXT:   }) : (!cir.bool) -> !s32i
+  // CHECK-NEXT:   cir.return %[[#B]] : !s32i
+  // CHECK-NEXT: }
+}
diff --git a/clang/test/CIR/Incubator/Transforms/ternary.cir b/clang/test/CIR/Incubator/Transforms/ternary.cir
new file mode 100644
index 0000000000000..0c22268495697
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/ternary.cir
@@ -0,0 +1,64 @@
+// RUN: cir-opt %s -cir-flatten-cfg -o - | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.func @foo(%arg0: !s32i) -> !s32i {
+    %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {alignment = 4 : i64}
+    %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+    cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+    %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+    %3 = cir.const #cir.int<0> : !s32i
+    %4 = cir.cmp(gt, %2, %3) : !s32i, !cir.bool
+    %5 = cir.ternary(%4, true {
+      %7 = cir.const #cir.int<3> : !s32i
+      cir.yield %7 : !s32i
+    }, false {
+      %7 = cir.const #cir.int<5> : !s32i
+      cir.yield %7 : !s32i
+    }) : (!cir.bool) -> !s32i
+    cir.store %5, %1 : !s32i, !cir.ptr<!s32i>
+    %6 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+    cir.return %6 : !s32i
+  }
+
+// CHECK: cir.func @foo(%arg0: !s32i) -> !s32i {
+// CHECK:   %0 = cir.alloca !s32i, !cir.ptr<!s32i>, ["y", init] {alignment = 4 : i64}
+// CHECK:   %1 = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"] {alignment = 4 : i64}
+// CHECK:   cir.store %arg0, %0 : !s32i, !cir.ptr<!s32i>
+// CHECK:   %2 = cir.load %0 : !cir.ptr<!s32i>, !s32i
+// CHECK:   %3 = cir.const #cir.int<0> : !s32i
+// CHECK:   %4 = cir.cmp(gt, %2, %3) : !s32i, !cir.bool
+// CHECK:    cir.brcond %4 ^bb1, ^bb2
+// CHECK:  ^bb1:  // pred: ^bb0
+// CHECK:    %5 = cir.const #cir.int<3> : !s32i
+// CHECK:    cir.br ^bb3(%5 : !s32i)
+// CHECK:  ^bb2:  // pred: ^bb0
+// CHECK:    %6 = cir.const #cir.int<5> : !s32i
+// CHECK:    cir.br ^bb3(%6 : !s32i)
+// CHECK:  ^bb3(%7: !s32i):  // 2 preds: ^bb1, ^bb2
+// CHECK:    cir.store %7, %1 : !s32i, !cir.ptr<!s32i>
+// CHECK:    %8 = cir.load %1 : !cir.ptr<!s32i>, !s32i
+// CHECK:    cir.return %8 : !s32i
+// CHECK:  }
+
+  cir.func @foo2(%arg0: !cir.bool) {
+    cir.ternary(%arg0, true {
+      cir.yield
+    }, false {
+      cir.yield
+    }) : (!cir.bool) -> ()
+    cir.return
+  }
+
+// CHECK: cir.func @foo2(%arg0: !cir.bool) {
+// CHECK:   cir.brcond %arg0 ^bb1, ^bb2
+// CHECK: ^bb1:  // pred: ^bb0
+// CHECK:   cir.br ^bb3
+// CHECK: ^bb2:  // pred: ^bb0
+// CHECK:   cir.br ^bb3
+// CHECK: ^bb3:  // 2 preds: ^bb1, ^bb2
+// CHECK:   cir.return
+// CHECK: }
+
+}
diff --git a/clang/test/CIR/Incubator/Transforms/vector-cmp-fold.cir b/clang/test/CIR/Incubator/Transforms/vector-cmp-fold.cir
new file mode 100644
index 0000000000000..1839d8363f572
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/vector-cmp-fold.cir
@@ -0,0 +1,227 @@
+// RUN: cir-opt %s -cir-canonicalize -o - -split-input-file | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+
+module  {
+  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+    %vec_1 = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<7> : !s32i]> : !cir.vector<!s32i x 4>
+    %vec_2 = cir.const #cir.const_vector<[#cir.int<2> : !s32i, #cir.int<4> : !s32i, #cir.int<6> : !s32i, #cir.int<8> : !s32i]> : !cir.vector<!s32i x 4>
+    %new_vec = cir.vec.cmp(eq, %vec_1, %vec_2) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+    cir.return %new_vec : !cir.vector<!s32i x 4>
+  }
+
+  // CHECK:  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+  // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<0> : !s32i, #cir.int<0> : !s32i,
+  // CHECK-SAME: #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<!s32i x 4>
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+module  {
+  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+    %vec_1 = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<7> : !s32i]> : !cir.vector<!s32i x 4>
+    %vec_2 = cir.const #cir.const_vector<[#cir.int<2> : !s32i, #cir.int<4> : !s32i, #cir.int<6> : !s32i, #cir.int<8> : !s32i]> : !cir.vector<!s32i x 4>
+    %new_vec = cir.vec.cmp(ne, %vec_1, %vec_2) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+    cir.return %new_vec : !cir.vector<!s32i x 4>
+  }
+
+  // CHECK:  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+  // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<1> : !s32i,
+  // CHECK-SAME: #cir.int<1> : !s32i, #cir.int<1> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<!s32i x 4>
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+module  {
+  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+    %vec_1 = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<7> : !s32i]> : !cir.vector<!s32i x 4>
+    %vec_2 = cir.const #cir.const_vector<[#cir.int<2> : !s32i, #cir.int<4> : !s32i, #cir.int<6> : !s32i, #cir.int<8> : !s32i]> : !cir.vector<!s32i x 4>
+    %new_vec = cir.vec.cmp(lt, %vec_1, %vec_2) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+    cir.return %new_vec : !cir.vector<!s32i x 4>
+  }
+
+  // CHECK:  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+  // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<1> : !s32i,
+  // CHECK-SAME: #cir.int<1> : !s32i, #cir.int<1> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<!s32i x 4>
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+module  {
+  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+    %vec_1 = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<7> : !s32i]> : !cir.vector<!s32i x 4>
+    %vec_2 = cir.const #cir.const_vector<[#cir.int<2> : !s32i, #cir.int<4> : !s32i, #cir.int<6> : !s32i, #cir.int<8> : !s32i]> : !cir.vector<!s32i x 4>
+    %new_vec = cir.vec.cmp(le, %vec_1, %vec_2) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+    cir.return %new_vec : !cir.vector<!s32i x 4>
+  }
+
+  // CHECK:  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+  // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<1> : !s32i,
+  // CHECK-SAME: #cir.int<1> : !s32i, #cir.int<1> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<!s32i x 4>
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+module  {
+  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+    %vec_1 = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<7> : !s32i]> : !cir.vector<!s32i x 4>
+    %vec_2 = cir.const #cir.const_vector<[#cir.int<2> : !s32i, #cir.int<4> : !s32i, #cir.int<6> : !s32i, #cir.int<8> : !s32i]> : !cir.vector<!s32i x 4>
+    %new_vec = cir.vec.cmp(gt, %vec_1, %vec_2) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+    cir.return %new_vec : !cir.vector<!s32i x 4>
+  }
+
+  // CHECK:  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+  // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<0> : !s32i, #cir.int<0> : !s32i,
+  // CHECK-SAME: #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<!s32i x 4>
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+module  {
+  // Covers the `ge` predicate (the sibling cases cover eq/ne/lt/le/gt);
+  // [1,3,5,7] >= [2,4,6,8] is elementwise false, so the fold yields all zeros.
+  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+    %vec_1 = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<7> : !s32i]> : !cir.vector<!s32i x 4>
+    %vec_2 = cir.const #cir.const_vector<[#cir.int<2> : !s32i, #cir.int<4> : !s32i, #cir.int<6> : !s32i, #cir.int<8> : !s32i]> : !cir.vector<!s32i x 4>
+    %new_vec = cir.vec.cmp(ge, %vec_1, %vec_2) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+    cir.return %new_vec : !cir.vector<!s32i x 4>
+  }
+
+  // CHECK:  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+  // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<0> : !s32i, #cir.int<0> : !s32i,
+  // CHECK-SAME: #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<!s32i x 4>
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+module  {
+  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+    %vec_1 = cir.const #cir.const_vector<[#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00>
+      : !cir.float, #cir.fp<3.000000e+00> : !cir.float, #cir.fp<4.000000e+00> : !cir.float]> : !cir.vector<!cir.float x 4>
+    %vec_2 = cir.const #cir.const_vector<[#cir.fp<5.000000e+00> : !cir.float, #cir.fp<6.000000e+00>
+      : !cir.float, #cir.fp<7.000000e+00> : !cir.float, #cir.fp<8.000000e+00> : !cir.float]> : !cir.vector<!cir.float x 4>
+    %new_vec = cir.vec.cmp(eq, %vec_1, %vec_2) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
+    cir.return %new_vec : !cir.vector<!s32i x 4>
+  }
+
+  // CHECK:  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+  // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<0> : !s32i, #cir.int<0> : !s32i,
+  // CHECK-SAME: #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<!s32i x 4>
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+module  {
+  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+    %vec_1 = cir.const #cir.const_vector<[#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00>
+      : !cir.float, #cir.fp<3.000000e+00> : !cir.float, #cir.fp<4.000000e+00> : !cir.float]> : !cir.vector<!cir.float x 4>
+    %vec_2 = cir.const #cir.const_vector<[#cir.fp<5.000000e+00> : !cir.float, #cir.fp<6.000000e+00>
+      : !cir.float, #cir.fp<7.000000e+00> : !cir.float, #cir.fp<8.000000e+00> : !cir.float]> : !cir.vector<!cir.float x 4>
+    %new_vec = cir.vec.cmp(ne, %vec_1, %vec_2) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
+    cir.return %new_vec : !cir.vector<!s32i x 4>
+  }
+
+  // CHECK:  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+  // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<1> : !s32i,
+  // CHECK-SAME: #cir.int<1> : !s32i, #cir.int<1> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<!s32i x 4>
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+module  {
+  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+    %vec_1 = cir.const #cir.const_vector<[#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00>
+      : !cir.float, #cir.fp<3.000000e+00> : !cir.float, #cir.fp<4.000000e+00> : !cir.float]> : !cir.vector<!cir.float x 4>
+    %vec_2 = cir.const #cir.const_vector<[#cir.fp<5.000000e+00> : !cir.float, #cir.fp<6.000000e+00>
+      : !cir.float, #cir.fp<7.000000e+00> : !cir.float, #cir.fp<8.000000e+00> : !cir.float]> : !cir.vector<!cir.float x 4>
+    %new_vec = cir.vec.cmp(lt, %vec_1, %vec_2) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
+    cir.return %new_vec : !cir.vector<!s32i x 4>
+  }
+
+  // CHECK:  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+  // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<1> : !s32i,
+  // CHECK-SAME: #cir.int<1> : !s32i, #cir.int<1> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<!s32i x 4>
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+module  {
+  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+    %vec_1 = cir.const #cir.const_vector<[#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00>
+      : !cir.float, #cir.fp<3.000000e+00> : !cir.float, #cir.fp<4.000000e+00> : !cir.float]> : !cir.vector<!cir.float x 4>
+    %vec_2 = cir.const #cir.const_vector<[#cir.fp<5.000000e+00> : !cir.float, #cir.fp<6.000000e+00>
+      : !cir.float, #cir.fp<7.000000e+00> : !cir.float, #cir.fp<8.000000e+00> : !cir.float]> : !cir.vector<!cir.float x 4>
+    %new_vec = cir.vec.cmp(le, %vec_1, %vec_2) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
+    cir.return %new_vec : !cir.vector<!s32i x 4>
+  }
+
+  // CHECK:  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+  // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<1> : !s32i,
+  // CHECK-SAME: #cir.int<1> : !s32i, #cir.int<1> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<!s32i x 4>
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+module  {
+  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+    %vec_1 = cir.const #cir.const_vector<[#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00>
+      : !cir.float, #cir.fp<3.000000e+00> : !cir.float, #cir.fp<4.000000e+00> : !cir.float]> : !cir.vector<!cir.float x 4>
+    %vec_2 = cir.const #cir.const_vector<[#cir.fp<5.000000e+00> : !cir.float, #cir.fp<6.000000e+00>
+      : !cir.float, #cir.fp<7.000000e+00> : !cir.float, #cir.fp<8.000000e+00> : !cir.float]> : !cir.vector<!cir.float x 4>
+    %new_vec = cir.vec.cmp(gt, %vec_1, %vec_2) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
+    cir.return %new_vec : !cir.vector<!s32i x 4>
+  }
+
+  // CHECK:  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+  // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<0> : !s32i, #cir.int<0> : !s32i,
+  // CHECK-SAME: #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<!s32i x 4>
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+
+module  {
+  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+    %vec_1 = cir.const #cir.const_vector<[#cir.fp<1.000000e+00> : !cir.float, #cir.fp<2.000000e+00>
+      : !cir.float, #cir.fp<3.000000e+00> : !cir.float, #cir.fp<4.000000e+00> : !cir.float]> : !cir.vector<!cir.float x 4>
+    %vec_2 = cir.const #cir.const_vector<[#cir.fp<5.000000e+00> : !cir.float, #cir.fp<6.000000e+00>
+      : !cir.float, #cir.fp<7.000000e+00> : !cir.float, #cir.fp<8.000000e+00> : !cir.float]> : !cir.vector<!cir.float x 4>
+    %new_vec = cir.vec.cmp(ge, %vec_1, %vec_2) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
+    cir.return %new_vec : !cir.vector<!s32i x 4>
+  }
+
+  // CHECK:  cir.func @fold_cmp_vector_op_test() -> !cir.vector<!s32i x 4> {
+  // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<0> : !s32i, #cir.int<0> : !s32i,
+  // CHECK-SAME: #cir.int<0> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<!s32i x 4>
+}
diff --git a/clang/test/CIR/Incubator/Transforms/vector-create-fold.cir b/clang/test/CIR/Incubator/Transforms/vector-create-fold.cir
new file mode 100644
index 0000000000000..65bc51d8c36c4
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/vector-create-fold.cir
@@ -0,0 +1,19 @@
+// RUN: cir-opt %s -cir-canonicalize -o - | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+
+module  {
+  cir.func @fold_create_vector_op_test() -> !cir.vector<!s32i x 4> {
+    %2 = cir.const #cir.int<1> : !s32i
+    %3 = cir.const #cir.int<2> : !s32i
+    %4 = cir.const #cir.int<3> : !s32i
+    %5 = cir.const #cir.int<4> : !s32i
+    %vec = cir.vec.create(%2, %3, %4, %5 : !s32i, !s32i, !s32i, !s32i) : !cir.vector<!s32i x 4>
+    cir.return %vec : !cir.vector<!s32i x 4>
+  }
+
+  // CHECK:  cir.func @fold_create_vector_op_test() -> !cir.vector<!s32i x 4> {
+  // CHECK-NEXT: %[[VEC:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<2> : !s32i,
+  // CHECK-SAME: #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %[[VEC]] : !cir.vector<!s32i x 4>
+}
diff --git a/clang/test/CIR/Incubator/Transforms/vector-extract-fold.cir b/clang/test/CIR/Incubator/Transforms/vector-extract-fold.cir
new file mode 100644
index 0000000000000..1e4703756627d
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/vector-extract-fold.cir
@@ -0,0 +1,33 @@
+// RUN: cir-opt %s -cir-canonicalize -o - | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.func @fold_extract_vector_op_test() {
+    %init = cir.alloca !s32i, !cir.ptr<!s32i>, ["e", init]
+    %const_vec = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.vector<!s32i x 4>
+    %index = cir.const #cir.int<1> : !s32i
+    %ele = cir.vec.extract %const_vec[%index : !s32i] : !cir.vector<!s32i x 4>
+    cir.store %ele, %init : !s32i, !cir.ptr<!s32i>
+    cir.return
+  }
+
+  // CHECK: %[[INIT:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["e", init]
+  // CHECK: %[[VALUE:.*]] = cir.const #cir.int<2> : !s32i
+  // CHECK: cir.store %[[VALUE]], %[[INIT]] : !s32i, !cir.ptr<!s32i>
+
+  cir.func @fold_extract_vector_op_index_out_of_bounds_test() {
+    %init = cir.alloca !s32i, !cir.ptr<!s32i>, ["e", init]
+    %const_vec = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.vector<!s32i x 4>
+    %index = cir.const #cir.int<9> : !s32i
+    %ele = cir.vec.extract %const_vec[%index : !s32i] : !cir.vector<!s32i x 4>
+    cir.store %ele, %init : !s32i, !cir.ptr<!s32i>
+    cir.return
+  }
+
+  // CHECK: %[[INIT:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["e", init]
+  // CHECK: %[[CONST_VEC:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK: %[[INDEX:.*]] = cir.const #cir.int<9> : !s32i
+  // CHECK: %[[ELE:.*]] = cir.vec.extract %[[CONST_VEC]][%[[INDEX]] : !s32i] : !cir.vector<!s32i x 4>
+  // CHECK: cir.store %[[ELE]], %[[INIT]] : !s32i, !cir.ptr<!s32i>
+}
diff --git a/clang/test/CIR/Incubator/Transforms/vector-shuffle-dynamic-fold.cir b/clang/test/CIR/Incubator/Transforms/vector-shuffle-dynamic-fold.cir
new file mode 100644
index 0000000000000..9725e3f162416
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/vector-shuffle-dynamic-fold.cir
@@ -0,0 +1,30 @@
+// RUN: cir-opt %s -cir-canonicalize -o - | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.func @fold_shuffle_dynamic_vector_op_test() -> !cir.vector<!s32i x 4> {
+    %vec = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.vector<!s32i x 4>
+    %indices = cir.const #cir.const_vector<[#cir.int<8> : !s32i, #cir.int<7> : !s32i, #cir.int<6> : !s32i, #cir.int<5> : !s32i]> : !cir.vector<!s32i x 4>
+    %new_vec = cir.vec.shuffle.dynamic %vec : !cir.vector<!s32i x 4>, %indices : !cir.vector<!s32i x 4>
+    cir.return %new_vec : !cir.vector<!s32i x 4>
+  }
+
+  // Masking indices [8, 7, 6, 5] AND 3 = [0, 3, 2, 1]
+  // CHECK: cir.func @fold_shuffle_dynamic_vector_op_test() -> !cir.vector<!s32i x 4> {
+  // CHECK-NEXT: %[[NEW_VEC:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %[[NEW_VEC]] : !cir.vector<!s32i x 4>
+
+  cir.func @fold_shuffle_dynamic_vector_op_test_2() -> !cir.vector<!s32i x 4> {
+    %vec = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.vector<!s32i x 4>
+    %indices = cir.const #cir.const_vector<[#cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<1> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<!s32i x 4>
+    %new_vec = cir.vec.shuffle.dynamic %vec : !cir.vector<!s32i x 4>, %indices : !cir.vector<!s32i x 4>
+    cir.return %new_vec : !cir.vector<!s32i x 4>
+  }
+
+  // Masking indices [3, 2, 1, 0] AND 3 = [3, 2, 1, 0]
+  // CHECK: cir.func @fold_shuffle_dynamic_vector_op_test_2() -> !cir.vector<!s32i x 4> {
+  // CHECK-NEXT: %[[NEW_VEC:.*]] = cir.const #cir.const_vector<[#cir.int<4> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<1> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %[[NEW_VEC]] : !cir.vector<!s32i x 4>
+}
+
diff --git a/clang/test/CIR/Incubator/Transforms/vector-shuffle.fold.cir b/clang/test/CIR/Incubator/Transforms/vector-shuffle.fold.cir
new file mode 100644
index 0000000000000..9c9a473d8ec5d
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/vector-shuffle.fold.cir
@@ -0,0 +1,59 @@
+// RUN: cir-opt %s -cir-canonicalize -o - -split-input-file | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+
+module  {
+  cir.func @fold_shuffle_vector_op_test() -> !cir.vector<!s32i x 4> {
+    %vec_1 = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<7> : !s32i]> : !cir.vector<!s32i x 4>
+    %vec_2 = cir.const #cir.const_vector<[#cir.int<2> : !s32i, #cir.int<4> : !s32i, #cir.int<6> : !s32i, #cir.int<8> : !s32i]> : !cir.vector<!s32i x 4>
+    %new_vec = cir.vec.shuffle(%vec_1, %vec_2 : !cir.vector<!s32i x 4>) [#cir.int<0> : !s64i, #cir.int<4> : !s64i,
+      #cir.int<1> : !s64i, #cir.int<5> : !s64i] : !cir.vector<!s32i x 4>
+    cir.return %new_vec : !cir.vector<!s32i x 4>
+  }
+
+  // CHECK: cir.func @fold_shuffle_vector_op_test() -> !cir.vector<!s32i x 4> {
+  // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i,
+  // CHECK-SAME: #cir.int<4> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<!s32i x 4>
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+
+module  {
+  cir.func @fold_shuffle_vector_op_test() -> !cir.vector<!s32i x 6> {
+    %vec_1 = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<7> : !s32i]> : !cir.vector<!s32i x 4>
+    %vec_2 = cir.const #cir.const_vector<[#cir.int<2> : !s32i, #cir.int<4> : !s32i, #cir.int<6> : !s32i, #cir.int<8> : !s32i]> : !cir.vector<!s32i x 4>
+    %new_vec = cir.vec.shuffle(%vec_1, %vec_2 : !cir.vector<!s32i x 4>) [#cir.int<0> : !s64i, #cir.int<4> : !s64i,
+      #cir.int<1> : !s64i, #cir.int<5> : !s64i, #cir.int<2> : !s64i, #cir.int<6> : !s64i] : !cir.vector<!s32i x 6>
+    cir.return %new_vec : !cir.vector<!s32i x 6>
+  }
+
+  // CHECK: cir.func @fold_shuffle_vector_op_test() -> !cir.vector<!s32i x 6> {
+  // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i,
+  // CHECK-SAME: #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i]> : !cir.vector<!s32i x 6>
+  // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<!s32i x 6>
+}
+
+// -----
+
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+
+module  {
+  cir.func @fold_shuffle_vector_op_test() -> !cir.vector<!s32i x 4> {
+    %vec_1 = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<7> : !s32i]> : !cir.vector<!s32i x 4>
+    %vec_2 = cir.const #cir.const_vector<[#cir.int<2> : !s32i, #cir.int<4> : !s32i, #cir.int<6> : !s32i, #cir.int<8> : !s32i]> : !cir.vector<!s32i x 4>
+    %new_vec = cir.vec.shuffle(%vec_1, %vec_2 : !cir.vector<!s32i x 4>) [#cir.int<-1> : !s64i, #cir.int<4> : !s64i,
+      #cir.int<1> : !s64i, #cir.int<5> : !s64i] : !cir.vector<!s32i x 4>
+    cir.return %new_vec : !cir.vector<!s32i x 4>
+  }
+
+  // CHECK: cir.func @fold_shuffle_vector_op_test() -> !cir.vector<!s32i x 4> {
+  // CHECK: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.undef : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i,
+  // CHECK-SAME: #cir.int<4> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<!s32i x 4>
+}
diff --git a/clang/test/CIR/Incubator/Transforms/vector-splat.cir b/clang/test/CIR/Incubator/Transforms/vector-splat.cir
new file mode 100644
index 0000000000000..76195c8a289ef
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/vector-splat.cir
@@ -0,0 +1,16 @@
+// RUN: cir-opt %s -cir-simplify -o - | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+
+module  {
+  cir.func @fold_splat_vector_op_test() -> !cir.vector<!s32i x 4> {
+    %v = cir.const #cir.int<3> : !s32i
+    %vec = cir.vec.splat %v : !s32i, !cir.vector<!s32i x 4>
+    cir.return %vec : !cir.vector<!s32i x 4>
+  }
+
+  // CHECK: cir.func @fold_splat_vector_op_test() -> !cir.vector<!s32i x 4> {
+  // CHECK-NEXT: %0 = cir.const #cir.const_vector<[#cir.int<3> : !s32i, #cir.int<3> : !s32i,
+  // CHECK-SAME: #cir.int<3> : !s32i, #cir.int<3> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %0 : !cir.vector<!s32i x 4>
+}
diff --git a/clang/test/CIR/Incubator/Transforms/vector-ternary-fold.cir b/clang/test/CIR/Incubator/Transforms/vector-ternary-fold.cir
new file mode 100644
index 0000000000000..49ee195ad5455
--- /dev/null
+++ b/clang/test/CIR/Incubator/Transforms/vector-ternary-fold.cir
@@ -0,0 +1,19 @@
+// RUN: cir-opt %s -cir-canonicalize -o - | FileCheck %s
+
+!s32i = !cir.int<s, 32>
+
+module {
+  cir.func @vector_ternary_fold_test() -> !cir.vector<!s32i x 4> {
+    %cond = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<0> : !s32i]> : !cir.vector<!s32i x 4>
+    %lhs = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i]> : !cir.vector<!s32i x 4>
+    %rhs = cir.const #cir.const_vector<[#cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i]> : !cir.vector<!s32i x 4>
+    %res = cir.vec.ternary(%cond, %lhs, %rhs) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+    cir.return %res : !cir.vector<!s32i x 4>
+  }
+
+  // [1, 0, 1, 0] ? [1, 2, 3, 4] : [5, 6, 7, 8] Will be fold to [1, 6, 3, 8]
+  // CHECK: cir.func @vector_ternary_fold_test() -> !cir.vector<!s32i x 4> {
+  // CHECK-NEXT: %[[RES:.*]] = cir.const #cir.const_vector<[#cir.int<1> : !s32i, #cir.int<6> : !s32i, #cir.int<3> : !s32i, #cir.int<8> : !s32i]> : !cir.vector<!s32i x 4>
+  // CHECK-NEXT: cir.return %[[RES]] : !cir.vector<!s32i x 4>
+}
+
diff --git a/clang/test/CIR/Incubator/analysis-only.cpp b/clang/test/CIR/Incubator/analysis-only.cpp
new file mode 100644
index 0000000000000..7dc58250b91bd
--- /dev/null
+++ b/clang/test/CIR/Incubator/analysis-only.cpp
@@ -0,0 +1,2 @@
+// RUN: %clang -fclangir-analysis-only -### -c %s 2>&1 | FileCheck %s
+// CHECK: "-fclangir-analysis-only"
diff --git a/clang/test/CIR/Incubator/cc1.c b/clang/test/CIR/Incubator/cc1.c
new file mode 100644
index 0000000000000..5461562e09f69
--- /dev/null
+++ b/clang/test/CIR/Incubator/cc1.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -emit-mlir=core %s -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s -check-prefix=MLIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm-bc %s -o %t.bc
+// RUN: llvm-dis %t.bc -o %t.bc.ll
+// RUN: FileCheck --input-file=%t.bc.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -S %s -o %t.s
+// RUN: FileCheck --input-file=%t.s %s -check-prefix=ASM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-obj %s -o %t.o
+// RUN: llvm-objdump -d %t.o | FileCheck %s -check-prefix=OBJ
+
+void foo() {}
+
+//      MLIR: func.func @foo() {
+// MLIR-NEXT:   return
+// MLIR-NEXT: }
+
+//      LLVM: define dso_local void @foo()
+// LLVM-NEXT:   ret void
+// LLVM-NEXT: }
+
+//      ASM: .globl  foo
+// ASM-NEXT: .p2align
+// ASM-NEXT: .type foo, at function
+// ASM-NEXT: foo:
+//      ASM: retq
+
+// OBJ: 0: c3 retq
diff --git a/clang/test/CIR/Incubator/cc1.cir b/clang/test/CIR/Incubator/cc1.cir
new file mode 100644
index 0000000000000..9ea923faff0c3
--- /dev/null
+++ b/clang/test/CIR/Incubator/cc1.cir
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=LLVM
+
+module {
+  cir.func @foo() {
+    cir.return
+  }
+}
+
+//      LLVM: define void @foo()
+// LLVM-NEXT:   ret void
+// LLVM-NEXT: }
diff --git a/clang/test/CIR/Incubator/cir-output.c b/clang/test/CIR/Incubator/cir-output.c
new file mode 100644
index 0000000000000..8f7963425b0e4
--- /dev/null
+++ b/clang/test/CIR/Incubator/cir-output.c
@@ -0,0 +1,21 @@
+// RUN: cp %s %t.c
+// RUN: %clang -fclangir -Werror -fcir-output=%t.explicit.cir -c %t.c
+// RUN: FileCheck %s --input-file=%t.explicit.cir --check-prefix=CIR
+// RUN: rm -f %t.cir
+// RUN: %clang -fclangir -Werror -fcir-output %t.c -c -o %t.obj
+// RUN: FileCheck %s --input-file=%t.cir --check-prefix=CIR
+
+// NYI on invalid C++ ABI kind
+// XFAIL: host={{.*}}-windows-msvc
+
+struct S {
+  int x;
+};
+
+int foo(void) {
+  struct S s = {42};
+  return s.x;
+}
+
+// CIR: module
+// CIR: cir.func{{.*}}@foo
diff --git a/clang/test/CIR/Incubator/cirtool.cir b/clang/test/CIR/Incubator/cirtool.cir
new file mode 100644
index 0000000000000..c68e2dd649d2c
--- /dev/null
+++ b/clang/test/CIR/Incubator/cirtool.cir
@@ -0,0 +1,20 @@
+// RUN: cir-opt %s -cir-to-mlir -cir-mlir-to-llvm -o %t.mlir
+// RUN: FileCheck --input-file=%t.mlir %s -check-prefix=MLIR
+// RUN: mlir-translate -mlir-to-llvmir %t.mlir -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM
+
+module {
+  cir.func dso_local @foo() {
+    cir.return
+  }
+}
+
+//      LLVM: define void @foo()
+// LLVM-NEXT:   ret void
+// LLVM-NEXT: }
+
+//      MLIR: module {
+// MLIR-NEXT:   llvm.func @foo() {
+// MLIR-NEXT:     llvm.return
+// MLIR-NEXT:   }
+// MLIR-NEXT: }
diff --git a/clang/test/CIR/Incubator/crashes/apvalue-constexpr-init.cpp b/clang/test/CIR/Incubator/crashes/apvalue-constexpr-init.cpp
new file mode 100644
index 0000000000000..b171756968831
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/apvalue-constexpr-init.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// XFAIL: *
+//
+// APValue emission not implemented - assertion failure
+// Location: CIRGenExprConst.cpp:2075
+//
+// Original failure: assertion_apvalue from LLVM build
+// Reduced from /tmp/MSP430AttributeParser-875bbc.cpp
+
+template <typename a, int b> struct c {
+  typedef a d[b];
+};
+template <typename a, int b> struct h {
+  c<a, b>::d e;
+};
+enum f { g };
+class i {
+  struct m {
+    f j;
+    int (i::*k)(f);
+  };
+  static const h<m, 4> l;
+  int n(f);
+};
+constexpr h<i::m, 4> i::l{g, &i::n, {}, {}, {}};
diff --git a/clang/test/CIR/Incubator/crashes/array-new-default-arg.cpp b/clang/test/CIR/Incubator/crashes/array-new-default-arg.cpp
new file mode 100644
index 0000000000000..04d6eebed8b65
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/array-new-default-arg.cpp
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.og.ll
+// RUN: FileCheck --input-file=%t.og.ll %s --check-prefix=OGCG
+//
+// XFAIL: *
+//
+// CIR crashes when using array new with default constructor arguments.
+//
+// Array new requires calling the constructor for each element. When the
+// constructor has default arguments, CIR's lowering fails with:
+//   'cir.const' op operation destroyed but still has uses
+//   fatal error: error in backend: operation destroyed but still has uses
+//
+// The issue is in how CIR handles default argument values when generating
+// the loop to initialize array elements.
+//
+// This affects any array new expression where the class has a constructor
+// with default parameters.
+
+struct S {
+    int x;
+    S(int v = 0) : x(v) {}  // Default argument triggers the bug
+    ~S() {}
+};
+
+S* test_array_new() {
+    return new S[10];  // Crashes during lowering
+}
+
+// LLVM: Should generate array new
+// LLVM: define {{.*}} @_Z14test_array_newv()
+
+// OGCG: Should generate array new with cookie and element loop
+// OGCG: define {{.*}} @_Z14test_array_newv()
+// OGCG: call {{.*}} @_Znam
diff --git a/clang/test/CIR/Incubator/crashes/async-future.cpp b/clang/test/CIR/Incubator/crashes/async-future.cpp
new file mode 100644
index 0000000000000..4bff63df3fa2b
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/async-future.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -fcxx-exceptions -fexceptions
+// XFAIL: *
+//
+// std::async/std::future crashes - exception handling NYI
+// Related to exception system design
+
+#include <future>
+
+void test() {
+  auto f = std::async(std::launch::async, []{ return 42; });
+  int result = f.get();
+}
diff --git a/clang/test/CIR/Incubator/crashes/bitfield-bool-int-cast.cpp b/clang/test/CIR/Incubator/crashes/bitfield-bool-int-cast.cpp
new file mode 100644
index 0000000000000..104791e0db0d7
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/bitfield-bool-int-cast.cpp
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// XFAIL: *
+//
+// Bitfield bool to int conversion - type cast assertion failure
+// Location: Casting.h:560
+
+struct a {
+  bool b : 1;
+};
+class c {
+public:
+  void operator<<(int);
+};
+void d(c e, a f) { e << f.b; }
diff --git a/clang/test/CIR/Incubator/crashes/cleanup-892-null-fixups.cpp b/clang/test/CIR/Incubator/crashes/cleanup-892-null-fixups.cpp
new file mode 100644
index 0000000000000..5a8d99c0bec8c
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/cleanup-892-null-fixups.cpp
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// XFAIL: *
+//
+// Test for UNREACHABLE at CIRGenCleanup.cpp:892
+// Null fixups popping not yet implemented
+//
+// This test triggers the error:
+// "UNREACHABLE executed at CIRGenCleanup.cpp:892!"
+//
+// Original failure: cleanup_892 from LLVM build
+// Reduced from /tmp/Regex-8cd677.cpp
+
+inline namespace a {
+class c {
+public:
+  template <typename b> c(b);
+  ~c();
+};
+} // namespace a
+class d {
+  c e() const;
+};
+class aj {
+public:
+  ~aj();
+} an;
+c d::e() const {
+  aj ao;
+  return an;
+  c(0);
+}
diff --git a/clang/test/CIR/Incubator/crashes/cleanup-unreachable.cpp b/clang/test/CIR/Incubator/crashes/cleanup-unreachable.cpp
new file mode 100644
index 0000000000000..cebe995f2e7b4
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/cleanup-unreachable.cpp
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// XFAIL: *
+//
+// Branch-through cleanups NYI
+// Location: CIRGenCleanup.cpp:527
+//
+// Original failure: cleanup_527 from LLVM build
+// Reduced from /tmp/MicrosoftDemangleNodes-acf44f.cpp
+
+class c {
+public:
+  ~c();
+};
+struct d {
+  template <typename> using ac = c;
+};
+struct e {
+  typedef d::ac<int> ae;
+};
+class f {
+public:
+  e::ae ak;
+  template <typename g> f(g, g);
+};
+struct h {
+  f i() const;
+};
+class j {
+public:
+  ~j();
+};
+f h::i() const {
+  j a;
+  f b(0, 0);
+  return b;
+}
diff --git a/clang/test/CIR/Incubator/crashes/computed-goto-nyi.cpp b/clang/test/CIR/Incubator/crashes/computed-goto-nyi.cpp
new file mode 100644
index 0000000000000..7fffc643da519
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/computed-goto-nyi.cpp
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.og.ll
+// RUN: FileCheck --input-file=%t.og.ll %s --check-prefix=OGCG
+//
+// XFAIL: *
+//
+// CIR crashes when using computed goto (GNU extension).
+//
+// Computed goto allows taking the address of a label and jumping to it
+// dynamically. This is implemented via the AddrLabelExpr AST node.
+//
+// Currently, CIR crashes with:
+//   Assertion `0 && "NYI"' failed
+//   at CIRGenExprConst.cpp:1634 in ConstantLValueEmitter::VisitAddrLabelExpr
+//
+// The issue is that CIR's constant expression emitter doesn't handle
+// AddrLabelExpr (&&label syntax).
+//
+// This affects code using computed goto, which is common in interpreters,
+// state machines, and performance-critical dispatch code.
+
+int test_computed_goto(int x) {
+    void* labels[] = {&&label0, &&label1, &&label2};
+
+    if (x >= 0 && x <= 2)
+        goto *labels[x];
+    return -1;
+
+label0:
+    return 0;
+label1:
+    return 10;
+label2:
+    return 20;
+}
+
+// LLVM: Should generate indirectbr
+// LLVM: define {{.*}} @_Z18test_computed_gotoi({{.*}})
+
+// OGCG: Should use blockaddress and indirectbr
+// OGCG: define {{.*}} @_Z18test_computed_gotoi({{.*}})
+// OGCG: blockaddress(@_Z18test_computed_gotoi
+// OGCG: indirectbr
diff --git a/clang/test/CIR/Incubator/crashes/conditional-return-destructors.cpp b/clang/test/CIR/Incubator/crashes/conditional-return-destructors.cpp
new file mode 100644
index 0000000000000..ff84a1b53454a
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/conditional-return-destructors.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+//
+// XFAIL: *
+//
+// Issue: Conditional return with multiple destructors in different scopes
+//
+// When a function has:
+// - Multiple local variables with destructors in different scopes
+// - A conditional return statement
+// - Return value containing a member with a destructor
+// CIR fails to properly manage the cleanup scope stack for all destructors
+// that need to run on each exit path.
+
+class a {
+public:
+  ~a();
+};
+struct b {
+  a c;
+};
+b fn1(bool e) {
+  a d;
+  b f;
+  if (e) {
+    a d;
+    return f;
+  }
+}
diff --git a/clang/test/CIR/Incubator/crashes/constexpr-cast.cpp b/clang/test/CIR/Incubator/crashes/constexpr-cast.cpp
new file mode 100644
index 0000000000000..879d282f340cd
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/constexpr-cast.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// XFAIL: *
+//
+// Constant expression NYI
+// Location: CIRGenExprConst.cpp:1006
+//
+// Original failure: exprconst_1006 from LLVM build
+// Reduced from /tmp/HexagonAttributeParser-40f1ed.cpp
+
+class a {
+public:
+  int b(unsigned);
+};
+class c : a {
+  struct d {
+    int (c::*e)(unsigned);
+  } static const f[];
+};
+const c::d c::f[]{&a::b};
diff --git a/clang/test/CIR/Incubator/crashes/constexpr-complex-template-metaprog.cpp b/clang/test/CIR/Incubator/crashes/constexpr-complex-template-metaprog.cpp
new file mode 100644
index 0000000000000..77af8e7af2d5a
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/constexpr-complex-template-metaprog.cpp
@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+//
+// XFAIL: *
+//
+// Issue: Complex template metaprogramming with __builtin_is_constant_evaluated
+//
+// When using advanced template metaprogramming involving:
+// - Template aliases with variadic templates
+// - decltype expressions in template parameters
+// - __builtin_is_constant_evaluated() in constexpr context
+// - Complex member function template instantiation chains
+// CIR fails during constant expression evaluation or template instantiation.
+
+template <typename a> struct b {
+  typedef a c;
+};
+template <typename a> class d {
+public:
+  typedef a c;
+};
+template <typename a> using ad = d<a>::c;
+template <typename...> struct g;
+struct i {
+  template <typename, typename e> using f = decltype(e());
+  template <typename a, typename e> static b<ad<f<a, e>>> k(int);
+};
+template <typename h> struct l : i {
+  using c = decltype(k<int, h>(0));
+};
+template <typename j, typename h> struct g<j, h> : l<h>::c {};
+template <typename... a> using ah = g<a...>::c;
+class m;
+class n {
+  void o(m &) const;
+};
+template <typename = void> struct al;
+template <typename a> struct al<a *> {
+  void operator()(a *, a *) {
+    if (__builtin_is_constant_evaluated())
+      ;
+  }
+};
+template <> struct al<> {
+  template <typename a, typename e> void operator()(a p, e *p2) {
+    al<ah<a, e *>>{}(p, p2);
+  }
+};
+class q {
+  void *aq;
+  void *ar;
+  template <class au> void r(au, int, long) {
+    al a;
+    a(aq, ar);
+  }
+};
+template <typename> class s : q {
+  int az;
+  long ba;
+
+public:
+  void t() { r(this, az, ba); }
+};
+class m {
+  s<int> bd;
+
+public:
+  void m_fn5() { bd.t(); }
+};
+void n::o(m &p) const { p.m_fn5(); }
diff --git a/clang/test/CIR/Incubator/crashes/copy-on-catch.cpp b/clang/test/CIR/Incubator/crashes/copy-on-catch.cpp
new file mode 100644
index 0000000000000..9bf11f7a2408b
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/copy-on-catch.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir -fcxx-exceptions -fexceptions | FileCheck %s
+// XFAIL: *
+
+// CHECK: cir.func
+
+struct E {};
+E e;
+
+void throws() { throw e; }
+
+void bar() {
+  try {
+    throws();
+  } catch (E e) {
+  }
+}
diff --git a/clang/test/CIR/Incubator/crashes/dyncast-assertion.cpp b/clang/test/CIR/Incubator/crashes/dyncast-assertion.cpp
new file mode 100644
index 0000000000000..ae2359a27622f
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/dyncast-assertion.cpp
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+//
+// XFAIL: *
+//
+// Crash from LLVM build with ClangIR. Creduced from llvm::formLCSSAForInstructions
+//
+// Issue: dyn_cast assertion failure
+// Location: Casting.h:644
+// Error: Assertion `isa<X>(Val) && "cast<Ty>() argument of incompatible type!"`
+//
+// When initializing aggregate members in a constructor with template parameters,
+// CIR attempts an invalid cast operation.
+
+struct a {
+  template <typename b, typename c> a(b, c);
+};
+class d {
+  a e;
+
+public:
+  d(int) : e(0, 0) {}
+};
+void f() { static d g(0); }
diff --git a/clang/test/CIR/Incubator/crashes/exception-handling-nyi.cpp b/clang/test/CIR/Incubator/crashes/exception-handling-nyi.cpp
new file mode 100644
index 0000000000000..55e2e4f1a920a
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/exception-handling-nyi.cpp
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -fcxx-exceptions -fexceptions %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fcxx-exceptions -fexceptions %s -o %t.og.ll
+// RUN: FileCheck --input-file=%t.og.ll %s --check-prefix=OGCG
+//
+// XFAIL: *
+//
+// CIR crashes when handling C++ exceptions (try/catch blocks).
+//
+// Exception handling requires:
+// - Generating personality functions
+// - Landing pads for catch blocks
+// - Invoke instructions instead of call for functions that may throw
+// - Exception object allocation and cleanup
+//
+// Currently, CIR crashes with:
+//   NYI
+//   UNREACHABLE executed at CIRGenItaniumCXXABI.cpp:814
+//   at emitBeginCatch
+//
+// This affects any code using try/catch/throw.
+
+struct Exception {
+    int code;
+    Exception(int c) : code(c) {}
+    ~Exception() {}
+};
+
+void may_throw() {
+    throw Exception(42);
+}
+
+int catch_exception() {
+    try {
+        may_throw();
+        return 0;
+    } catch (const Exception& e) {
+        return e.code;
+    }
+}
+
+// LLVM: Should generate exception handling code
+// LLVM: define {{.*}} @_Z15catch_exceptionv()
+
+// OGCG: Should use invoke and landing pads
+// OGCG: define {{.*}} @_Z15catch_exceptionv() {{.*}} personality
+// OGCG: invoke {{.*}} @_Z9may_throwv()
+// OGCG: landingpad
diff --git a/clang/test/CIR/Incubator/crashes/exception-ptr.cpp b/clang/test/CIR/Incubator/crashes/exception-ptr.cpp
new file mode 100644
index 0000000000000..d74e439017121
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/exception-ptr.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -fcxx-exceptions -fexceptions
+// XFAIL: *
+//
+// std::make_exception_ptr crashes - exception handling NYI
+// Related to exception system design
+
+#include <exception>
+#include <stdexcept>
+
+void test() {
+  std::exception_ptr ep = std::make_exception_ptr(std::runtime_error("test"));
+}
diff --git a/clang/test/CIR/Incubator/crashes/filesystem-sd-automatic.cpp b/clang/test/CIR/Incubator/crashes/filesystem-sd-automatic.cpp
new file mode 100644
index 0000000000000..d4e43acea55f4
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/filesystem-sd-automatic.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir -std=c++17
+// XFAIL: *
+//
+// std::filesystem with range-for crashes - SD_Automatic not implemented
+// Location: CIRGenExpr.cpp:2356
+
+#include <filesystem>
+
+void test() {
+  namespace fs = std::filesystem;
+
+  // This triggers SD_Automatic for the directory_iterator temporary
+  for (const auto& entry : fs::directory_iterator("/tmp")) {
+    auto path = entry.path();
+  }
+}
diff --git a/clang/test/CIR/Incubator/crashes/function-ref-pointer-params.cpp b/clang/test/CIR/Incubator/crashes/function-ref-pointer-params.cpp
new file mode 100644
index 0000000000000..0c382f751d19b
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/function-ref-pointer-params.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+//
+// XFAIL: *
+//
+// Issue: Function reference parameter with multiple pointer indirections
+//
+// When passing a reference to a function type as a parameter, where the function
+// signature contains multiple levels of pointer indirection in its parameters,
+// CIR fails during type lowering or function call code generation.
+
+const char *a;
+unsigned b;
+unsigned char c;
+void d(int (&e)(unsigned char *, unsigned *, char, const char **)) {
+  e(&c, &b, 0, &a);
+}
diff --git a/clang/test/CIR/Incubator/crashes/multi-inheritance-thunk-crash.cpp b/clang/test/CIR/Incubator/crashes/multi-inheritance-thunk-crash.cpp
new file mode 100644
index 0000000000000..aba04fae05f94
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/multi-inheritance-thunk-crash.cpp
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.og.ll
+// RUN: FileCheck --input-file=%t.og.ll %s --check-prefix=OGCG
+//
+// XFAIL: *
+//
+// CIR crashes when generating thunks for multiple inheritance.
+//
+// Multiple inheritance requires generating thunks to adjust the 'this' pointer
+// when calling virtual functions through a base class pointer that is not the
+// primary base. The thunk adjusts 'this' and then calls the actual implementation.
+//
+// Currently, CIR crashes with:
+//   Assertion `isValid()' failed in Address::getPointer()
+//   at clang::CIRGen::CIRGenFunction::emitReturnOfRValue
+//
+// This affects any class using multiple inheritance with virtual functions.
+
+struct A {
+    virtual ~A() {}
+    virtual int foo() { return 1; }
+    int a;
+};
+
+struct B {
+    virtual ~B() {}
+    virtual int bar() { return 2; }
+    int b;
+};
+
+struct C : A, B {
+    int foo() override { return 3; }
+    int bar() override { return 4; }
+};
+
+C* make_c() {
+    return new C();
+}
+
+// LLVM: Should generate thunks for B's vtable in C
+// LLVM: define {{.*}} @_Z6make_cv()
+
+// OGCG: Should generate thunks for B's vtable in C
+// OGCG: define {{.*}} @_Z6make_cv()
+// OGCG: define {{.*}} @_ZThn{{[0-9]+}}_N1C3barEv
diff --git a/clang/test/CIR/Incubator/crashes/range-for-temp-automatic.cpp b/clang/test/CIR/Incubator/crashes/range-for-temp-automatic.cpp
new file mode 100644
index 0000000000000..9e579f94236cc
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/range-for-temp-automatic.cpp
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// XFAIL: *
+//
+// SD_Automatic storage duration for reference temporaries in range-based for loops
+// Location: CIRGenExpr.cpp:2435
+
+template <typename> struct b;
+template <typename> struct f;
+template <typename c> struct f<c *> {
+  typedef c d;
+};
+template <typename e, typename> class j {
+public:
+  f<e>::d operator*();
+  void operator++();
+};
+template <typename e, typename g> bool operator!=(j<e, g>, j<e, g>);
+template <typename> class k;
+template <typename c> struct b<k<c>> {
+  using h = c *;
+};
+template <typename i> struct F {
+  typedef b<i>::h h;
+  ~F();
+};
+template <typename c, typename i = k<c>> class G : F<i> {
+public:
+  typedef j<typename F<i>::h, int> iterator;
+  iterator begin();
+  iterator end();
+};
+template <typename l> class m {
+public:
+  using n = l;
+  using o = n *;
+  using iterator = o;
+  iterator begin();
+  iterator end();
+};
+class p {
+public:
+  G<p *> u();
+  m<p *> r();
+} q;
+void s() {
+  m a = q.r();
+  for (p *v : a)
+    for (p *t : v->u())
+      ;
+}
diff --git a/clang/test/CIR/Incubator/crashes/ref-temp-automatic.cpp b/clang/test/CIR/Incubator/crashes/ref-temp-automatic.cpp
new file mode 100644
index 0000000000000..50784aeef0acc
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/ref-temp-automatic.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// XFAIL: *
+//
+// SD_Automatic storage duration for reference temporaries not implemented
+// Location: CIRGenExpr.cpp:2356
+
+struct S {
+  S();
+  ~S();
+};
+
+S create();
+
+void f() {
+  auto&& s = create();
+}
diff --git a/clang/test/CIR/Incubator/crashes/static-init-recursion.cpp b/clang/test/CIR/Incubator/crashes/static-init-recursion.cpp
new file mode 100644
index 0000000000000..49811b1a82542
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/static-init-recursion.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+//
+// XFAIL: *
+//
+// Issue: Static local variable with recursive initialization
+//
+// When a static local variable is initialized by calling the function that
+// contains it, CIR fails during initialization code generation. This pattern
+// requires special guard variable handling to prevent infinite recursion at
+// runtime and detect the recursion during initialization.
+
+int a() { static int b = a(); }
diff --git a/clang/test/CIR/Incubator/crashes/static-local-destructor.cpp b/clang/test/CIR/Incubator/crashes/static-local-destructor.cpp
new file mode 100644
index 0000000000000..6bb575471b21f
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/static-local-destructor.cpp
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+//
+// XFAIL: *
+//
+// Crash from LLVM build with ClangIR
+//
+// Issue: Static local variable with destructor
+// Location: CIRGenDecl.cpp:616
+// Error: UNREACHABLE: NYI
+//
+// When a static local variable has a non-trivial destructor, CIR must
+// register the destructor to run at program exit. This is not yet implemented.
+
+class a {
+public:
+  ~a();
+};
+void b() { static a c; }
diff --git a/clang/test/CIR/Incubator/crashes/static-local-guard-nyi.cpp b/clang/test/CIR/Incubator/crashes/static-local-guard-nyi.cpp
new file mode 100644
index 0000000000000..3251810dae9e1
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/static-local-guard-nyi.cpp
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.og.ll
+// RUN: FileCheck --input-file=%t.og.ll %s --check-prefix=OGCG
+//
+// XFAIL: *
+//
+// CIR crashes when handling static local variables with constructors.
+//
+// Static locals with non-trivial constructors require thread-safe initialization
+// using guard variables and the __cxa_guard_acquire/__cxa_guard_release ABI.
+//
+// Per the Itanium C++ ABI:
+// - A guard variable tracks initialization state
+// - __cxa_guard_acquire checks if already initialized (returns 0 if so)
+// - Constructor runs once
+// - __cxa_guard_release marks as initialized
+//
+// Currently, CIR crashes with:
+//   NYI: thread-safe guards with __cxa_guard_acquire/release
+//   UNREACHABLE executed at LoweringPrepare.cpp:938
+//   at lowerGuardedInitOp
+//
+// This affects any function with static local variables that have constructors.
+
+struct GlobalClass {
+    int value;
+    GlobalClass(int v) : value(v) {}
+    ~GlobalClass() {}
+};
+
+// Static local with constructor
+int get_static_local() {
+    static GlobalClass local(123);
+    return local.value;
+}
+
+// LLVM: Should have function definition
+// LLVM: define {{.*}} @_Z16get_static_localv()
+
+// OGCG: Should use guard variables and cxa_guard functions
+// OGCG: define {{.*}} @_Z16get_static_localv()
+// OGCG: call {{.*}} @__cxa_guard_acquire
+// OGCG: call {{.*}} @__cxa_guard_release
diff --git a/clang/test/CIR/Incubator/crashes/static-local-used-attribute.cpp b/clang/test/CIR/Incubator/crashes/static-local-used-attribute.cpp
new file mode 100644
index 0000000000000..71b7be741ba7d
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/static-local-used-attribute.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+//
+// XFAIL: *
+//
+// Issue: Static local variable with __used__ attribute
+//
+// When a function contains a static local variable with the __used__ attribute,
+// CIR fails to properly handle the attribute during code generation.
+// The __used__ attribute prevents the variable from being optimized away even
+// if it appears unused.
+
+void a() { __attribute__((__used__)) static void *b; }
diff --git a/clang/test/CIR/Incubator/crashes/static-var-dyn-cast.cpp b/clang/test/CIR/Incubator/crashes/static-var-dyn-cast.cpp
new file mode 100644
index 0000000000000..9ca2c610b43f9
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/static-var-dyn-cast.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// XFAIL: *
+//
+// cast<Ty>() argument of incompatible type - assertion failure
+// Location: Casting.h:644
+//
+// Original failure: assertion_dyncast from LLVM build
+// Reduced from /tmp/FormattedStream-a19c5f.cpp
+
+struct a {
+  template <typename b, typename c> a(b, c);
+};
+class d {
+  a e;
+
+public:
+  d(int) : e(0, 0) {}
+};
+void f() { static d g(0); }
diff --git a/clang/test/CIR/Incubator/crashes/static-var-guarded-init.cpp b/clang/test/CIR/Incubator/crashes/static-var-guarded-init.cpp
new file mode 100644
index 0000000000000..45f42b1279065
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/static-var-guarded-init.cpp
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// XFAIL: *
+//
+// Declaration handling NYI
+// Location: CIRGenDecl.cpp:616
+//
+// Original failure: decl_616 from LLVM build
+// Reduced from /tmp/MSFError-102e4d.cpp
+
+class a {
+public:
+  ~a();
+};
+void b() { static a c; }
diff --git a/clang/test/CIR/Incubator/crashes/template-syntax-error.cpp b/clang/test/CIR/Incubator/crashes/template-syntax-error.cpp
new file mode 100644
index 0000000000000..797e7996fd70e
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/template-syntax-error.cpp
@@ -0,0 +1,15 @@
+// RUN: not %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir 2>&1 | FileCheck %s
+//
+// This test captures malformed C++ that creduce generated.
+// It documents that our crash reproducer reduction process needs improvement.
+//
+// CHECK: error: expected
+//
+// Issue: Creduce produced syntactically invalid C++ during reduction
+//
+// This is a creduce artifact showing incomplete template syntax.
+// The original crash involved template metaprogramming, but creduce
+// reduced it too aggressively, producing invalid syntax.
+
+template <a> b() struct c {
+  c::b::
diff --git a/clang/test/CIR/Incubator/crashes/tls-destructor.cpp b/clang/test/CIR/Incubator/crashes/tls-destructor.cpp
new file mode 100644
index 0000000000000..353a9301edbf9
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/tls-destructor.cpp
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// XFAIL: *
+//
+// thread_local with non-trivial destructor not implemented
+// Location: CIRGenCXX.cpp:264
+// Note: Simple TLS works; only destructors are NYI
+
+#include <string>
+
+thread_local std::string tls_string = "hello";
+
+int test() {
+  return tls_string.length();
+}
diff --git a/clang/test/CIR/Incubator/crashes/verification-block-terminator.cpp b/clang/test/CIR/Incubator/crashes/verification-block-terminator.cpp
new file mode 100644
index 0000000000000..73d973a30db91
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/verification-block-terminator.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// XFAIL: *
+//
+// CIR module verification error before passes
+// Location: Module verification
+//
+// Original failure: verification_error from LLVM build
+// Reduced from /tmp/Errno-48253a.cpp
+
+inline namespace a {
+class b {
+public:
+  ~b();
+};
+} // namespace a
+b c() {
+  b d;
+  if (0)
+    return d;
+}
diff --git a/clang/test/CIR/Incubator/crashes/virtual-base-constructor.cpp b/clang/test/CIR/Incubator/crashes/virtual-base-constructor.cpp
new file mode 100644
index 0000000000000..fb9f1f17c8764
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/virtual-base-constructor.cpp
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+//
+// XFAIL: *
+//
+// Issue: Virtual base class constructor call
+//
+// When a derived class constructor explicitly calls a virtual base class constructor,
+// CIR fails during code generation. Virtual base class constructors require special
+// handling as they are initialized by the most derived class, not intermediate classes.
+
+class a {};
+class b : virtual a {};
+class c : b {
+public:
+  c() : b() {}
+};
+void d() { c e; }
diff --git a/clang/test/CIR/Incubator/crashes/virtual-inheritance-crash.cpp b/clang/test/CIR/Incubator/crashes/virtual-inheritance-crash.cpp
new file mode 100644
index 0000000000000..bf0f595e110aa
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/virtual-inheritance-crash.cpp
@@ -0,0 +1,49 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.og.ll
+// RUN: FileCheck --input-file=%t.og.ll %s --check-prefix=OGCG
+//
+// XFAIL: *
+//
+// CIR crashes when handling virtual inheritance with thunks.
+//
+// Virtual inheritance requires:
+// - VTT (Virtual Table Table) for construction
+// - Virtual base pointer adjustments in thunks
+// - vtable offset lookups for dynamic adjustment
+//
+// Currently, CIR crashes with:
+//   Virtual adjustment NYI - requires vtable offset lookup
+//   UNREACHABLE executed at CIRGenItaniumCXXABI.cpp:2203
+//   at performTypeAdjustment during thunk generation
+//
+// This affects any class hierarchy using virtual inheritance.
+
+struct Base {
+    virtual ~Base() {}
+    int b;
+};
+
+struct A : virtual Base {
+    int a;
+};
+
+struct B : virtual Base {
+    int b;
+};
+
+struct C : A, B {
+    int c;
+};
+
+C* make_c() {
+    return new C();
+}
+
+// LLVM: Should generate class with virtual inheritance
+// LLVM: define {{.*}} @_Z6make_cv()
+
+// OGCG: Should generate VTT and virtual base thunks
+// OGCG: define {{.*}} @_Z6make_cv()
+// OGCG: @_ZTT1C = {{.*}} VTT for C
diff --git a/clang/test/CIR/Incubator/crashes/virtual-method-global-dtor.cpp b/clang/test/CIR/Incubator/crashes/virtual-method-global-dtor.cpp
new file mode 100644
index 0000000000000..7b6b6ff4b1065
--- /dev/null
+++ b/clang/test/CIR/Incubator/crashes/virtual-method-global-dtor.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+//
+// XFAIL: *
+//
+// Issue: Global variable with virtual method and destructor
+//
+// When a global variable has both:
+// - A non-trivial destructor requiring registration for cleanup at program exit
+// - A virtual method requiring vtable generation
+// CIR fails to properly coordinate the vtable setup with destructor registration.
+
+class a {
+public:
+  ~a();
+  virtual char b();
+} c;
diff --git a/clang/test/CIR/Incubator/divergences/array-new-delete-divergences.cpp b/clang/test/CIR/Incubator/divergences/array-new-delete-divergences.cpp
new file mode 100644
index 0000000000000..2ea8d59c4a6dc
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/array-new-delete-divergences.cpp
@@ -0,0 +1,55 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Array new/delete divergences:
+// 1. Missing nobuiltin attribute on operator new[] and operator delete[]
+// 2. Missing allocsize(0) attribute on operator new[]
+// 3. Missing null check before delete[]
+// 4. Missing inbounds on getelementptr
+// 5. Missing noundef/nonnull on function declarations
+//
+// CodeGen:
+//   declare noundef nonnull ptr @_Znam(i64 noundef) #1
+//   ; Function Attrs: nobuiltin allocsize(0)
+//
+//   declare void @_ZdaPv(ptr noundef) #2
+//   ; Function Attrs: nobuiltin nounwind
+//
+//   %isnull = icmp eq ptr %arr, null
+//   br i1 %isnull, label %delete.end, label %delete.notnull
+//   delete.notnull:
+//     call void @_ZdaPv(ptr noundef %arr)
+//
+//   %arrayidx = getelementptr inbounds i32, ptr %arr, i64 0
+//
+// CIR:
+//   declare ptr @_Znam(i64)  (missing noundef, nonnull, nobuiltin, allocsize)
+//   declare void @_ZdaPv(ptr)  (missing noundef, nobuiltin, nounwind)
+//
+//   call void @_ZdaPv(ptr %arr)  (no null check)
+//
+//   %arrayidx = getelementptr i32, ptr %arr, i64 0  (missing inbounds)
+
+// DIFF: -declare noundef nonnull ptr @_Znam(i64 noundef)
+// DIFF: +declare ptr @_Znam(i64)
+// DIFF: -; Function Attrs: nobuiltin allocsize(0)
+// DIFF: -declare void @_ZdaPv(ptr noundef)
+// DIFF: +declare void @_ZdaPv(ptr)
+// DIFF: -; Function Attrs: nobuiltin nounwind
+// DIFF: -%isnull = icmp eq ptr
+// DIFF: -br i1 %isnull
+// DIFF: -delete.notnull:
+// DIFF: getelementptr inbounds
+// DIFF: -getelementptr inbounds
+// DIFF: +getelementptr
+
+int test() {
+    int* arr = new int[10];
+    arr[0] = 42;
+    int result = arr[0];
+    delete[] arr;
+    return result;
+}
diff --git a/clang/test/CIR/Incubator/divergences/calling-conv-12byte-struct.cpp b/clang/test/CIR/Incubator/divergences/calling-conv-12byte-struct.cpp
new file mode 100644
index 0000000000000..b353817e6d926
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/calling-conv-12byte-struct.cpp
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// 12-byte structs (three ints) should be coerced to { i64, i32 } per x86_64 ABI.
+//
+// CodeGen coerces to two registers:
+//   define { i64, i32 } @return_three_ints()
+//
+// CIR returns the struct directly:
+//   define %struct.ThreeInts @return_three_ints()
+
+// DIFF: -define {{.*}} { i64, i32 } @{{.*}}return_three_ints
+// DIFF: +define {{.*}} %struct.ThreeInts @{{.*}}return_three_ints
+
+struct ThreeInts {
+    int a, b, c;  // 12 bytes total
+};
+
+ThreeInts return_three_ints() {
+    return {1, 2, 3};
+}
+
+int take_three_ints(ThreeInts s) {
+    return s.a + s.b + s.c;
+}
+
+int test() {
+    ThreeInts s = return_three_ints();
+    return take_three_ints(s);
+}
diff --git a/clang/test/CIR/Incubator/divergences/calling-conv-16byte-struct.cpp b/clang/test/CIR/Incubator/divergences/calling-conv-16byte-struct.cpp
new file mode 100644
index 0000000000000..3e11172cc2a77
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/calling-conv-16byte-struct.cpp
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// 16-byte structs are at the boundary. On x86_64, they should be coerced to { i64, i64 }.
+//
+// CodeGen coerces to two i64 registers:
+//   define { i64, i64 } @return_four_ints()
+//
+// CIR returns the struct directly:
+//   define %struct.FourInts @return_four_ints()
+
+// DIFF: -define {{.*}} { i64, i64 } @{{.*}}return_four_ints
+// DIFF: +define {{.*}} %struct.FourInts @{{.*}}return_four_ints
+
+struct FourInts {
+    int a, b, c, d;  // 16 bytes - boundary case
+};
+
+FourInts return_four_ints() {
+    return {1, 2, 3, 4};
+}
+
+int take_four_ints(FourInts s) {
+    return s.a + s.b + s.c + s.d;
+}
+
+int test() {
+    FourInts s = return_four_ints();
+    return take_four_ints(s);
+}
diff --git a/clang/test/CIR/Incubator/divergences/calling-conv-20byte-struct.cpp b/clang/test/CIR/Incubator/divergences/calling-conv-20byte-struct.cpp
new file mode 100644
index 0000000000000..3d8c31f90d2f6
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/calling-conv-20byte-struct.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Structs larger than 16 bytes need sret (struct return via a hidden pointer argument).
+//
+// CodeGen uses sret:
+//   define void @return_five_ints(ptr sret(%struct.FiveInts) %result)
+//
+// CIR incorrectly returns by value:
+//   define %struct.FiveInts @return_five_ints()
+
+// DIFF: -define void @{{.*}}return_five_ints(ptr sret(%struct.FiveInts)
+// DIFF: +define {{.*}} %struct.FiveInts @{{.*}}return_five_ints()
+
+struct FiveInts {
+    int a, b, c, d, e;  // 20 bytes - over the limit
+};
+
+FiveInts return_five_ints() {
+    return {1, 2, 3, 4, 5};
+}
+
+int test() {
+    FiveInts s = return_five_ints();
+    return s.a + s.e;
+}
diff --git a/clang/test/CIR/Incubator/divergences/calling-conv-4byte-struct.cpp b/clang/test/CIR/Incubator/divergences/calling-conv-4byte-struct.cpp
new file mode 100644
index 0000000000000..8dee375aa9b16
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/calling-conv-4byte-struct.cpp
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// 4-byte structs should be coerced to i32 per x86_64 ABI.
+//
+// CodeGen correctly coerces to i32:
+//   define i32 @return_tiny()
+//
+// CIR incorrectly returns the struct:
+//   define %struct.TinyStruct @return_tiny()
+
+// DIFF: -define {{.*}} i32 @{{.*}}return_tiny
+// DIFF: +define {{.*}} %struct.TinyStruct @{{.*}}return_tiny
+
+struct TinyStruct {
+    int x;  // 4 bytes
+};
+
+TinyStruct return_tiny() {
+    return {42};
+}
+
+int take_tiny(TinyStruct s) {
+    return s.x;
+}
+
+int test() {
+    TinyStruct s = return_tiny();
+    return take_tiny(s);
+}
diff --git a/clang/test/CIR/Incubator/divergences/calling-conv-aligned-struct.cpp b/clang/test/CIR/Incubator/divergences/calling-conv-aligned-struct.cpp
new file mode 100644
index 0000000000000..159bd49883827
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/calling-conv-aligned-struct.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Over-aligned structs (alignas > natural alignment) affect ABI classification.
+// This struct holds only 4 bytes of data, but sizeof is 32 because alignas(32) rounds the size up to the alignment.
+//
+// CodeGen handles alignment in calling convention
+// CIR may not properly handle over-aligned struct returns
+
+// DIFF: Check for alignment handling differences
+
+struct alignas(32) AlignedStruct {
+    int x;  // 4 bytes but 32-byte aligned
+};
+
+AlignedStruct return_aligned() {
+    return {42};
+}
+
+int test() {
+    AlignedStruct s = return_aligned();
+    return s.x;
+}
diff --git a/clang/test/CIR/Incubator/divergences/calling-conv-array-in-struct.cpp b/clang/test/CIR/Incubator/divergences/calling-conv-array-in-struct.cpp
new file mode 100644
index 0000000000000..5ffb9af08653d
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/calling-conv-array-in-struct.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Structs containing arrays should follow aggregate ABI rules.
+// This struct has 8 bytes (array of 2 ints) and should be coerced to i64.
+//
+// CodeGen coerces:
+//   define i64 @return_array_struct()
+//
+// CIR returns struct:
+//   define %struct.ArrayStruct @return_array_struct()
+
+// DIFF: -define {{.*}} i64 @{{.*}}return_array_struct
+// DIFF: +define {{.*}} %struct.ArrayStruct @{{.*}}return_array_struct
+
+struct ArrayStruct {
+    int arr[2];  // 8 bytes
+};
+
+ArrayStruct return_array_struct() {
+    return {{1, 2}};
+}
+
+int test() {
+    ArrayStruct s = return_array_struct();
+    return s.arr[0] + s.arr[1];
+}
diff --git a/clang/test/CIR/Incubator/divergences/calling-conv-bitfield-struct.cpp b/clang/test/CIR/Incubator/divergences/calling-conv-bitfield-struct.cpp
new file mode 100644
index 0000000000000..56840c3f3978d
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/calling-conv-bitfield-struct.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Structs with bitfields have special layout and ABI classification.
+//
+// CodeGen handles bitfield packing in calling convention
+// CIR may have different bitfield struct handling
+
+// DIFF: Check for bitfield struct differences
+
+struct BitfieldStruct {
+    unsigned int a : 3;  // 3 bits
+    unsigned int b : 5;  // 5 bits
+    // Total: 8 bits used, but sizeof is 4 (the unsigned int allocation unit)
+};
+
+BitfieldStruct return_bitfield() {
+    return {1, 2};
+}
+
+int test() {
+    BitfieldStruct s = return_bitfield();
+    return s.a + s.b;
+}
diff --git a/clang/test/CIR/Incubator/divergences/calling-conv-bool-in-struct.cpp b/clang/test/CIR/Incubator/divergences/calling-conv-bool-in-struct.cpp
new file mode 100644
index 0000000000000..f7bb26ae1d8b0
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/calling-conv-bool-in-struct.cpp
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Structs with bool are classified as INTEGER class.
+// This 8-byte struct should be coerced to i64.
+//
+// CodeGen coerces:
+//   define i64 @return_bool_struct()
+//
+// CIR returns struct:
+//   define %struct.BoolStruct @return_bool_struct()
+
+// DIFF: -define {{.*}} i64 @{{.*}}return_bool_struct
+// DIFF: +define {{.*}} %struct.BoolStruct @{{.*}}return_bool_struct
+
+struct BoolStruct {
+    bool b;    // 1 byte
+    int x;     // 4 bytes (with padding = 8 bytes total)
+};
+
+BoolStruct return_bool_struct() {
+    return {true, 42};
+}
+
+int test() {
+    BoolStruct s = return_bool_struct();
+    return s.b ? s.x : 0;
+}
diff --git a/clang/test/CIR/Incubator/divergences/calling-conv-empty-struct.cpp b/clang/test/CIR/Incubator/divergences/calling-conv-empty-struct.cpp
new file mode 100644
index 0000000000000..531fd9304851c
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/calling-conv-empty-struct.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Empty structs have size 1 in C++ (cannot be zero-sized).
+// Per ABI, they may be ignored in parameter passing.
+//
+// CodeGen may omit or handle specially
+// CIR may treat as regular struct
+
+// DIFF: Check for empty struct handling
+
+struct EmptyStruct {};
+
+EmptyStruct return_empty() {
+    return {};
+}
+
+void take_empty(EmptyStruct s) {}
+
+int test() {
+    EmptyStruct s = return_empty();
+    take_empty(s);
+    return 0;
+}
diff --git a/clang/test/CIR/Incubator/divergences/calling-conv-longlong-struct.cpp b/clang/test/CIR/Incubator/divergences/calling-conv-longlong-struct.cpp
new file mode 100644
index 0000000000000..cfa346d3272b6
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/calling-conv-longlong-struct.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Structs with long long (8 bytes) should be coerced to i64.
+//
+// CodeGen coerces:
+//   define i64 @return_longlong()
+//
+// CIR returns struct:
+//   define %struct.LongLongStruct @return_longlong()
+
+// DIFF: -define {{.*}} i64 @{{.*}}return_longlong
+// DIFF: +define {{.*}} %struct.LongLongStruct @{{.*}}return_longlong
+
+struct LongLongStruct {
+    long long ll;  // 8 bytes
+};
+
+LongLongStruct return_longlong() {
+    return {123456789LL};
+}
+
+int test() {
+    LongLongStruct s = return_longlong();
+    return s.ll > 0 ? 1 : 0;
+}
diff --git a/clang/test/CIR/Incubator/divergences/calling-conv-multiple-struct-params.cpp b/clang/test/CIR/Incubator/divergences/calling-conv-multiple-struct-params.cpp
new file mode 100644
index 0000000000000..c1305067f91b3
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/calling-conv-multiple-struct-params.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Multiple struct parameters should each follow ABI rules.
+// Two 8-byte structs should each be coerced to i64.
+//
+// CodeGen coerces both:
+//   define i32 @take_two_structs(i64 %p1.coerce, i64 %p2.coerce)
+//
+// CIR passes structs directly:
+//   define i32 @take_two_structs(%struct.Pair %p1, %struct.Pair %p2)
+
+// DIFF: -define {{.*}} @{{.*}}take_two_structs(i64{{.*}}, i64
+// DIFF: +define {{.*}} @{{.*}}take_two_structs(%struct.Pair{{.*}}, %struct.Pair
+
+struct Pair {
+    int a, b;  // 8 bytes
+};
+
+int take_two_structs(Pair p1, Pair p2) {
+    return p1.a + p2.b;
+}
+
+int test() {
+    return take_two_structs({1, 2}, {3, 4});
+}
diff --git a/clang/test/CIR/Incubator/divergences/calling-conv-nested-struct.cpp b/clang/test/CIR/Incubator/divergences/calling-conv-nested-struct.cpp
new file mode 100644
index 0000000000000..1a01f88a3db31
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/calling-conv-nested-struct.cpp
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Nested structs should follow the same ABI rules as flat structs.
+// This 16-byte nested struct should be coerced to { i64, i64 }.
+//
+// CodeGen coerces:
+//   define { i64, i64 } @return_nested()
+//
+// CIR returns struct directly:
+//   define %struct.Outer @return_nested()
+
+// DIFF: -define {{.*}} { i64, i64 } @{{.*}}return_nested
+// DIFF: +define {{.*}} %struct.Outer @{{.*}}return_nested
+
+struct Inner {
+    int x, y;  // 8 bytes
+};
+
+struct Outer {
+    Inner i1, i2;  // 16 bytes total
+};
+
+Outer return_nested() {
+    return {{1, 2}, {3, 4}};
+}
+
+int test() {
+    Outer o = return_nested();
+    return o.i1.x + o.i2.y;
+}
diff --git a/clang/test/CIR/Incubator/divergences/calling-conv-packed-struct.cpp b/clang/test/CIR/Incubator/divergences/calling-conv-packed-struct.cpp
new file mode 100644
index 0000000000000..151de22065cbc
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/calling-conv-packed-struct.cpp
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Packed structs have altered alignment which affects ABI classification.
+// This packed struct is 5 bytes and should be coerced differently.
+//
+// CodeGen may coerce or use sret depending on classification:
+//   (check specific LLVM IR)
+//
+// CIR returns struct:
+//   define %struct.PackedStruct @return_packed()
+
+// DIFF: -define {{.*}} @{{.*}}return_packed
+// DIFF: +define {{.*}} %struct.PackedStruct @{{.*}}return_packed
+
+struct __attribute__((packed)) PackedStruct {
+    char c;    // 1 byte
+    int i;     // 4 bytes, no padding - total 5 bytes packed
+};
+
+PackedStruct return_packed() {
+    return {1, 2};
+}
+
+int test() {
+    PackedStruct s = return_packed();
+    return s.i;
+}
diff --git a/clang/test/CIR/Incubator/divergences/calling-conv-pointer-in-struct.cpp b/clang/test/CIR/Incubator/divergences/calling-conv-pointer-in-struct.cpp
new file mode 100644
index 0000000000000..e5dbe43ae3ea9
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/calling-conv-pointer-in-struct.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Structs containing pointers (8 bytes on x86_64) should be coerced to i64.
+//
+// CodeGen coerces:
+//   define i64 @return_ptr_struct()
+//
+// CIR returns struct:
+//   define %struct.PtrStruct @return_ptr_struct()
+
+// DIFF: -define {{.*}} i64 @{{.*}}return_ptr_struct
+// DIFF: +define {{.*}} %struct.PtrStruct @{{.*}}return_ptr_struct
+
+struct PtrStruct {
+    int* ptr;  // 8 bytes on x86_64
+};
+
+PtrStruct return_ptr_struct() {
+    static int x = 42;
+    return {&x};
+}
+
+int test() {
+    PtrStruct s = return_ptr_struct();
+    return *s.ptr;
+}
diff --git a/clang/test/CIR/Incubator/divergences/calling-conv-two-longlongs.cpp b/clang/test/CIR/Incubator/divergences/calling-conv-two-longlongs.cpp
new file mode 100644
index 0000000000000..59414af9dc7d2
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/calling-conv-two-longlongs.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Two long longs (16 bytes total) should be coerced to { i64, i64 }.
+//
+// CodeGen coerces:
+//   define { i64, i64 } @return_two_longlongs()
+//
+// CIR returns struct:
+//   define %struct.TwoLongLongs @return_two_longlongs()
+
+// DIFF: -define {{.*}} { i64, i64 } @{{.*}}return_two_longlongs
+// DIFF: +define {{.*}} %struct.TwoLongLongs @{{.*}}return_two_longlongs
+
+struct TwoLongLongs {
+    long long a, b;  // 16 bytes
+};
+
+TwoLongLongs return_two_longlongs() {
+    return {123LL, 456LL};
+}
+
+int test() {
+    TwoLongLongs s = return_two_longlongs();
+    return s.a + s.b > 0 ? 1 : 0;
+}
diff --git a/clang/test/CIR/Incubator/divergences/ctor-copy.cpp b/clang/test/CIR/Incubator/divergences/ctor-copy.cpp
new file mode 100644
index 0000000000000..8452269aed87b
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/ctor-copy.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Copy constructor missing comdat.
+//
+// CodeGen:
+//   $_ZN1SC1ERKS_ = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZN1SC1ERKS_ = comdat any
+
+struct S {
+    int x;
+    S(int val) : x(val) {}
+    S(const S& other) : x(other.x * 2) {}
+};
+
+int test() {
+    S s1(10);
+    S s2(s1);
+    return s2.x;
+}
diff --git a/clang/test/CIR/Incubator/divergences/ctor-deep-inheritance.cpp b/clang/test/CIR/Incubator/divergences/ctor-deep-inheritance.cpp
new file mode 100644
index 0000000000000..2e92da43bef5e
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/ctor-deep-inheritance.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Deep inheritance chain constructors missing comdat.
+//
+// CodeGen:
+//   $_ZN5ChildC1Ev = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZN5ChildC1Ev = comdat any
+
+struct GrandParent {
+    int a = 1;
+};
+
+struct Parent : GrandParent {
+    int b = 2;
+};
+
+struct Child : Parent {
+    int c = 3;
+};
+
+int test() {
+    Child ch;
+    return ch.a + ch.b + ch.c;
+}
diff --git a/clang/test/CIR/Incubator/divergences/ctor-delegating.cpp b/clang/test/CIR/Incubator/divergences/ctor-delegating.cpp
new file mode 100644
index 0000000000000..41d956617af9c
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/ctor-delegating.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Delegating constructor missing comdat for both variants.
+//
+// CodeGen:
+//   $_ZN1SC1Ei = comdat any
+//   $_ZN1SC1Eii = comdat any
+//
+// CIR:
+//   Both missing comdat
+
+// DIFF: -$_ZN1SC1Ei = comdat any
+// DIFF: -$_ZN1SC1Eii = comdat any
+
+struct S {
+    int x, y;
+    S(int a) : S(a, a * 2) {}  // Delegating
+    S(int a, int b) : x(a), y(b) {}
+};
+
+int test() {
+    S s(5);
+    return s.x + s.y;
+}
diff --git a/clang/test/CIR/Incubator/divergences/ctor-inherited.cpp b/clang/test/CIR/Incubator/divergences/ctor-inherited.cpp
new file mode 100644
index 0000000000000..e05bb540e06bb
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/ctor-inherited.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Constructor inheritance missing comdat.
+//
+// CodeGen:
+//   $_ZN7DerivedC1Ei = comdat any (inherited)
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZN7DerivedC1Ei = comdat any
+
+struct Base {
+    int x;
+    Base(int val) : x(val) {}
+};
+
+struct Derived : Base {
+    using Base::Base;  // Inherit constructors
+};
+
+int test() {
+    Derived d(42);
+    return d.x;
+}
diff --git a/clang/test/CIR/Incubator/divergences/ctor-member-init-list.cpp b/clang/test/CIR/Incubator/divergences/ctor-member-init-list.cpp
new file mode 100644
index 0000000000000..f91da3a7ec318
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/ctor-member-init-list.cpp
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Constructor with member initializer list missing comdat.
+//
+// CodeGen:
+//   $_ZN5OuterC1Ev = comdat any
+//   $_ZN5InnerC1Ei = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZN5OuterC1Ev = comdat any
+
+struct Inner {
+    int val;
+    Inner(int v) : val(v) {}
+};
+
+struct Outer {
+    Inner i1, i2;
+    Outer() : i1(10), i2(20) {}
+};
+
+int test() {
+    Outer o;
+    return o.i1.val + o.i2.val;
+}
diff --git a/clang/test/CIR/Incubator/divergences/ctor-move.cpp b/clang/test/CIR/Incubator/divergences/ctor-move.cpp
new file mode 100644
index 0000000000000..a49e6f7ff5306
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/ctor-move.cpp
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Move constructor missing comdat.
+//
+// CodeGen:
+//   $_ZN1SC1EOS_ = comdat any
+//   $_ZN1SD1Ev = comdat any
+//
+// CIR:
+//   Both missing comdat
+
+// DIFF: -$_ZN1SC1EOS_ = comdat any
+// DIFF: -$_ZN1SD1Ev = comdat any
+
+struct S {
+    int* ptr;
+    S(int val) : ptr(new int(val)) {}
+    S(S&& other) : ptr(other.ptr) { other.ptr = nullptr; }
+    ~S() { delete ptr; }
+    int get() const { return ptr ? *ptr : 0; }
+};
+
+int test() {
+    S s1(42);
+    S s2(static_cast<S&&>(s1));
+    return s2.get();
+}
diff --git a/clang/test/CIR/Incubator/divergences/ctor-multiple-inheritance.cpp b/clang/test/CIR/Incubator/divergences/ctor-multiple-inheritance.cpp
new file mode 100644
index 0000000000000..26fa8c5bd7e94
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/ctor-multiple-inheritance.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Multiple inheritance constructors missing comdat.
+//
+// CodeGen:
+//   $_ZN1CC1Ev = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZN1CC1Ev = comdat any
+
+struct A {
+    int a = 1;
+};
+
+struct B {
+    int b = 2;
+};
+
+struct C : A, B {
+    int c = 3;
+};
+
+int test() {
+    C obj;
+    return obj.a + obj.b + obj.c;
+}
diff --git a/clang/test/CIR/Incubator/divergences/ctor-parameterized.cpp b/clang/test/CIR/Incubator/divergences/ctor-parameterized.cpp
new file mode 100644
index 0000000000000..66fb43a073ae7
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/ctor-parameterized.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Parameterized constructor missing comdat.
+//
+// CodeGen:
+//   $_ZN1SC1Eii = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZN1SC1Eii = comdat any
+
+struct S {
+    int x, y;
+    S(int a, int b) : x(a), y(b) {}
+};
+
+int test() {
+    S s(10, 20);
+    return s.x + s.y;
+}
diff --git a/clang/test/CIR/Incubator/divergences/float-double-struct.cpp b/clang/test/CIR/Incubator/divergences/float-double-struct.cpp
new file mode 100644
index 0000000000000..65c3333a7e8d7
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/float-double-struct.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Struct with double should be passed in XMM register per x86_64 ABI.
+//
+// CodeGen handles SSE classification:
+//   (check specific lowering)
+//
+// CIR may not handle correctly
+
+// DIFF: Check for double struct handling
+
+struct DoubleStruct {
+    double d;  // 8 bytes, SSE class
+};
+
+DoubleStruct return_double() {
+    return {3.14159};
+}
+
+int test() {
+    DoubleStruct s = return_double();
+    return s.d > 3.0 ? 1 : 0;
+}
diff --git a/clang/test/CIR/Incubator/divergences/float-mixed-int-float.cpp b/clang/test/CIR/Incubator/divergences/float-mixed-int-float.cpp
new file mode 100644
index 0000000000000..0aa968309b5e5
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/float-mixed-int-float.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Mixed int and float struct has complex ABI classification.
+//
+// CodeGen handles mixed INTEGER/SSE classification:
+//   (check specific lowering - may be split across registers)
+//
+// CIR may not handle correctly
+
+// DIFF: Check for mixed int/float handling
+
+struct MixedStruct {
+    int i;      // INTEGER class
+    float f;    // SSE class
+    // Total 8 bytes
+};
+
+MixedStruct return_mixed() {
+    return {42, 3.14f};
+}
+
+int test() {
+    MixedStruct s = return_mixed();
+    return s.i;
+}
diff --git a/clang/test/CIR/Incubator/divergences/float-single-float-struct.cpp b/clang/test/CIR/Incubator/divergences/float-single-float-struct.cpp
new file mode 100644
index 0000000000000..6b54cdcf6a47f
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/float-single-float-struct.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Struct with single float should be passed in XMM register per x86_64 ABI.
+//
+// CodeGen handles SSE classification:
+//   (check specific lowering)
+//
+// CIR may not handle correctly
+
+// DIFF: Check for float struct handling
+
+struct FloatStruct {
+    float f;  // 4 bytes, SSE class
+};
+
+FloatStruct return_float() {
+    return {3.14f};
+}
+
+int test() {
+    FloatStruct s = return_float();
+    return s.f > 3.0f ? 1 : 0;
+}
diff --git a/clang/test/CIR/Incubator/divergences/float-struct-calling-conv.cpp b/clang/test/CIR/Incubator/divergences/float-struct-calling-conv.cpp
new file mode 100644
index 0000000000000..3ee8d17dd419f
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/float-struct-calling-conv.cpp
@@ -0,0 +1,105 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Floating-point struct calling conventions diverge from CodeGen.
+//
+// Per the System V AMD64 ABI, structs containing only floating-point types
+// are classified as SSE and passed in XMM registers, with special handling.
+//
+// This affects:
+// - Structs with single float (should be in XMM register)
+// - Structs with double (should be in XMM register)
+// - Structs with two floats (should be in XMM registers)
+// - Mixed integer/float structs (different classification)
+//
+// Impact: May affect ABI compatibility for floating-point structs
+
+// DIFF: Check for differences in float/double struct handling
+
+// Single float struct
+struct FloatStruct {
+    float f;
+};
+
+FloatStruct return_float() {
+    return {3.14f};
+}
+
+int test_float() {
+    FloatStruct s = return_float();
+    return s.f > 3.0f ? 1 : 0;
+}
+
+// Single double struct
+struct DoubleStruct {
+    double d;
+};
+
+DoubleStruct return_double() {
+    return {3.14159};
+}
+
+int test_double() {
+    DoubleStruct s = return_double();
+    return s.d > 3.0 ? 1 : 0;
+}
+
+// Two floats
+struct TwoFloats {
+    float a, b;
+};
+
+TwoFloats return_two_floats() {
+    return {1.0f, 2.0f};
+}
+
+int test_two_floats() {
+    TwoFloats s = return_two_floats();
+    return s.a + s.b > 2.5f ? 1 : 0;
+}
+
+// Mixed int and float
+struct MixedStruct {
+    int i;
+    float f;
+};
+
+MixedStruct return_mixed() {
+    return {42, 3.14f};
+}
+
+int test_mixed() {
+    MixedStruct s = return_mixed();
+    return s.i;
+}
+
+// Three floats (12 bytes - still within the 16-byte limit; returned in XMM registers, not via sret)
+struct ThreeFloats {
+    float a, b, c;
+};
+
+ThreeFloats return_three_floats() {
+    return {1.0f, 2.0f, 3.0f};
+}
+
+int test_three_floats() {
+    ThreeFloats s = return_three_floats();
+    return s.a + s.b + s.c > 5.0f ? 1 : 0;
+}
+
+// Four floats (16 bytes - exactly at the limit; returned in two XMM registers as { <2 x float>, <2 x float> }, not via sret)
+struct FourFloats {
+    float a, b, c, d;
+};
+
+FourFloats return_four_floats() {
+    return {1.0f, 2.0f, 3.0f, 4.0f};
+}
+
+int test_four_floats() {
+    FourFloats s = return_four_floats();
+    return s.a + s.b + s.c + s.d > 9.0f ? 1 : 0;
+}
diff --git a/clang/test/CIR/Incubator/divergences/float-two-floats-struct.cpp b/clang/test/CIR/Incubator/divergences/float-two-floats-struct.cpp
new file mode 100644
index 0000000000000..aefee32bbfdbe
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/float-two-floats-struct.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Struct with two floats (8 bytes total) fits in one XMM register.
+//
+// CodeGen handles SSE classification:
+//   (check specific lowering)
+//
+// CIR may not handle correctly
+
+// DIFF: Check for two floats struct handling
+
+struct TwoFloats {
+    float a, b;  // 8 bytes, SSE class
+};
+
+TwoFloats return_two_floats() {
+    return {1.0f, 2.0f};
+}
+
+int test() {
+    TwoFloats s = return_two_floats();
+    return s.a + s.b > 2.5f ? 1 : 0;
+}
diff --git a/clang/test/CIR/Incubator/divergences/global-constructor.cpp b/clang/test/CIR/Incubator/divergences/global-constructor.cpp
new file mode 100644
index 0000000000000..ee1b340fe3200
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/global-constructor.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Global object with constructor.
+//
+// CodeGen:
+//   @global_obj = global %struct.GlobalClass zeroinitializer
+//   @llvm.global_ctors for initialization
+//
+// CIR:
+//   Check for differences
+
+// DIFF: Check for global constructor handling
+
+struct GlobalClass {
+    int value;
+    GlobalClass(int v) : value(v) {}
+};
+
+GlobalClass global_obj(42);
+
+int test() {
+    return global_obj.value;
+}
diff --git a/clang/test/CIR/Incubator/divergences/inheritance-diamond.cpp b/clang/test/CIR/Incubator/divergences/inheritance-diamond.cpp
new file mode 100644
index 0000000000000..3b274c4d3f40a
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/inheritance-diamond.cpp
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Diamond inheritance (non-virtual) missing comdat.
+//
+// CodeGen:
+//   $_ZN7DiamondC1Ev = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZN7DiamondC1Ev = comdat any
+
+struct Base {
+    int x = 1;
+};
+
+struct Left : Base {
+    int y = 2;
+};
+
+struct Right : Base {
+    int z = 3;
+};
+
+struct Diamond : Left, Right {
+    int w = 4;
+};
+
+int test() {
+    Diamond d;
+    return d.Left::x + d.Right::x + d.y + d.z + d.w;
+}
diff --git a/clang/test/CIR/Incubator/divergences/inheritance-empty-base.cpp b/clang/test/CIR/Incubator/divergences/inheritance-empty-base.cpp
new file mode 100644
index 0000000000000..edaf416cdb1fa
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/inheritance-empty-base.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Empty base optimization.
+//
+// CodeGen:
+//   $_ZN7DerivedC1Ev = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZN7DerivedC1Ev = comdat any
+
+struct Empty {};
+
+struct Derived : Empty {
+    int x = 42;
+};
+
+int test() {
+    Derived d;
+    return d.x;
+}
diff --git a/clang/test/CIR/Incubator/divergences/inheritance-missing-comdat.cpp b/clang/test/CIR/Incubator/divergences/inheritance-missing-comdat.cpp
new file mode 100644
index 0000000000000..4801456d035ce
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/inheritance-missing-comdat.cpp
@@ -0,0 +1,108 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Inheritance-related functions are missing comdat groups.
+//
+// CodeGen generates comdat for:
+// - Constructors of derived classes
+// - Destructors of derived classes
+// - Base-to-derived conversions
+// - Virtual function overrides in derived classes
+//
+// CIR omits these comdat declarations.
+//
+// Impact: ODR violations possible with multiple inheritance hierarchies
+
+// DIFF: -$_ZN7DerivedC1Ev = comdat any
+// DIFF: -$_ZN7DerivedD1Ev = comdat any
+// DIFF: +define linkonce_odr {{.*}} @_ZN7DerivedC1Ev
+
+// Simple single inheritance
+struct Base {
+    int x = 10;
+    virtual ~Base() {}
+};
+
+struct Derived : Base {
+    int y = 20;
+    ~Derived() override {}
+};
+
+int test_single_inheritance() {
+    Derived d;
+    return d.x + d.y;
+}
+
+// Multiple inheritance
+struct A {
+    int a = 1;
+    virtual ~A() {}
+};
+
+struct B {
+    int b = 2;
+    virtual ~B() {}
+};
+
+struct C : A, B {
+    int c = 3;
+    ~C() override {}
+};
+
+int test_multiple_inheritance() {
+    C obj;
+    return obj.a + obj.b + obj.c;
+}
+
+// Three levels of inheritance
+struct GrandParent {
+    int gp = 1;
+    virtual ~GrandParent() {}
+};
+
+struct Parent : GrandParent {
+    int p = 2;
+    ~Parent() override {}
+};
+
+struct Child : Parent {
+    int c = 3;
+    ~Child() override {}
+};
+
+int test_deep_inheritance() {
+    Child ch;
+    return ch.gp + ch.p + ch.c;
+}
+
+// Constructor inheritance
+struct BaseWithCtor {
+    int x;
+    BaseWithCtor(int val) : x(val) {}
+    virtual ~BaseWithCtor() {}
+};
+
+struct DerivedWithCtor : BaseWithCtor {
+    using BaseWithCtor::BaseWithCtor;
+    ~DerivedWithCtor() override {}
+};
+
+int test_ctor_inheritance() {
+    DerivedWithCtor d(42);
+    return d.x;
+}
+
+// Empty base optimization
+struct Empty {};
+
+struct DerivedFromEmpty : Empty {
+    int x = 42;
+};
+
+int test_empty_base() {
+    DerivedFromEmpty d;
+    return d.x;
+}
diff --git a/clang/test/CIR/Incubator/divergences/inheritance-private.cpp b/clang/test/CIR/Incubator/divergences/inheritance-private.cpp
new file mode 100644
index 0000000000000..665cc9c9140ea
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/inheritance-private.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Private inheritance constructor missing comdat.
+//
+// CodeGen:
+//   $_ZN7DerivedC1Ev = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZN7DerivedC1Ev = comdat any
+
+struct Base {
+    int x = 42;
+};
+
+struct Derived : private Base {
+    int get_x() { return x; }
+};
+
+int test() {
+    Derived d;
+    return d.get_x();
+}
diff --git a/clang/test/CIR/Incubator/divergences/inheritance-protected.cpp b/clang/test/CIR/Incubator/divergences/inheritance-protected.cpp
new file mode 100644
index 0000000000000..a8b10b3af16a4
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/inheritance-protected.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Protected inheritance constructor missing comdat.
+//
+// CodeGen:
+//   $_ZN7DerivedC1Ev = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZN7DerivedC1Ev = comdat any
+
+struct Base {
+    int x = 42;
+};
+
+struct Derived : protected Base {
+    int get_x() { return x; }
+};
+
+int test() {
+    Derived d;
+    return d.get_x();
+}
diff --git a/clang/test/CIR/Incubator/divergences/inline-ctor-dtor-missing-comdat.cpp b/clang/test/CIR/Incubator/divergences/inline-ctor-dtor-missing-comdat.cpp
new file mode 100644
index 0000000000000..aad1b4491ac05
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/inline-ctor-dtor-missing-comdat.cpp
@@ -0,0 +1,126 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Inline constructors and destructors are missing comdat group declarations.
+// This is a divergence from CodeGen's behavior.
+//
+// Per the Itanium C++ ABI:
+// - Inline functions with linkonce_odr linkage should be in comdat groups
+// - This allows the linker to merge duplicate definitions
+// - Prevents ODR violations across translation units
+//
+// CodeGen correctly generates comdat declarations:
+//   $_ZN1SC1Ev = comdat any
+//   $_ZN1SD1Ev = comdat any
+//   define linkonce_odr void @_ZN1SC1Ev(...) comdat
+//
+// CIR omits the comdat declarations and attribute:
+//   define linkonce_odr void @_ZN1SC1Ev(...)  // No comdat!
+//
+// This affects:
+// - All inline constructors (C1 and C2 variants)
+// - All inline destructors (D1 and D2 variants)
+// - Implicitly-defined constructors/destructors
+// - Defaulted constructors/destructors
+// - Delegating constructors
+//
+// Impact:
+// - May cause ODR violations with multiple translation units
+// - Linker cannot merge duplicate definitions
+// - Potential code bloat from duplicate definitions
+
+// DIFF: -$_ZN1SC1Ev = comdat any
+// DIFF: -$_ZN1SC2Ev = comdat any
+// DIFF: -$_ZN1SD1Ev = comdat any
+// DIFF: -$_ZN1SD2Ev = comdat any
+// DIFF: -define linkonce_odr {{.*}} @_ZN1SC1Ev{{.*}} comdat
+// DIFF: +define linkonce_odr {{.*}} @_ZN1SC1Ev
+
+struct S {
+    int x;
+
+    // Inline constructor
+    S() : x(42) {}
+
+    // Inline destructor
+    ~S() {}
+};
+
+int test_ctor_dtor() {
+    S s;
+    return s.x;
+}
+
+// Test with parameterized constructor
+struct WithParams {
+    int a, b;
+
+    // DIFF: -$_ZN10WithParamsC1Eii = comdat any
+    // DIFF: +define linkonce_odr {{.*}} @_ZN10WithParamsC1Eii
+
+    WithParams(int x, int y) : a(x), b(y) {}
+    ~WithParams() {}
+};
+
+int test_params() {
+    WithParams w(10, 20);
+    return w.a + w.b;
+}
+
+// Test with delegating constructor
+struct Delegating {
+    int x, y;
+
+    // DIFF: -$_ZN10DelegatingC1Ei = comdat any
+    // DIFF: -$_ZN10DelegatingC1Eii = comdat any
+
+    Delegating(int a) : Delegating(a, a * 2) {}
+    Delegating(int a, int b) : x(a), y(b) {}
+    ~Delegating() {}
+};
+
+int test_delegating() {
+    Delegating d(5);
+    return d.x + d.y;
+}
+
+// Test with copy constructor
+struct WithCopy {
+    int val;
+
+    // DIFF: -$_ZN8WithCopyC1Ei = comdat any
+    // DIFF: -$_ZN8WithCopyC1ERKS_ = comdat any
+
+    WithCopy(int v) : val(v) {}
+    WithCopy(const WithCopy& other) : val(other.val * 2) {}
+    ~WithCopy() {}
+};
+
+int test_copy() {
+    WithCopy w1(10);
+    WithCopy w2(w1);
+    return w2.val;
+}
+
+// Test with move constructor
+struct WithMove {
+    int* ptr;
+
+    // DIFF: -$_ZN8WithMoveC1Ei = comdat any
+    // DIFF: -$_ZN8WithMoveC1EOS_ = comdat any
+
+    WithMove(int v) : ptr(new int(v)) {}
+    WithMove(WithMove&& other) : ptr(other.ptr) { other.ptr = nullptr; }
+    ~WithMove() { delete ptr; }
+
+    int get() const { return ptr ? *ptr : 0; }
+};
+
+int test_move() {
+    WithMove w1(42);
+    WithMove w2(static_cast<WithMove&&>(w1));
+    return w2.get();
+}
diff --git a/clang/test/CIR/Incubator/divergences/lambda-capture-by-ref.cpp b/clang/test/CIR/Incubator/divergences/lambda-capture-by-ref.cpp
new file mode 100644
index 0000000000000..9a61184e79e3a
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/lambda-capture-by-ref.cpp
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Lambda with capture by reference missing comdat.
+//
+// CodeGen:
+//   $_ZZ4testvENK3$_0clEv = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZZ4testvENK3$_0clEv = comdat any
+
+int test() {
+    int x = 10;
+    auto f = [&x]() { x *= 2; return x; };
+    return f();
+}
diff --git a/clang/test/CIR/Incubator/divergences/lambda-capture-by-value.cpp b/clang/test/CIR/Incubator/divergences/lambda-capture-by-value.cpp
new file mode 100644
index 0000000000000..4b38849e884d1
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/lambda-capture-by-value.cpp
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Lambda with capture by value missing comdat.
+//
+// CodeGen:
+//   $_ZZ4testvENK3$_0clEv = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZZ4testvENK3$_0clEv = comdat any
+
+int test() {
+    int x = 10;
+    auto f = [x]() { return x * 2; };
+    return f();
+}
diff --git a/clang/test/CIR/Incubator/divergences/lambda-missing-comdat.cpp b/clang/test/CIR/Incubator/divergences/lambda-missing-comdat.cpp
new file mode 100644
index 0000000000000..e85d010b8b6d0
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/lambda-missing-comdat.cpp
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Lambda functions are missing comdat groups and other attributes.
+//
+// CodeGen generates comdat declarations for lambda operator():
+//   $_ZZ4testvENK3$_0clEv = comdat any
+//   define linkonce_odr i32 @_ZZ4testvENK3$_0clEv(...) comdat
+//
+// CIR omits comdat:
+//   define linkonce_odr i32 @_ZZ4testvENK3$_0clEv(...)  // No comdat
+//
+// This affects:
+// - Lambda operator() functions
+// - Lambda copy/move constructors
+// - Lambda destructors
+//
+// Impact: Potential ODR violations with multiple TUs
+
+// DIFF: -$_ZZ4testvENK3$_0clEv = comdat any
+// DIFF: -define linkonce_odr {{.*}} @_ZZ4testvENK3$_0clEv{{.*}} comdat
+// DIFF: +define linkonce_odr {{.*}} @_ZZ4testvENK3$_0clEv
+
+int test() {
+    auto f = []() { return 42; };
+    return f();
+}
+
+// Lambda with capture
+int test_capture() {
+    int x = 10;
+    auto f = [x]() { return x * 2; };
+    return f();
+}
+
+// Lambda taking parameters
+int test_params() {
+    auto f = [](int a, int b) { return a + b; };
+    return f(10, 20);
+}
+
+// Mutable lambda
+int test_mutable() {
+    int x = 10;
+    auto f = [x]() mutable { return ++x; };
+    return f();
+}
+
+// Lambda returning struct
+struct Result {
+    int value;
+};
+
+int test_struct_return() {
+    auto f = []() { return Result{42}; };
+    return f().value;
+}
diff --git a/clang/test/CIR/Incubator/divergences/lambda-mutable.cpp b/clang/test/CIR/Incubator/divergences/lambda-mutable.cpp
new file mode 100644
index 0000000000000..ad923050b54e7
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/lambda-mutable.cpp
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Mutable lambda missing comdat.
+//
+// CodeGen:
+//   $_ZZ4testvENK3$_0clEv = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZZ4testvENK3$_0clEv = comdat any
+
+int test() {
+    int x = 10;
+    auto f = [x]() mutable { return ++x; };
+    return f();
+}
diff --git a/clang/test/CIR/Incubator/divergences/lambda-returning-struct.cpp b/clang/test/CIR/Incubator/divergences/lambda-returning-struct.cpp
new file mode 100644
index 0000000000000..dbdc53f0b4cc2
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/lambda-returning-struct.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Lambda returning struct missing comdat and may have struct return ABI issues.
+//
+// CodeGen:
+//   $_ZZ4testvENK3$_0clEv = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZZ4testvENK3$_0clEv = comdat any
+
+struct Result {
+    int value;
+};
+
+int test() {
+    auto f = []() { return Result{42}; };
+    return f().value;
+}
diff --git a/clang/test/CIR/Incubator/divergences/lambda-simple.cpp b/clang/test/CIR/Incubator/divergences/lambda-simple.cpp
new file mode 100644
index 0000000000000..67eaf641aafc2
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/lambda-simple.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Simple lambda missing comdat.
+//
+// CodeGen:
+//   $_ZZ4testvENK3$_0clEv = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZZ4testvENK3$_0clEv = comdat any
+
+int test() {
+    auto f = []() { return 42; };
+    return f();
+}
diff --git a/clang/test/CIR/Incubator/divergences/lambda-with-params.cpp b/clang/test/CIR/Incubator/divergences/lambda-with-params.cpp
new file mode 100644
index 0000000000000..6adf6f9b1cb79
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/lambda-with-params.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Lambda with parameters missing comdat.
+//
+// CodeGen:
+//   $_ZZ4testvENK3$_0clEii = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZZ4testvENK3$_0clEii = comdat any
+
+int test() {
+    auto f = [](int a, int b) { return a + b; };
+    return f(10, 20);
+}
diff --git a/clang/test/CIR/Incubator/divergences/member-ptr-abi-calling-conv.cpp b/clang/test/CIR/Incubator/divergences/member-ptr-abi-calling-conv.cpp
new file mode 100644
index 0000000000000..4425c3e784d85
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/member-ptr-abi-calling-conv.cpp
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.og.ll
+// RUN: FileCheck --input-file=%t.og.ll %s --check-prefix=OGCG
+//
+// XFAIL: *
+//
+// CIR uses incorrect calling convention for member function pointers.
+//
+// Member function pointers are represented as { i64, i64 } structs containing:
+// - Function pointer or vtable offset (first i64)
+// - This-pointer adjustment (second i64)
+//
+// Per the System V x86_64 ABI, small structs should be passed in registers
+// by decomposing them into separate scalar arguments.
+//
+// Current divergence:
+// CIR: define i32 @_Z15call_member_ptrP1SMS_FiiEi(ptr %0, { i64, i64 } %1, i32 %2)
+//      (passes struct directly)
+//
+// CodeGen: define i32 @_Z15call_member_ptrP1SMS_FiiEi(ptr %s, i64 %ptr.coerce0, i64 %ptr.coerce1, i32 %val)
+//          (decomposes struct into two i64 parameters)
+//
+// This breaks ABI compatibility when calling functions across TUs.
+
+struct S {
+    int x;
+    int foo(int y) { return x + y; }
+};
+
+int call_member_ptr(S* s, int (S::*ptr)(int), int val) {
+    return (s->*ptr)(val);
+}
+
+int test() {
+    S s;
+    s.x = 42;
+    return call_member_ptr(&s, &S::foo, 10);
+}
+
+// CIR currently passes the member pointer as an aggregate (not decomposed):
+// LLVM: define {{.*}} i32 @_Z15call_member_ptrP1SMS_FiiEi(ptr {{.*}}, { i64, i64 } {{.*}}, i32 {{.*}})
+
+// CodeGen decomposes the member pointer into two i64 scalars:
+// OGCG: define {{.*}} i32 @_Z15call_member_ptrP1SMS_FiiEi(ptr {{.*}} %s, i64 %ptr.coerce0, i64 %ptr.coerce1, i32 {{.*}} %val)
diff --git a/clang/test/CIR/Incubator/divergences/member-ptr-array.cpp b/clang/test/CIR/Incubator/divergences/member-ptr-array.cpp
new file mode 100644
index 0000000000000..ba7596ce921db
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/member-ptr-array.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Array of member pointers.
+// Each element is {i64, i64} but array indexing and access still needs proper ABI.
+//
+// CodeGen handles array properly
+// CIR may have issues
+
+// DIFF: {{^[-+]}}
+
+struct S {
+    int a, b, c;
+};
+
+int access_by_index(S* s, int index) {
+    int S::*ptrs[] = {&S::a, &S::b, &S::c};
+    return s->*ptrs[index];
+}
+
+int test() {
+    S s{10, 20, 30};
+    return access_by_index(&s, 1);
+}
diff --git a/clang/test/CIR/Incubator/divergences/member-ptr-base-to-derived.cpp b/clang/test/CIR/Incubator/divergences/member-ptr-base-to-derived.cpp
new file mode 100644
index 0000000000000..692bc8707e3de
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/member-ptr-base-to-derived.cpp
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Member pointer to base class member accessed via derived.
+//
+// CodeGen decomposes:
+//   define i32 @access_base_member(ptr %d, i64 %ptr.coerce0, i64 %ptr.coerce1)
+//
+// CIR passes as struct:
+//   define i32 @access_base_member(ptr %0, { i64, i64 } %1)
+
+// DIFF: -define {{.*}} @{{.*}}access_base_member(ptr{{.*}}, i64{{.*}}, i64
+// DIFF: +define {{.*}} @{{.*}}access_base_member(ptr{{.*}}, { i64, i64 }
+
+struct Base {
+    int x;
+};
+
+struct Derived : Base {
+    int y;
+};
+
+int access_base_member(Derived* d, int Base::*ptr) {
+    return d->*ptr;
+}
+
+int test() {
+    Derived d{{42}, 100};
+    return access_base_member(&d, &Base::x);
+}
diff --git a/clang/test/CIR/Incubator/divergences/member-ptr-comparison.cpp b/clang/test/CIR/Incubator/divergences/member-ptr-comparison.cpp
new file mode 100644
index 0000000000000..d9e8054782a6d
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/member-ptr-comparison.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Member pointer comparison.
+//
+// CodeGen decomposes both member pointers:
+//   define i1 @compare_member_ptrs(i64 %p1.coerce0, i64 %p1.coerce1, i64 %p2.coerce0, i64 %p2.coerce1)
+//
+// CIR passes as structs:
+//   define i1 @compare_member_ptrs({ i64, i64 } %0, { i64, i64 } %1)
+
+// DIFF: -define {{.*}} @{{.*}}compare_member_ptrs(i64{{.*}}, i64{{.*}}, i64{{.*}}, i64
+// DIFF: +define {{.*}} @{{.*}}compare_member_ptrs({ i64, i64 }{{.*}}, { i64, i64 }
+
+struct S {
+    int x, y;
+};
+
+bool compare_member_ptrs(int S::*p1, int S::*p2) {
+    return p1 == p2;
+}
+
+int test() {
+    return compare_member_ptrs(&S::x, &S::x) ? 1 : 0;
+}
diff --git a/clang/test/CIR/Incubator/divergences/member-ptr-const-method.cpp b/clang/test/CIR/Incubator/divergences/member-ptr-const-method.cpp
new file mode 100644
index 0000000000000..b410ecbf3bd1a
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/member-ptr-const-method.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Pointer to const member function.
+//
+// CodeGen decomposes:
+//   define i32 @call_const_method(ptr %s, i64 %ptr.coerce0, i64 %ptr.coerce1)
+//
+// CIR passes as struct:
+//   define i32 @call_const_method(ptr %0, { i64, i64 } %1)
+
+// DIFF: -define {{.*}} @{{.*}}call_const_method(ptr{{.*}}, i64{{.*}}, i64
+// DIFF: +define {{.*}} @{{.*}}call_const_method(ptr{{.*}}, { i64, i64 }
+
+struct S {
+    int get() const { return 42; }
+};
+
+int call_const_method(const S* s, int (S::*ptr)() const) {
+    return (s->*ptr)();
+}
+
+int test() {
+    S s;
+    return call_const_method(&s, &S::get);
+}
diff --git a/clang/test/CIR/Incubator/divergences/member-ptr-data-member.cpp b/clang/test/CIR/Incubator/divergences/member-ptr-data-member.cpp
new file mode 100644
index 0000000000000..c2fb2641b1113
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/member-ptr-data-member.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Pointer to data member calling convention.
+// Member pointers are {i64, i64} and should be decomposed per x86_64 ABI.
+//
+// CodeGen decomposes:
+//   define i32 @access_member(ptr %s, i64 %ptr.coerce0, i64 %ptr.coerce1)
+//
+// CIR passes as struct:
+//   define i32 @access_member(ptr %0, { i64, i64 } %1)
+
+// DIFF: -define {{.*}} @{{.*}}access_member(ptr{{.*}}, i64{{.*}}, i64
+// DIFF: +define {{.*}} @{{.*}}access_member(ptr{{.*}}, { i64, i64 }
+
+struct S {
+    int x, y;
+};
+
+int access_member(S* s, int S::*ptr) {
+    return s->*ptr;
+}
+
+int test() {
+    S s{1, 2};
+    return access_member(&s, &S::x);
+}
diff --git a/clang/test/CIR/Incubator/divergences/member-ptr-multiple-inheritance.cpp b/clang/test/CIR/Incubator/divergences/member-ptr-multiple-inheritance.cpp
new file mode 100644
index 0000000000000..61454b8a7e592
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/member-ptr-multiple-inheritance.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Pointer to member function with multiple inheritance.
+// Member function pointers need offset adjustment for non-first base.
+//
+// CodeGen decomposes {i64, i64}:
+//   define i32 @access_b(ptr %c, i64 %ptr.coerce0, i64 %ptr.coerce1)
+//
+// CIR passes as struct:
+//   define i32 @access_b(ptr %0, { i64, i64 } %1)
+
+// DIFF: -define {{.*}} @{{.*}}access_b(ptr{{.*}}, i64{{.*}}, i64
+// DIFF: +define {{.*}} @{{.*}}access_b(ptr{{.*}}, { i64, i64 }
+
+struct A { int a; };
+struct B { int b; };
+struct C : A, B { int c; };
+
+int access_b(C* c, int B::*ptr) {
+    return c->*ptr;
+}
+
+int test() {
+    C c{{1}, {2}, 3};
+    return access_b(&c, &B::b);
+}
diff --git a/clang/test/CIR/Incubator/divergences/member-ptr-null.cpp b/clang/test/CIR/Incubator/divergences/member-ptr-null.cpp
new file mode 100644
index 0000000000000..32e6280dc04bd
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/member-ptr-null.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Null member pointer should be {0, 0}.
+//
+// CodeGen decomposes:
+//   define i1 @test_null(i64 %ptr.coerce0, i64 %ptr.coerce1)
+//
+// CIR passes as struct:
+//   define i1 @test_null({ i64, i64 } %0)
+
+// DIFF: -define {{.*}} @{{.*}}test_null(i64{{.*}}, i64
+// DIFF: +define {{.*}} @{{.*}}test_null({ i64, i64 }
+
+struct S {
+    int x;
+};
+
+bool test_null(int S::*ptr) {
+    return ptr == nullptr;
+}
+
+int test() {
+    return test_null(nullptr) ? 1 : 0;
+}
diff --git a/clang/test/CIR/Incubator/divergences/member-ptr-overloaded-function.cpp b/clang/test/CIR/Incubator/divergences/member-ptr-overloaded-function.cpp
new file mode 100644
index 0000000000000..b03067eba1a11
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/member-ptr-overloaded-function.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Pointer to overloaded member function requires explicit type cast.
+//
+// CodeGen decomposes:
+//   define i32 @call_overloaded(ptr %s, i64 %ptr.coerce0, i64 %ptr.coerce1)
+//
+// CIR passes as struct:
+//   define i32 @call_overloaded(ptr %0, { i64, i64 } %1)
+
+// DIFF: -define {{.*}} @{{.*}}call_overloaded(ptr{{.*}}, i64{{.*}}, i64
+// DIFF: +define {{.*}} @{{.*}}call_overloaded(ptr{{.*}}, { i64, i64 }
+
+struct S {
+    int foo(int x) { return x; }
+    int foo(double x) { return static_cast<int>(x); }
+};
+
+int call_overloaded(S* s, int (S::*ptr)(int)) {
+    return (s->*ptr)(42);
+}
+
+int test() {
+    S s;
+    return call_overloaded(&s, static_cast<int (S::*)(int)>(&S::foo));
+}
diff --git a/clang/test/CIR/Incubator/divergences/member-ptr-returning-struct.cpp b/clang/test/CIR/Incubator/divergences/member-ptr-returning-struct.cpp
new file mode 100644
index 0000000000000..3f1dbc907bf6e
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/member-ptr-returning-struct.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Member function pointer returning struct.
+// Both member pointer AND return value have calling convention issues.
+//
+// CodeGen decomposes member pointer and may coerce return:
+//   (check specific signature)
+//
+// CIR has both issues
+
+// DIFF: {{^[-+]}}
+
+struct Result {
+    int value;
+};
+
+struct Worker {
+    Result compute() { return {42}; }
+};
+
+Result call_worker(Worker* w, Result (Worker::*ptr)()) {
+    return (w->*ptr)();
+}
+
+int test() {
+    Worker w;
+    return call_worker(&w, &Worker::compute).value;
+}
diff --git a/clang/test/CIR/Incubator/divergences/member-ptr-stored-in-struct.cpp b/clang/test/CIR/Incubator/divergences/member-ptr-stored-in-struct.cpp
new file mode 100644
index 0000000000000..689ba8c6ab9d3
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/member-ptr-stored-in-struct.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Member pointer stored as struct member.
+//
+// CodeGen still decomposes when passing/returning:
+//   (check for decomposition in invoke())
+//
+// CIR passes as struct
+
+// DIFF: {{^[-+]}}
+
+struct Callback {
+    int (Callback::*method)();
+    int impl() { return 42; }
+    int invoke() { return (this->*method)(); }
+};
+
+int test() {
+    Callback cb;
+    cb.method = &Callback::impl;
+    return cb.invoke();
+}
diff --git a/clang/test/CIR/Incubator/divergences/member-ptr-virtual-function.cpp b/clang/test/CIR/Incubator/divergences/member-ptr-virtual-function.cpp
new file mode 100644
index 0000000000000..eb66e4e9a3692
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/member-ptr-virtual-function.cpp
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Pointer to virtual member function.
+// Virtual member pointers encode vtable offset and need special handling.
+//
+// CodeGen decomposes:
+//   define i32 @call_virtual_ptr(ptr %b, i64 %ptr.coerce0, i64 %ptr.coerce1)
+//
+// CIR passes as struct:
+//   define i32 @call_virtual_ptr(ptr %0, { i64, i64 } %1)
+
+// DIFF: -define {{.*}} @{{.*}}call_virtual_ptr(ptr{{.*}}, i64{{.*}}, i64
+// DIFF: +define {{.*}} @{{.*}}call_virtual_ptr(ptr{{.*}}, { i64, i64 }
+
+struct Base {
+    virtual int foo() { return 1; }
+};
+
+struct Derived : Base {
+    int foo() override { return 2; }
+};
+
+int call_virtual_ptr(Base* b, int (Base::*ptr)()) {
+    return (b->*ptr)();
+}
+
+int test() {
+    Derived d;
+    return call_virtual_ptr(&d, &Base::foo);
+}
diff --git a/clang/test/CIR/Incubator/divergences/missing-llvm-attributes.cpp b/clang/test/CIR/Incubator/divergences/missing-llvm-attributes.cpp
new file mode 100644
index 0000000000000..b74c4f403b4aa
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/missing-llvm-attributes.cpp
@@ -0,0 +1,62 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.og.ll
+// RUN: FileCheck --input-file=%t.og.ll %s --check-prefix=OGCG
+//
+// XFAIL: *
+//
+// CIR does not generate several important LLVM attributes on functions and parameters.
+//
+// Missing attributes affect:
+// 1. Optimization opportunities (noundef, nonnull, dereferenceable allow more aggressive opts)
+// 2. Undefined behavior detection (noundef makes undef/poison values explicit violations)
+// 3. Memory safety analysis (nonnull, dereferenceable help catch null pointer bugs)
+// 4. Link-time optimization (unnamed_addr allows more merging)
+//
+// Current divergences:
+//
+// Parameter attributes:
+// - noundef: Parameter must not be undef or poison (helps catch UB)
+// - nonnull: Pointer parameter must not be null
+// - dereferenceable(N): Pointer must be dereferenceable for at least N bytes
+//
+// Function attributes:
+// - mustprogress: Function must make forward progress (no infinite loops without side effects)
+// - unnamed_addr: Function address is not semantically significant
+//
+// Missing metadata:
+// - Function attributes: min-legal-vector-width, target-features, stack-protector-buffer-size
+//
+// Impact: Medium - Reduces optimization quality and UB detection
+
+struct S {
+    int x;
+    S(int v) : x(v) {}
+    ~S() {}
+};
+
+// Test parameter attributes
+int process_struct(S* s, int value) {
+    return s->x + value;
+}
+
+// Test return and 'this' pointer attributes
+int S_get_value(S* s) {
+    return s->x;
+}
+
+// Test reference parameters
+void take_reference(const S& s) {
+}
+
+// CIR output is currently missing noundef, nonnull, dereferenceable:
+// LLVM: define {{.*}} i32 @_Z14process_structP1Si(ptr %0, i32 %1)
+// LLVM-NOT: noundef
+// LLVM-NOT: nonnull
+// LLVM-NOT: dereferenceable
+
+// CodeGen output carries the full attribute set:
+// OGCG: define {{.*}} noundef i32 @_Z14process_structP1Si(ptr noundef %s, i32 noundef %value)
+// OGCG: define {{.*}} noundef i32 @_Z11S_get_valueP1S(ptr noundef %s)
+// OGCG: define {{.*}} void @_Z14take_referenceRK1S(ptr noundef nonnull align 4 dereferenceable(4) %s)
diff --git a/clang/test/CIR/Incubator/divergences/operator-missing-comdat.cpp b/clang/test/CIR/Incubator/divergences/operator-missing-comdat.cpp
new file mode 100644
index 0000000000000..5f37441314a3d
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/operator-missing-comdat.cpp
@@ -0,0 +1,130 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Operator overloads are missing comdat groups.
+//
+// CodeGen generates comdat for inline operator overloads:
+//   $_ZNK3IntplERKS_ = comdat any
+//   define linkonce_odr ... @_ZNK3IntplERKS_(...) comdat
+//
+// CIR omits comdat:
+//   define linkonce_odr ... @_ZNK3IntplERKS_(...)  // No comdat
+//
+// This affects all inline operator overloads:
+// - Arithmetic operators (+, -, *, /, etc.)
+// - Comparison operators (==, !=, <, >, etc.)
+// - Assignment operators (=, +=, -=, etc.)
+// - Subscript operator []
+// - Call operator ()
+// - Conversion operators
+//
+// Impact: Potential ODR violations in multi-TU programs
+
+// DIFF: -$_ZNK3IntplERKS_ = comdat any
+// DIFF: -define linkonce_odr {{.*}} @_ZNK3IntplERKS_{{.*}} comdat
+// DIFF: +define linkonce_odr {{.*}} @_ZNK3IntplERKS_
+
+// Arithmetic operator
+struct Int {
+    int value;
+
+    Int operator+(const Int& other) const {
+        return {value + other.value};
+    }
+
+    Int operator-(const Int& other) const {
+        return {value - other.value};
+    }
+};
+
+int test_arithmetic() {
+    Int a{10}, b{20};
+    Int c = a + b;
+    return c.value;
+}
+
+// Comparison operators
+struct Comparable {
+    int value;
+
+    bool operator==(const Comparable& other) const {
+        return value == other.value;
+    }
+
+    bool operator<(const Comparable& other) const {
+        return value < other.value;
+    }
+};
+
+int test_comparison() {
+    Comparable a{10}, b{20};
+    return a < b ? 1 : 0;
+}
+
+// Assignment operator
+struct Assignable {
+    int value;
+
+    Assignable& operator=(const Assignable& other) {
+        value = other.value;
+        return *this;
+    }
+};
+
+int test_assignment() {
+    Assignable a{10}, b{20};
+    a = b;
+    return a.value;
+}
+
+// Subscript operator
+struct Array {
+    int data[3] = {1, 2, 3};
+
+    int& operator[](int index) {
+        return data[index];
+    }
+
+    const int& operator[](int index) const {
+        return data[index];
+    }
+};
+
+int test_subscript() {
+    Array arr;
+    return arr[1];
+}
+
+// Call operator (functor)
+struct Adder {
+    int operator()(int a, int b) const {
+        return a + b;
+    }
+};
+
+int test_call() {
+    Adder add;
+    return add(10, 20);
+}
+
+// Conversion operator
+struct Convertible {
+    int value;
+
+    operator int() const {
+        return value;
+    }
+
+    operator bool() const {
+        return value != 0;
+    }
+};
+
+int test_conversion() {
+    Convertible c{42};
+    int x = c;  // Uses conversion operator
+    return x;
+}
diff --git a/clang/test/CIR/Incubator/divergences/rtti-dynamic-cast-downcast.cpp b/clang/test/CIR/Incubator/divergences/rtti-dynamic-cast-downcast.cpp
new file mode 100644
index 0000000000000..e81b603999328
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/rtti-dynamic-cast-downcast.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// dynamic_cast for downcasting missing type info comdat.
+//
+// CodeGen:
+//   $_ZTI4Base = comdat any
+//   $_ZTI7Derived = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZTI4Base = comdat any
+// DIFF: -$_ZTI7Derived = comdat any
+
+struct Base {
+    virtual ~Base() {}
+};
+
+struct Derived : Base {
+    int value = 42;
+};
+
+int test() {
+    Derived d;
+    Base* b = &d;
+    Derived* dp = dynamic_cast<Derived*>(b);
+    return dp ? dp->value : 0;
+}
diff --git a/clang/test/CIR/Incubator/divergences/rtti-dynamic-cast-upcast.cpp b/clang/test/CIR/Incubator/divergences/rtti-dynamic-cast-upcast.cpp
new file mode 100644
index 0000000000000..e049a3c4bb7fd
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/rtti-dynamic-cast-upcast.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// dynamic_cast for upcasting missing type info comdat.
+//
+// CodeGen:
+//   $_ZTI4Base = comdat any
+//   $_ZTI7Derived = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZTI4Base = comdat any
+// DIFF: -$_ZTI7Derived = comdat any
+
+struct Base {
+    virtual ~Base() {}
+    int x = 10;
+};
+
+struct Derived : Base {
+    int y = 20;
+};
+
+int test() {
+    Derived d;
+    Base* b = dynamic_cast<Base*>(&d);
+    return b ? b->x : 0;
+}
diff --git a/clang/test/CIR/Incubator/divergences/rtti-linkage-gep.cpp b/clang/test/CIR/Incubator/divergences/rtti-linkage-gep.cpp
new file mode 100644
index 0000000000000..9406eb10db63e
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/rtti-linkage-gep.cpp
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.og.ll
+// RUN: FileCheck --input-file=%t.og.ll %s --check-prefix=OGCG
+//
+// XFAIL: *
+//
+// CIR generates incorrect linkage and GEP types for RTTI type info structures.
+//
+// RTTI type info structures should be:
+// 1. Marked as linkonce_odr with comdat for proper ODR compliance
+// 2. Use consistent GEP indexing type (ptr-based not byte-based)
+//
+// Current divergences:
+// 1. CIR generates: @_ZTI7Derived = constant (missing linkonce_odr, comdat)
+//    CodeGen: @_ZTI7Derived = linkonce_odr constant ... comdat
+//
+// 2. CIR generates: getelementptr inbounds nuw (i8, ptr @..., i64 16)
+//    CodeGen: getelementptr inbounds (ptr, ptr @..., i64 2)
+//    These are semantically equivalent (2 ptrs = 16 bytes) but type differs
+//
+// This can cause linker errors and ODR violations in multi-TU programs.
+
+struct Base {
+    virtual ~Base() {}
+    virtual int get() { return 1; }
+};
+
+struct Derived : Base {
+    int get() override { return 2; }
+    int extra() { return 3; }
+};
+
+int test_dynamic_cast(Base* b) {
+    if (Derived* d = dynamic_cast<Derived*>(b)) {
+        return d->extra();
+    }
+    return 0;
+}
+
+// LLVM: Type info should have proper linkage
+// LLVM: @_ZTI7Derived = {{.*}}constant
+
+// OGCG: Type info should be linkonce_odr with comdat
+// OGCG: @_ZTI7Derived = linkonce_odr constant {{.*}} comdat
+// OGCG: getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2)
diff --git a/clang/test/CIR/Incubator/divergences/small-struct-coercion.cpp b/clang/test/CIR/Incubator/divergences/small-struct-coercion.cpp
new file mode 100644
index 0000000000000..9bb14c2149eb2
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/small-struct-coercion.cpp
@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Small structs (8 bytes or less) should be coerced to integer types per x86_64 ABI.
+// This is a divergence from CodeGen's behavior.
+//
+// Per the System V AMD64 ABI:
+// - Structs up to 8 bytes containing only INTEGER class should be passed in one register
+// - The struct should be coerced to i64 for 8-byte structs
+// - This applies to both parameters and return values
+//
+// CodeGen correctly coerces small structs:
+//   define i64 @return_small()    // Returns i64, not struct
+//
+// CIR incorrectly returns the struct directly:
+//   define %struct.SmallStruct @return_small()   // Wrong!
+//
+// This affects:
+// - Struct returns (8 bytes or less)
+// - Struct parameters (8 bytes or less)
+// - Any struct containing two i32 fields (8 bytes total)
+// - Struct with single i64 field
+// - Struct with single double field (floating point class, different rules)
+//
+// Impact: ABI incompatibility between ClangIR-compiled and CodeGen-compiled code
+
+// DIFF: -define {{.*}} i64 @{{.*}}return_small
+// DIFF: +define {{.*}} %struct.SmallStruct @{{.*}}return_small
+
+struct SmallStruct {
+    int a, b;  // 8 bytes total
+};
+
+// Should return i64 per ABI, but CIR returns struct
+SmallStruct return_small() {
+    return {1, 2};
+}
+
+// Should take i64 parameter per ABI, but CIR takes struct
+int take_small(SmallStruct s) {
+    return s.a + s.b;
+}
+
+// Test with 4-byte struct (should be coerced to i32)
+struct TinyStruct {
+    int x;  // 4 bytes
+};
+
+// DIFF: -define {{.*}} i32 @{{.*}}return_tiny
+// DIFF: +define {{.*}} %struct.TinyStruct @{{.*}}return_tiny
+
+TinyStruct return_tiny() {
+    return {42};
+}
+
+// Test with struct containing long long (8 bytes, should be i64)
+struct LongStruct {
+    long long value;
+};
+
+// DIFF: -define {{.*}} i64 @{{.*}}return_long
+// DIFF: +define {{.*}} %struct.LongStruct @{{.*}}return_long
+
+LongStruct return_long() {
+    return {123456789LL};
+}
diff --git a/clang/test/CIR/Incubator/divergences/sret-abi-mismatch.cpp b/clang/test/CIR/Incubator/divergences/sret-abi-mismatch.cpp
new file mode 100644
index 0000000000000..d2e5d6d7ad263
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/sret-abi-mismatch.cpp
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.og.ll
+// RUN: FileCheck --input-file=%t.og.ll %s --check-prefix=OGCG
+//
+// XFAIL: *
+//
+// CIR→LLVM lowering does not implement sret (struct return) calling convention.
+//
+// When returning non-trivial C++ types by value on x86_64, the System V ABI
+// requires using the sret calling convention:
+// - The function should return void
+// - A hidden first parameter (ptr sret) receives the return value address
+// - The caller allocates space for the return value
+//
+// Currently, CIR→LLVM lowering returns structs by value directly, which is
+// not ABI-compliant and will cause calling convention mismatches with code
+// compiled via standard CodeGen.
+//
+// This affects any function returning a non-trivial struct/class by value.
+
+struct S {
+  int x;
+  ~S();  // Non-trivial destructor makes this sret-eligible
+};
+
+S foo() {
+  S s;
+  s.x = 42;
+  return s;
+}
+
+// LLVM lowering incorrectly returns %struct.S by value
+// LLVM: define dso_local %struct.S @_Z3foov()
+// LLVM-NOT: sret
+
+// Original CodeGen correctly uses sret calling convention
+// OGCG: define dso_local void @_Z3foov(ptr {{.*}}sret(%struct.S){{.*}} %agg.result)
+// OGCG-NOT: define {{.*}} %struct.S @_Z3foov
+
+// Expected LLVM lowering (when fixed):
+// Should match OGCG: define dso_local void @_Z3foov(ptr sret(%struct.S) %agg.result)
diff --git a/clang/test/CIR/Incubator/divergences/static-inline-member.cpp b/clang/test/CIR/Incubator/divergences/static-inline-member.cpp
new file mode 100644
index 0000000000000..37cbfaf40236f
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/static-inline-member.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Inline static member (C++17).
+//
+// CodeGen:
+//   @_ZN1S5valueE = linkonce_odr global i32 42, comdat
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -@_ZN1S5valueE = linkonce_odr global i32 42, comdat
+// DIFF: +@_ZN1S5valueE = linkonce_odr global i32 42
+
+struct S {
+    inline static int value = 42;
+};
+
+int test() {
+    return S::value;
+}
diff --git a/clang/test/CIR/Incubator/divergences/static-local-trivial.cpp b/clang/test/CIR/Incubator/divergences/static-local-trivial.cpp
new file mode 100644
index 0000000000000..8b26e92c3f0e0
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/static-local-trivial.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Static local with trivial initialization.
+//
+// CodeGen:
+//   @_ZZ10get_staticvE1x = internal global i32 42
+//
+// CIR:
+//   Same, but check for any divergences in access
+
+// DIFF: Check for static local handling
+
+int get_static() {
+    static int x = 42;
+    return x++;
+}
+
+int test() {
+    return get_static();
+}
diff --git a/clang/test/CIR/Incubator/divergences/static-member-variable.cpp b/clang/test/CIR/Incubator/divergences/static-member-variable.cpp
new file mode 100644
index 0000000000000..562cdb00a1ae0
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/static-member-variable.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Static member variable.
+//
+// CodeGen:
+//   @_ZN1S7counterE = global i32 100
+//
+// CIR:
+//   Check for differences
+
+// DIFF: Check for static member handling
+
+struct S {
+    static int counter;
+};
+
+int S::counter = 100;
+
+int test() {
+    return S::counter;
+}
diff --git a/clang/test/CIR/Incubator/divergences/template-class-instantiation.cpp b/clang/test/CIR/Incubator/divergences/template-class-instantiation.cpp
new file mode 100644
index 0000000000000..37ef068ead957
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/template-class-instantiation.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Template class instantiation missing comdat.
+//
+// CodeGen:
+//   $_ZN9ContainerIiEC1Ei = comdat any
+//   define linkonce_odr void @_ZN9ContainerIiEC1Ei(...) comdat
+//
+// CIR:
+//   define linkonce_odr void @_ZN9ContainerIiEC1Ei(...)  // No comdat
+
+// DIFF: -$_ZN9ContainerIiEC1Ei = comdat any
+// DIFF: -define linkonce_odr {{.*}} comdat
+// DIFF: +define linkonce_odr
+
+template<typename T>
+struct Container {
+    T value;
+    Container(T v) : value(v) {}
+    T get() const { return value; }
+};
+
+int test() {
+    Container<int> c(42);
+    return c.get();
+}
diff --git a/clang/test/CIR/Incubator/divergences/template-inheritance.cpp b/clang/test/CIR/Incubator/divergences/template-inheritance.cpp
new file mode 100644
index 0000000000000..9cfb4894bd1a5
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/template-inheritance.cpp
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Template inheritance missing comdat.
+//
+// CodeGen:
+//   $_ZN7DerivedC1Ei = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZN7DerivedC1Ei = comdat any
+
+template<typename T>
+struct Base {
+    T value;
+    Base(T v) : value(v) {}
+};
+
+struct Derived : Base<int> {
+    Derived(int v) : Base<int>(v) {}
+};
+
+int test() {
+    Derived d(42);
+    return d.value;
+}
diff --git a/clang/test/CIR/Incubator/divergences/template-member-function.cpp b/clang/test/CIR/Incubator/divergences/template-member-function.cpp
new file mode 100644
index 0000000000000..f5bae27d0bf3d
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/template-member-function.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Member function template missing comdat.
+//
+// CodeGen:
+//   $_ZN1S7processIdEEiT_ = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZN1S7processIdEEiT_ = comdat any
+
+struct S {
+    template<typename T>
+    int process(T value) {
+        return static_cast<int>(value);
+    }
+};
+
+int test() {
+    S s;
+    return s.process(42.5);
+}
diff --git a/clang/test/CIR/Incubator/divergences/template-missing-comdat.cpp b/clang/test/CIR/Incubator/divergences/template-missing-comdat.cpp
new file mode 100644
index 0000000000000..9a402a5c5deee
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/template-missing-comdat.cpp
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.og.ll
+// RUN: FileCheck --input-file=%t.og.ll %s --check-prefix=OGCG
+//
+// XFAIL: *
+//
+// CIR generates template instantiations without comdat groups.
+//
+// Template instantiations should be marked with comdat groups to ensure
+// proper ODR (One Definition Rule) compliance when linking multiple TUs
+// that instantiate the same template.
+//
+// Current divergences:
+// 1. CIR: define linkonce_odr void @_ZN7WrapperIiEC1Ei(...)
+//    CodeGen: define linkonce_odr void @_ZN7WrapperIiEC1Ei(...) comdat
+//
+// 2. CIR: define linkonce_odr i32 @_Z3addIiET_S0_S0_(...)
+//    CodeGen: define linkonce_odr i32 @_Z3addIiET_S0_S0_(...) comdat
+//
+// Without comdat, the linker may fail to properly merge duplicate definitions,
+// leading to ODR violations, increased binary size, or linker errors.
+
+template<typename T>
+struct Wrapper {
+    T value;
+    Wrapper(T v) : value(v) {}
+    T get() const { return value; }
+};
+
+template<typename T>
+T add(T a, T b) {
+    return a + b;
+}
+
+int test_templates() {
+    Wrapper<int> w(42);
+    return w.get() + add(1, 2);
+}
+
+// LLVM: Template instantiations exist
+// LLVM: define linkonce_odr {{.*}} @_ZN7WrapperIiEC1Ei
+// LLVM: define linkonce_odr {{.*}} @_Z3addIiET_S0_S0_
+
+// OGCG: Template instantiations should have comdat
+// OGCG: define linkonce_odr {{.*}} @_ZN7WrapperIiEC1Ei({{.*}}) {{.*}} comdat
+// OGCG: define linkonce_odr {{.*}} @_Z3addIiET_S0_S0_({{.*}}) {{.*}} comdat
diff --git a/clang/test/CIR/Incubator/divergences/template-multiple-type-params.cpp b/clang/test/CIR/Incubator/divergences/template-multiple-type-params.cpp
new file mode 100644
index 0000000000000..26b32e0b07711
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/template-multiple-type-params.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Template with multiple type parameters missing comdat.
+//
+// CodeGen:
+//   $_Z7processIiiEvT_T0_ = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_Z7processIiiEvT_T0_ = comdat any
+// DIFF: +# Missing comdat
+
+template<typename T, typename U>
+void process(T t, U u) {}
+
+int test() {
+    process(10, 20);
+    return 0;
+}
diff --git a/clang/test/CIR/Incubator/divergences/template-non-type-param.cpp b/clang/test/CIR/Incubator/divergences/template-non-type-param.cpp
new file mode 100644
index 0000000000000..071012e64f786
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/template-non-type-param.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Template with non-type parameter missing comdat.
+//
+// CodeGen:
+//   $_ZNK5ArrayILi5EE4sizeEv = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZNK5ArrayILi5EE4sizeEv = comdat any
+
+template<int N>
+struct Array {
+    int data[N];
+    int size() const { return N; }
+};
+
+int test() {
+    Array<5> arr;
+    return arr.size();
+}
diff --git a/clang/test/CIR/Incubator/divergences/template-specialization.cpp b/clang/test/CIR/Incubator/divergences/template-specialization.cpp
new file mode 100644
index 0000000000000..87ecaf9586bd0
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/template-specialization.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Template specialization missing comdat.
+//
+// CodeGen generates comdat for specialization:
+//   $_ZN5ValueIiE3getEv = comdat any
+//
+// CIR missing comdat
+
+// DIFF: -$_ZN5ValueIiE3getEv = comdat any
+
+template<typename T>
+struct Value {
+    static int get() { return 1; }
+};
+
+template<>
+struct Value<int> {
+    static int get() { return 42; }
+};
+
+int test() {
+    return Value<int>::get();
+}
diff --git a/clang/test/CIR/Incubator/divergences/template-variadic.cpp b/clang/test/CIR/Incubator/divergences/template-variadic.cpp
new file mode 100644
index 0000000000000..59e44cff3c652
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/template-variadic.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Variadic template missing comdat.
+//
+// CodeGen:
+//   $_ZN7CounterIJidcEE5countE = comdat any
+//
+// CIR:
+//   Missing comdat
+
+// DIFF: -$_ZN7CounterIJidcEE5countE = comdat any
+
+template<typename... Args>
+struct Counter {
+    static constexpr int count = sizeof...(Args);
+};
+
+int test() {
+    return Counter<int, double, char>::count;
+}
diff --git a/clang/test/CIR/Incubator/divergences/thread-local-wrapper-missing.cpp b/clang/test/CIR/Incubator/divergences/thread-local-wrapper-missing.cpp
new file mode 100644
index 0000000000000..7ba4ba6fd01a3
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/thread-local-wrapper-missing.cpp
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM
+//
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.og.ll
+// RUN: FileCheck --input-file=%t.og.ll %s --check-prefix=OGCG
+//
+// XFAIL: *
+//
+// CIR does not generate thread wrapper functions for thread-local variables.
+//
+// The Itanium C++ ABI requires thread wrapper functions for non-local
+// thread-local variables to properly initialize them on first access.
+// The wrapper is named __tls_wrapper_<mangled_name> or _ZTW<mangled_name>.
+//
+// Current divergence:
+// CIR: Does not generate @_ZTW7tls_var wrapper function
+// CodeGen: Generates weak_odr hidden ptr @_ZTW7tls_var() comdat
+//
+// Without the wrapper, thread-local initialization may not work correctly
+// when the variable is accessed from other translation units.
+
+thread_local int tls_var = 42;
+
+int get_tls() {
+    return tls_var;
+}
+
+void set_tls(int val) {
+    tls_var = val;
+}
+
+// LLVM: Should define functions
+// LLVM: define {{.*}} @_Z7get_tlsv()
+// LLVM: define {{.*}} @_Z7set_tlsi({{.*}})
+
+// OGCG: Should generate thread wrapper function
+// OGCG: $_ZTW7tls_var = comdat any
+// OGCG: define weak_odr hidden {{.*}} ptr @_ZTW7tls_var() {{.*}} comdat
diff --git a/clang/test/CIR/Incubator/divergences/unnecessary-temp-allocas.cpp b/clang/test/CIR/Incubator/divergences/unnecessary-temp-allocas.cpp
new file mode 100644
index 0000000000000..0a8f946861ac4
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/unnecessary-temp-allocas.cpp
@@ -0,0 +1,59 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// CIR generates unnecessary temporary allocas for return values.
+// This is a code quality issue - the IR is more verbose than necessary.
+//
+// CodeGen directly returns values:
+//   %call = call i32 @foo()
+//   ret i32 %call
+//
+// CIR allocates a temporary, stores to it, then loads from it:
+//   %1 = alloca i32, i64 1, align 4
+//   %2 = call i32 @foo()
+//   store i32 %2, ptr %1, align 4
+//   %3 = load i32, ptr %1, align 4
+//   ret i32 %3
+//
+// This pattern appears in nearly all functions.
+// Impact: More verbose IR, extra instructions
+// Likely optimized away by later passes, but unnecessary
+
+// DIFF: -  %call = call {{.*}} @_Z3foov()
+// DIFF: -  ret i32 %call
+// DIFF: +  %{{[0-9]+}} = alloca i32
+// DIFF: +  %{{[0-9]+}} = call {{.*}} @_Z3foov()
+// DIFF: +  store i32 %{{[0-9]+}}, ptr %{{[0-9]+}}
+// DIFF: +  %{{[0-9]+}} = load i32, ptr %{{[0-9]+}}
+// DIFF: +  ret i32 %{{[0-9]+}}
+
+int foo() {
+    return 42;
+}
+
+int test() {
+    return foo();
+}
+
+// Also affects struct returns
+struct S {
+    int x, y;
+};
+
+S bar() {
+    return {1, 2};
+}
+
+S test_struct() {
+    return bar();
+}
+
+// And void functions with calls
+void baz() {}
+
+void test_void() {
+    baz();
+}
diff --git a/clang/test/CIR/Incubator/divergences/virtual-inheritance-vtt.cpp b/clang/test/CIR/Incubator/divergences/virtual-inheritance-vtt.cpp
new file mode 100644
index 0000000000000..b23fdd4f8be38
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/virtual-inheritance-vtt.cpp
@@ -0,0 +1,66 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Virtual inheritance produces VTT (Virtual Table Table) with divergences:
+// 1. Missing comdat on VTT, vtable, type info
+// 2. Type info linkage: should be linkonce_odr constant with comdat, but is just constant
+// 3. Missing unnamed_addr on vtables
+// 4. Missing inrange annotations on GEP instructions in VTT
+// 5. String constants missing null terminators
+//
+// CodeGen:
+//   $_ZTT7Diamond = comdat any
+//   $_ZTV7Diamond = comdat any
+//   $_ZTI4Base = comdat any
+//   @_ZTV7Diamond = linkonce_odr unnamed_addr constant {...}, comdat
+//   @_ZTT7Diamond = linkonce_odr unnamed_addr constant [4 x ptr] [
+//     ptr getelementptr inbounds inrange(-24, 0) ({...}, ptr @_ZTV7Diamond, ...)
+//   ], comdat
+//   @_ZTI4Base = linkonce_odr constant {...}, comdat
+//   @_ZTS4Base = linkonce_odr constant [6 x i8] c"4Base\00", comdat
+//
+// CIR:
+//   @_ZTV7Diamond = linkonce_odr global {...}  (no comdat, no unnamed_addr)
+//   @_ZTT7Diamond = linkonce_odr global [4 x ptr] [
+//     ptr getelementptr inbounds nuw (i8, ptr @_ZTV7Diamond, ...)  (no inrange)
+//   ]  (no comdat, no unnamed_addr)
+//   @_ZTI4Base = constant {...}  (no linkonce_odr, no comdat)
+//   @_ZTS4Base = linkonce_odr global [5 x i8] c"4Base", comdat  (no \00)
+
+// DIFF: -$_ZTV7Diamond = comdat any
+// DIFF: -$_ZTT7Diamond = comdat any
+// DIFF: -$_ZTI{{.*}} = comdat any
+// DIFF: -@_ZTV7Diamond = linkonce_odr unnamed_addr constant
+// DIFF: +@_ZTV7Diamond = linkonce_odr global
+// DIFF: -@_ZTT7Diamond = linkonce_odr unnamed_addr constant
+// DIFF: +@_ZTT7Diamond = linkonce_odr global
+// DIFF: -@_ZTI{{.*}} = linkonce_odr constant
+// DIFF: +@_ZTI{{.*}} = constant
+// DIFF: inrange(-24, 0)
+// DIFF: c"{{.*}}\00"
+// DIFF: -c"{{.*}}\00"
+// DIFF: +c"{{.*}}"
+
+struct Base {
+    int x = 10;
+};
+
+struct Derived1 : virtual Base {
+    int y = 20;
+};
+
+struct Derived2 : virtual Base {
+    int z = 30;
+};
+
+struct Diamond : Derived1, Derived2 {
+    int w = 40;
+};
+
+int test() {
+    Diamond d;
+    return d.x + d.y + d.z + d.w;
+}
diff --git a/clang/test/CIR/Incubator/divergences/vtable-missing-comdat.cpp b/clang/test/CIR/Incubator/divergences/vtable-missing-comdat.cpp
new file mode 100644
index 0000000000000..eabf68e89ffdf
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/vtable-missing-comdat.cpp
@@ -0,0 +1,96 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.cir.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: diff -u %t.ll %t.cir.ll | FileCheck %s --check-prefix=DIFF
+//
+// XFAIL: *
+//
+// Vtable-related structures are missing comdat groups.
+//
+// CodeGen generates comdat for vtables and related structures:
+//   $_ZTV4Base = comdat any
+//   $_ZTI4Base = comdat any  (type info)
+//   $_ZTS4Base = comdat any  (type string)
+//
+// CIR omits these comdat declarations:
+//   @_ZTV4Base = linkonce_odr ... // No comdat
+//
+// This affects:
+// - Vtables ($_ZTV*)
+// - Type info structures ($_ZTI*)
+// - Type name strings ($_ZTS*)
+// - Virtual destructors
+// - Virtual function overrides
+//
+// Impact: Linker cannot merge duplicates, potential code bloat
+
+// DIFF: -$_ZTV4Base = comdat any
+// DIFF: -$_ZTI4Base = comdat any
+// DIFF: -$_ZTS4Base = comdat any
+// DIFF: +# Missing comdat declarations
+
+// Simple vtable
+struct Base {
+    virtual ~Base() {}
+    virtual int get() { return 1; }
+};
+
+int test_simple_vtable() {
+    Base b;
+    return b.get();
+}
+
+// Virtual function override
+struct Derived : Base {
+    int get() override { return 2; }
+};
+
+int test_override() {
+    Derived d;
+    Base* b = &d;
+    return b->get();
+}
+
+// Multiple virtual functions
+struct Multi {
+    virtual int foo() { return 1; }
+    virtual int bar() { return 2; }
+    virtual int baz() { return 3; }
+    virtual ~Multi() {}
+};
+
+int test_multiple() {
+    Multi m;
+    return m.foo() + m.bar() + m.baz();
+}
+
+// Pure virtual
+struct Abstract {
+    virtual int get() = 0;
+    virtual ~Abstract() {}
+};
+
+struct Concrete : Abstract {
+    int get() override { return 42; }
+};
+
+int test_pure_virtual() {
+    Concrete c;
+    Abstract* a = &c;
+    return a->get();
+}
+
+// Virtual with parameters
+struct WithParams {
+    virtual int add(int a, int b) { return a + b; }
+    virtual ~WithParams() {}
+};
+
+struct WithParamsDerived : WithParams {
+    int add(int a, int b) override { return a + b + 1; }
+};
+
+int test_params() {
+    WithParamsDerived d;
+    WithParams* p = &d;
+    return p->add(10, 20);
+}
diff --git a/clang/test/CIR/Incubator/divergences/vtable-thunk-destructor.cpp b/clang/test/CIR/Incubator/divergences/vtable-thunk-destructor.cpp
new file mode 100644
index 0000000000000..3146589e27a9b
--- /dev/null
+++ b/clang/test/CIR/Incubator/divergences/vtable-thunk-destructor.cpp
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ogcg.ll
+// RUN: FileCheck --input-file=%t.ll %s
+// RUN: FileCheck --input-file=%t.ogcg.ll %s
+
+// XFAIL: *
+
+// Test thunk generation for virtual destructors in multiple inheritance
+
+class Base1 {
+public:
+  virtual ~Base1() {}
+  int x;
+};
+
+class Base2 {
+public:
+  virtual ~Base2() {}
+  int y;
+};
+
+class Derived : public Base1, public Base2 {
+public:
+  ~Derived() override {}
+};
+
+void test() {
+  Base2* b2 = new Derived();
+  delete b2;  // Uses destructor thunk
+}
+
+// CHECK-LABEL: define linkonce_odr void @_ZThn16_N7DerivedD1Ev
+//       CHECK: getelementptr inbounds i8, ptr %{{.*}}, i64 -16
+//       CHECK: call void @_ZN7DerivedD1Ev
+
+// CHECK-LABEL: define linkonce_odr void @_ZThn16_N7DerivedD0Ev
+//       CHECK: getelementptr inbounds i8, ptr %{{.*}}, i64 -16
+//       CHECK: call void @_ZN7DerivedD0Ev
diff --git a/clang/test/CIR/Incubator/driver.c b/clang/test/CIR/Incubator/driver.c
new file mode 100644
index 0000000000000..0a223edb28994
--- /dev/null
+++ b/clang/test/CIR/Incubator/driver.c
@@ -0,0 +1,56 @@
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -fclangir-direct-lowering -S -Xclang -emit-cir %s -o %t1.cir
+// RUN: FileCheck --input-file=%t1.cir %s -check-prefix=CIR
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -S -Xclang -emit-cir %s -o %t2.cir
+// RUN: FileCheck --input-file=%t2.cir %s -check-prefix=CIR
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -fclangir-direct-lowering -S -emit-llvm %s -o %t1.ll
+// RUN: FileCheck --input-file=%t1.ll %s -check-prefix=LLVM
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -S -emit-llvm %s -o %t2.ll
+// RUN: FileCheck --input-file=%t2.ll %s -check-prefix=CIR_STD_LLVM
+// Also test the cases where both -fclangir-direct-lowering and -fno-clangir-direct-lowering
+// are given, with -fno-clangir-direct-lowering taking precedence
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -fclangir-direct-lowering -fno-clangir-direct-lowering -S -emit-llvm %s -o %t2.ll
+// RUN: FileCheck --input-file=%t2.ll %s -check-prefix=CIR_STD_LLVM
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -fclangir-direct-lowering -c -emit-llvm %s -o %t1.bc
+// RUN: FileCheck --input-file=%t2.ll %s -check-prefix=CIR_STD_LLVM
+// RUN: llvm-dis %t1.bc -o %t1.bc.ll
+// RUN: FileCheck --input-file=%t1.bc.ll %s -check-prefix=LLVM
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -c -emit-llvm %s -o %t2.bc
+// RUN: llvm-dis %t2.bc -o %t2.bc.ll
+// RUN: FileCheck --input-file=%t2.bc.ll %s -check-prefix=CIR_STD_LLVM
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -c %s -o %t.o
+// RUN: llvm-objdump -d %t.o | FileCheck %s -check-prefix=OBJ
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -clangir-disable-passes -S -Xclang -emit-cir %s -o %t.cir
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -clangir-disable-verifier -S -Xclang -emit-cir %s -o %t.cir
+// RUN: %clang -target arm64-apple-macosx12.0.0 -fclangir -S -Xclang -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR_MACOS
+// RUN: %clang -target arm64-apple-macosx12.0.0 -fclangir -S -emit-llvm %s -o %t3.ll
+// RUN: FileCheck --input-file=%t3.ll %s -check-prefix=LLVM_MACOS
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -fno-clangir-direct-lowering -c %s -o %t.o
+
+void foo(void) {}
+
+//      CIR: module {{.*}} {
+// CIR-NEXT:   cir.func{{.*}} @foo()
+// CIR-NEXT:     cir.return
+// CIR-NEXT:   }
+// CIR-NEXT: }
+
+//      CIR_MACOS: module {{.*}} {
+// CIR_MACOS-NEXT:   cir.func{{.*}} @foo()
+// CIR_MACOS-NEXT:     cir.return
+// CIR_MACOS-NEXT:   }
+// CIR_MACOS-NEXT: }
+
+//      LLVM: define dso_local void @foo()
+// LLVM-NEXT:   ret void
+// LLVM-NEXT: }
+
+//      CIR_STD_LLVM: define void @foo()
+// CIR_STD_LLVM-NEXT:   ret void
+// CIR_STD_LLVM-NEXT: }
+
+//      LLVM_MACOS: define void @foo()
+// LLVM_MACOS-NEXT:   ret void
+// LLVM_MACOS-NEXT: }
+
+// OBJ: 0: c3 retq
diff --git a/clang/test/CIR/Incubator/emit-mlir.c b/clang/test/CIR/Incubator/emit-mlir.c
new file mode 100644
index 0000000000000..5adae9aab9241
--- /dev/null
+++ b/clang/test/CIR/Incubator/emit-mlir.c
@@ -0,0 +1,44 @@
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -emit-mlir %s -o - | FileCheck %s -check-prefix=LLVM
+// RUN: %clang -target x86_64-unknown-linux-gnu -fno-clangir-direct-lowering -emit-mlir %s -o - | FileCheck %s -check-prefix=CORE
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-mlir=llvm %s -o - | FileCheck %s -check-prefix=LLVM
+// RUN: not %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-mlir=core %s -o - 2>&1 | FileCheck %s -check-prefix=CORE_ERR
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fno-clangir-direct-lowering -emit-mlir=llvm %s -o - | FileCheck %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fno-clangir-direct-lowering -emit-mlir=core %s -o - | FileCheck %s -check-prefix=CORE
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-mlir=cir %s -o - | FileCheck %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-mlir=cir-flat %s -o - | FileCheck %s -check-prefix=CIR_FLAT
+
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -emit-mlir %s -o - | FileCheck %s -check-prefix=LLVM
+// RUN: %clang -target x86_64-unknown-linux-gnu -fno-clangir-direct-lowering -emit-mlir %s -o - | FileCheck %s -check-prefix=CORE
+
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -emit-mlir %s -o - -### 2>&1 | FileCheck %s -check-prefix=OPTS_LLVM
+// RUN: %clang -target x86_64-unknown-linux-gnu -fno-clangir-direct-lowering -emit-mlir %s -o - -### 2>&1 | FileCheck %s -check-prefix=OPTS_CORE
+
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -emit-mlir=llvm %s -o - -###  2>&1 | FileCheck %s -check-prefix=OPTS_LLVM
+// RUN: %clang -target x86_64-unknown-linux-gnu -fno-clangir-direct-lowering -emit-mlir=core %s -o - -### 2>&1 | FileCheck %s -check-prefix=OPTS_CORE
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -emit-mlir=cir %s -o - -###  2>&1 | FileCheck %s -check-prefix=OPTS_CIR
+// RUN: %clang -target x86_64-unknown-linux-gnu -fclangir -emit-mlir=cir-flat %s -o - -###  2>&1 | FileCheck %s -check-prefix=OPTS_CIR_FLAT
+
+int foo(int a, int b) {
+    int c;
+    if (a) {
+      c = a;
+    }
+    c = b;
+    return c;
+}
+
+// LLVM: llvm.func @foo
+// CORE: func.func @foo
+// CIR: cir.func {{.*}} @foo
+// CIR: cir.scope
+// CIR_FLAT: cir.func {{.*}} @foo
+// CIR_FLAT: ^bb1
+// CIR_FLAT-NOT: cir.scope
+// CORE_ERR: ClangIR direct lowering is incompatible with emitting of MLIR standard dialects
+// OPTS_LLVM: "-emit-mlir=llvm"
+// OPTS_CORE: "-emit-mlir=core"
+// OPTS_CIR: "-emit-mlir=cir"
+// OPTS_CIR_FLAT: "-emit-mlir=cir-flat"
\ No newline at end of file
diff --git a/clang/test/CIR/Incubator/global-var-simple.cpp b/clang/test/CIR/Incubator/global-var-simple.cpp
new file mode 100644
index 0000000000000..cfe446f343789
--- /dev/null
+++ b/clang/test/CIR/Incubator/global-var-simple.cpp
@@ -0,0 +1,78 @@
+// Global variables of integral types
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck %s --input-file=%t.cir
+
+char c;
+// CHECK: cir.global external @c = #cir.int<0> : !s8i
+
+signed char sc;
+// CHECK: cir.global external @sc = #cir.int<0> : !s8i
+
+unsigned char uc;
+// CHECK: cir.global external @uc = #cir.int<0> : !u8i
+
+short ss;
+// CHECK: cir.global external @ss = #cir.int<0> : !s16i
+
+unsigned short us;
+// CHECK: cir.global external @us = #cir.int<0> : !u16i
+
+int si;
+// CHECK: cir.global external @si = #cir.int<0> : !s32i
+
+unsigned ui;
+// CHECK: cir.global external @ui = #cir.int<0> : !u32i
+
+long sl;
+// CHECK: cir.global external @sl = #cir.int<0> : !s64i
+
+unsigned long ul;
+// CHECK: cir.global external @ul = #cir.int<0> : !u64i
+
+long long sll;
+// CHECK: cir.global external @sll = #cir.int<0> : !s64i
+
+unsigned long long ull;
+// CHECK: cir.global external @ull = #cir.int<0> : !u64i
+
+__int128 s128;
+// CHECK: cir.global external @s128 = #cir.int<0> : !s128i
+
+unsigned __int128 u128;
+// CHECK: cir.global external @u128 = #cir.int<0> : !u128i
+
+wchar_t wc;
+// CHECK: cir.global external @wc = #cir.int<0> : !s32i
+
+char8_t c8;
+// CHECK: cir.global external @c8 = #cir.int<0> : !u8i
+
+char16_t c16;
+// CHECK: cir.global external @c16 = #cir.int<0> : !u16i
+
+char32_t c32;
+// CHECK: cir.global external @c32 = #cir.int<0> : !u32i
+
+_BitInt(20) sb20;
+// CHECK: cir.global external @sb20 = #cir.int<0> : !cir.int<s, 20>
+
+unsigned _BitInt(48) ub48;
+// CHECK: external @ub48 = #cir.int<0> : !u48i
+
+_Float16 f16;
+// CHECK: cir.global external @f16 = #cir.fp<0.000000e+00> : !cir.f16
+
+__bf16 bf16;
+// CHECK: cir.global external @bf16 = #cir.fp<0.000000e+00> : !cir.bf16
+
+float f;
+// CHECK: cir.global external @f = #cir.fp<0.000000e+00> : !cir.float
+
+double d = 1.25;
+// CHECK: cir.global external @d = #cir.fp<1.250000e+00> : !cir.double
+
+long double ld;
+// CHECK: cir.global external @ld = #cir.fp<0.000000e+00> : !cir.long_double<!cir.f80>
+
+__float128 f128;
+// CHECK: cir.global external @f128 = #cir.fp<0.000000e+00> : !cir.f128
diff --git a/clang/test/CIR/Incubator/hello.c b/clang/test/CIR/Incubator/hello.c
new file mode 100644
index 0000000000000..04ca3095ed7fe
--- /dev/null
+++ b/clang/test/CIR/Incubator/hello.c
@@ -0,0 +1,5 @@
+// Smoke test for ClangIR code generation
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o -  | FileCheck %s
+
+void foo() {}
+// CHECK: cir.func {{.*}} @foo
diff --git a/clang/test/CIR/Incubator/mlirargs.c b/clang/test/CIR/Incubator/mlirargs.c
new file mode 100644
index 0000000000000..cfb07197ef185
--- /dev/null
+++ b/clang/test/CIR/Incubator/mlirargs.c
@@ -0,0 +1,12 @@
+// Clang returns 1 when wrong arguments are given.
+// RUN: not %clang_cc1 -mmlir -mlir-disable-threadingd  -mmlir -mlir-print-op-genericd 2>&1 | FileCheck %s --check-prefix=WRONG
+// Test that the driver can pass mlir args to cc1.
+// RUN: %clang -### -mmlir -mlir-disable-threading %s 2>&1 | FileCheck %s --check-prefix=CC1
+
+
+// WRONG: clang (MLIR option parsing): Unknown command line argument '-mlir-disable-threadingd'.  Try: 'clang (MLIR option parsing) --help'
+// WRONG: clang (MLIR option parsing): Did you mean '--mlir-disable-threading'?
+// WRONG: clang (MLIR option parsing): Unknown command line argument '-mlir-print-op-genericd'.  Try: 'clang (MLIR option parsing) --help'
+// WRONG: clang (MLIR option parsing): Did you mean '--mlir-print-op-generic'?
+
+// CC1: "-mmlir" "-mlir-disable-threading"
diff --git a/clang/test/CIR/Incubator/mlirprint.c b/clang/test/CIR/Incubator/mlirprint.c
new file mode 100644
index 0000000000000..f631e8edaee43
--- /dev/null
+++ b/clang/test/CIR/Incubator/mlirprint.c
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-after-all %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir-flat -mmlir --mlir-print-ir-after-all %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=CIRFLAT
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fno-clangir-direct-lowering -emit-mlir=core -mmlir --mlir-print-ir-after-all %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=CIRMLIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -mmlir --mlir-print-ir-after-all -mllvm -print-after-all  %s -o %t.ll 2>&1 | FileCheck %s -check-prefix=CIR -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-drop-ast %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=CIRPASS
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir-flat -mmlir --mlir-print-ir-before=cir-flatten-cfg %s -o %t.cir 2>&1 | FileCheck %s -check-prefix=CFGPASS
+
+int foo(void) {
+  int i = 3;
+  return i;
+}
+
+
+// CIR:  IR Dump After CIRCanonicalize (cir-canonicalize)
+// CIR:  cir.func {{.*}} @foo() -> !s32i
+// CIR:  IR Dump After LoweringPrepare (cir-lowering-prepare)
+// CIR:  cir.func {{.*}} @foo() -> !s32i
+// CIR-NOT: IR Dump After FlattenCFG
+// CIR-NOT: IR Dump After SCFPrepare
+// CIR:  IR Dump After DropAST (cir-drop-ast)
+// CIR:  cir.func {{.*}} @foo() -> !s32i
+// CIRFLAT:  IR Dump After CIRCanonicalize (cir-canonicalize)
+// CIRFLAT:  cir.func {{.*}} @foo() -> !s32i
+// CIRFLAT:  IR Dump After LoweringPrepare (cir-lowering-prepare)
+// CIRFLAT:  cir.func {{.*}} @foo() -> !s32i
+// CIRFLAT:  IR Dump After FlattenCFG (cir-flatten-cfg)
+// CIRFLAT:  IR Dump After DropAST (cir-drop-ast)
+// CIRFLAT:  cir.func {{.*}} @foo() -> !s32i
+// CIRMLIR:  IR Dump After CIRCanonicalize (cir-canonicalize)
+// CIRMLIR:  IR Dump After LoweringPrepare (cir-lowering-prepare)
+// CIRMLIR:  IR Dump After SCFPrepare (cir-mlir-scf-prepare)
+// CIRMLIR:  IR Dump After DropAST (cir-drop-ast)
+// LLVM: IR Dump After cir::direct::ConvertCIRToLLVMPass (cir-flat-to-llvm)
+// LLVM: llvm.func @foo() -> i32
+// LLVM: IR Dump After
+// LLVM: define dso_local i32 @foo()
+
+// CIRPASS-NOT:  IR Dump After CIRCanonicalize
+// CIRPASS:      IR Dump After DropAST
+
+// CFGPASS: IR Dump Before FlattenCFG (cir-flatten-cfg)



More information about the cfe-commits mailing list