[llvm] [SVE][InstCombine] Fold ld1d and splice into ld1ro (PR #69565)

Louis Dionne via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 19 18:09:56 PDT 2023


Valentin Clement =?utf-8?b?KOODkOODrOODsw=?=,Matt Arsenault
 <Matthew.Arsenault at amd.com>,"Yaxun (Sam) Liu" <yaxun.liu at amd.com>,Benjamin
 Maxwell <benjamin.maxwell at arm.com>,Caroline Concatto
 <caroline.concatto at arm.com>,Alexey Bataev <a.bataev at outlook.com>,Thurston
 Dang <thurston.dang at gmail.com>,Yinying Li
 <107574043+yinying-lisa-li at users.noreply.github.com>,Konstantin Zhuravlyov
 <kzhuravl_dev at outlook.com>,Caroline Concatto <caroline.concatto at arm.com>,Luke
 Lau <luke at igalia.com>,Alex Langford <alangford at apple.com>,Jonas Devlieghere
 <jonas at devlieghere.com>,"Yaxun (Sam) Liu" <yaxun.liu at amd.com>,bjacob
 <jacob.benoit.1 at gmail.com>,LLVM GN Syncbot <llvmgnsyncbot at gmail.com>,LLVM GN
 Syncbot <llvmgnsyncbot at gmail.com>,Arthur Eubanks <aeubanks at google.com>,Jonas
 Devlieghere <jonas at devlieghere.com>,Haowei <haowei at google.com>,Florian Hahn
 <flo at fhahn.com>,alfredfo <98554039+alfredfo at users.noreply.github.com>,Stanislav
 Mekhanoshin <rampitec at users.noreply.github.com>,Tobias Stadler
 <mail at stadler-tobias.de>,Wang Pengcheng
 <137158460+wangpc-pp at users.noreply.github.com>,Fangrui Song <i at maskray.me>,Alfred
 Persson Forsberg <cat at catcream.org>,erichkeane <ekeane at nvidia.com>,Jonas
 Devlieghere <jonas at devlieghere.com>,LLVM GN Syncbot <llvmgnsyncbot at gmail.com>
 =?utf-8?q?,?=Nico Weber <thakis at chromium.org>,Anton Rydahl
 <44206479+AntonRydahl at users.noreply.github.com>,Duo Wang <duow1 at uci.edu>,Florian
 Hahn <flo at fhahn.com>,Jacques Pienaar <jpienaar at google.com>,Aleksandr
 Platonov <platonov.aleksandr at huawei.com>,Jonas Devlieghere
 <jonas at devlieghere.com>,Owen Pan <owenpiano at gmail.com>,Austin Kerbow
 <Austin.Kerbow at amd.com>,Aart Bik <39774503+aartbik at users.noreply.github.com>,Arthur
 Eubanks <aeubanks at google.com>,Ryan Prichard <rprichard at google.com>,Nuri
 Amari <nuriamari at meta.com>,Joseph Huber
 <35342157+jhuber6 at users.noreply.github.com>,Joseph Huber
 <35342157+jhuber6 at users.noreply.github.com>,Tom Stellard
 <tstellar at redhat.com>,Valentin Clement =?utf-8?b?KOODkOODrOODsw=?=,Felipe
 de Azevedo Piovezan <fpiovezan at apple.com>,Ryan Prichard
 <rprichard at google.com>,Maksim Levental <maksim.levental at gmail.com>,Yinying
 Li <107574043+yinying-lisa-li at users.noreply.github.com>,Daniel Thornburgh
 <dthorn at google.com>,Min-Yih Hsu <min.hsu at sifive.com>,Jonas Devlieghere
 <jonas at devlieghere.com>,Emilio Cota <ecg at google.com>,Peiming Liu
 <36770114+PeimingLiu at users.noreply.github.com>,ChiaHungDuan
 <chiahungduan at google.com>,Finn Plummer
 <50529406+inbelic at users.noreply.github.com>,Nico Weber <thakis at chromium.org>,Maksim
 Levental <maksim.levental at gmail.com>,Ryosuke Niwa <rniwa at webkit.org>,Amara
 Emerson <amara at apple.com>,lntue <35648136+lntue at users.noreply.github.com>,Aart
 Bik <39774503+aartbik at users.noreply.github.com>,
Valentin Clement =?utf-8?b?KOODkOODrOODsw=?=,Ryan Prichard
 <rprichard at google.com>,zhongyunde 00443407 <zhongyunde at huawei.com>
Message-ID:
In-Reply-To: <llvm/llvm-project/pull/69565/llvm at github.com>


https://github.com/ldionne updated https://github.com/llvm/llvm-project/pull/69565

>From 25002b7aeb2f9c64764861d8106a2def57cff9cc Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 19 Oct 2023 14:48:03 +0100
Subject: [PATCH 01/76] [InstCombine] Add additional aligned allocation tests
 for #69474.

---
 .../Transforms/InstCombine/malloc-free.ll     | 44 ++++++++++++++++++-
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/malloc-free.ll b/llvm/test/Transforms/InstCombine/malloc-free.ll
index 29c757f82564ad8..b77f70f239921e8 100644
--- a/llvm/test/Transforms/InstCombine/malloc-free.ll
+++ b/llvm/test/Transforms/InstCombine/malloc-free.ll
@@ -26,8 +26,8 @@ define i32 @dead_aligned_alloc(i32 %size, i32 %alignment, i8 %value) {
   ret i32 0
 }
 
-define i1 @aligned_alloc_only_pointe(i32 %size, i32 %alignment, i8 %value) {
-; CHECK-LABEL: @aligned_alloc_only_pointe(
+define i1 @aligned_alloc_pointer_only_used_by_cmp(i32 %size, i32 %alignment, i8 %value) {
+; CHECK-LABEL: @aligned_alloc_pointer_only_used_by_cmp(
 ; CHECK-NEXT:    ret i1 true
 ;
   %aligned_allocation = tail call ptr @aligned_alloc(i32 %alignment, i32 %size)
@@ -35,9 +35,49 @@ define i1 @aligned_alloc_only_pointe(i32 %size, i32 %alignment, i8 %value) {
   ret i1 %cmp
 }
 
+define i1 @aligned_alloc_pointer_only_used_by_cmp_alignment_and_value_known_ok(i32 %size, i32 %alignment, i8 %value) {
+; CHECK-LABEL: @aligned_alloc_pointer_only_used_by_cmp_alignment_and_value_known_ok(
+; CHECK-NEXT:    ret i1 true
+;
+  %aligned_allocation = tail call ptr @aligned_alloc(i32 8, i32 32)
+  %cmp = icmp ne ptr %aligned_allocation, null
+  ret i1 %cmp
+}
+
+define i1 @aligned_alloc_pointer_only_used_by_cmp_alignment_no_power_of_2(i32 %size, i32 %alignment, i8 %value) {
+; CHECK-LABEL: @aligned_alloc_pointer_only_used_by_cmp_alignment_no_power_of_2(
+; CHECK-NEXT:    ret i1 true
+;
+  %aligned_allocation = tail call ptr @aligned_alloc(i32 3, i32 32)
+  %cmp = icmp ne ptr %aligned_allocation, null
+  ret i1 %cmp
+}
+
+define i1 @aligned_alloc_pointer_only_used_by_cmp_size_not_multiple_of_alignment(i32 %size, i32 %alignment, i8 %value) {
+; CHECK-LABEL: @aligned_alloc_pointer_only_used_by_cmp_size_not_multiple_of_alignment(
+; CHECK-NEXT:    ret i1 true
+;
+  %aligned_allocation = tail call ptr @aligned_alloc(i32 8, i32 31)
+  %cmp = icmp ne ptr %aligned_allocation, null
+  ret i1 %cmp
+}
+
+; This test uses a aligned allocation function different to @aligned_alloc,
+; and should be treated as having @aligned_alloc's constraints on alignment
+; and size operands.
+define i1 @other_aligned_allocation_function(i32 %size, i32 %alignment, i8 %value) {
+; CHECK-LABEL: @other_aligned_allocation_function(
+; CHECK-NEXT:    ret i1 true
+;
+  %aligned_allocation = tail call ptr @other_aligned_alloc(i32 %alignment, i32 %size)
+  %cmp = icmp ne ptr %aligned_allocation, null
+  ret i1 %cmp
+}
+
 declare noalias ptr @calloc(i32, i32) nounwind allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc"
 declare noalias ptr @malloc(i32) allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc"
 declare noalias ptr @aligned_alloc(i32, i32) allockind("alloc,uninitialized,aligned") allocsize(1) "alloc-family"="malloc"
+declare noalias ptr @other_aligned_alloc(i32, i32) allockind("alloc,uninitialized,aligned") allocsize(1) "alloc-family"="malloc"
 declare void @free(ptr) allockind("free") "alloc-family"="malloc"
 
 define i1 @foo() {

>From 40a426fac6ff7c32bc239bf55ebe322368accc16 Mon Sep 17 00:00:00 2001
From: Pierre van Houtryve <pierre.vanhoutryve at amd.com>
Date: Thu, 19 Oct 2023 16:05:51 +0200
Subject: [PATCH 02/76] [AMDGPU] Constant fold FMAD_FTZ (#69443)

Solves #68315
---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |   30 +
 .../AMDGPU/amdgpu-codegenprepare-idiv.ll      | 1151 ++++++-----------
 llvm/test/CodeGen/AMDGPU/udiv.ll              |  116 +-
 llvm/test/CodeGen/AMDGPU/udiv64.ll            |  160 +--
 llvm/test/CodeGen/AMDGPU/urem64.ll            |   85 +-
 5 files changed, 572 insertions(+), 970 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 607d59db7bcf709..adf4e0139e03c1d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -5041,6 +5041,36 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
     return performAssertSZExtCombine(N, DCI);
   case ISD::INTRINSIC_WO_CHAIN:
     return performIntrinsicWOChainCombine(N, DCI);
+  case AMDGPUISD::FMAD_FTZ: {
+    SDValue N0 = N->getOperand(0);
+    SDValue N1 = N->getOperand(1);
+    SDValue N2 = N->getOperand(2);
+    EVT VT = N->getValueType(0);
+
+    // FMAD_FTZ is a FMAD + flush denormals to zero.
+    // We flush the inputs, the intermediate step, and the output.
+    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+    ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+    ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
+    if (N0CFP && N1CFP && N2CFP) {
+      const auto FTZ = [](const APFloat &V) {
+        if (V.isDenormal()) {
+          APFloat Zero(V.getSemantics(), 0);
+          return V.isNegative() ? -Zero : Zero;
+        }
+        return V;
+      };
+
+      APFloat V0 = FTZ(N0CFP->getValueAPF());
+      APFloat V1 = FTZ(N1CFP->getValueAPF());
+      APFloat V2 = FTZ(N2CFP->getValueAPF());
+      V0.multiply(V1, APFloat::rmNearestTiesToEven);
+      V0 = FTZ(V0);
+      V0.add(V2, APFloat::rmNearestTiesToEven);
+      return DAG.getConstantFP(FTZ(V0), DL, VT);
+    }
+    break;
+  }
   }
   return SDValue();
 }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
index c793f9ee682f8c4..7eb1cb926c190de 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
@@ -7139,70 +7139,42 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ;
 ; GFX6-LABEL: udiv_i64_oddk_denom:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    v_mov_b32_e32 v0, 0x4f176a73
-; GFX6-NEXT:    v_mov_b32_e32 v1, 0x4f800000
-; GFX6-NEXT:    v_madmk_f32 v0, v1, 0x438f8000, v0
-; GFX6-NEXT:    v_rcp_f32_e32 v0, v0
-; GFX6-NEXT:    s_movk_i32 s4, 0xfee0
-; GFX6-NEXT:    s_mov_b32 s5, 0x68958c89
+; GFX6-NEXT:    s_add_u32 s4, 3, 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, 0xe3e0f6
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, s4, v0
+; GFX6-NEXT:    s_addc_u32 s5, 0, 0
+; GFX6-NEXT:    s_or_b32 s4, vcc_lo, vcc_hi
+; GFX6-NEXT:    s_cmp_lg_u32 s4, 0
+; GFX6-NEXT:    s_mov_b32 s4, 0x68958c89
+; GFX6-NEXT:    s_movk_i32 s6, 0xfee0
+; GFX6-NEXT:    v_mul_lo_u32 v1, v0, s6
+; GFX6-NEXT:    v_mul_hi_u32 v2, v0, s4
+; GFX6-NEXT:    s_addc_u32 s5, s5, 0
+; GFX6-NEXT:    s_mul_i32 s6, s5, 0x68958c89
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX6-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
-; GFX6-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
-; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX6-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
-; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX6-NEXT:    s_movk_i32 s8, 0x11f
-; GFX6-NEXT:    s_mov_b32 s9, 0x976a7377
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
 ; GFX6-NEXT:    v_mul_lo_u32 v2, v0, s4
-; GFX6-NEXT:    v_mul_hi_u32 v3, v0, s5
-; GFX6-NEXT:    v_mul_lo_u32 v4, v1, s5
-; GFX6-NEXT:    v_mul_lo_u32 v5, v0, s5
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; GFX6-NEXT:    v_mul_lo_u32 v3, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v4, v0, v5
-; GFX6-NEXT:    v_mul_hi_u32 v6, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v7, v1, v2
-; GFX6-NEXT:    v_mul_lo_u32 v2, v1, v2
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, s6, v1
+; GFX6-NEXT:    v_mul_lo_u32 v3, v0, v1
+; GFX6-NEXT:    v_mul_hi_u32 v4, v0, v2
+; GFX6-NEXT:    v_mul_hi_u32 v5, v0, v1
+; GFX6-NEXT:    v_mul_hi_u32 v6, s5, v1
+; GFX6-NEXT:    v_mul_lo_u32 v1, s5, v1
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
-; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v6, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v6, v1, v5
-; GFX6-NEXT:    v_mul_hi_u32 v5, v1, v5
-; GFX6-NEXT:    s_mov_b32 s6, -1
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v4, v5, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v7, vcc
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v2, v0, s4
-; GFX6-NEXT:    v_mul_hi_u32 v3, v0, s5
-; GFX6-NEXT:    v_mul_lo_u32 v4, v1, s5
+; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v5, vcc
+; GFX6-NEXT:    v_mul_lo_u32 v5, s5, v2
+; GFX6-NEXT:    v_mul_hi_u32 v2, s5, v2
+; GFX6-NEXT:    s_movk_i32 s8, 0x11f
+; GFX6-NEXT:    s_mov_b32 s9, 0x976a7377
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, v4, v2, vcc
+; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v6, vcc
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
+; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v3, vcc
+; GFX6-NEXT:    v_mov_b32_e32 v3, s5
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v3, v2, vcc
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_mov_b32 s4, s0
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT:    v_mul_lo_u32 v3, v0, s5
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
-; GFX6-NEXT:    v_mul_lo_u32 v4, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v5, v0, v3
-; GFX6-NEXT:    v_mul_hi_u32 v6, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v7, v1, v2
-; GFX6-NEXT:    v_mul_lo_u32 v2, v1, v2
-; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, 0, v6, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v6, v1, v3
-; GFX6-NEXT:    v_mul_hi_u32 v3, v1, v3
-; GFX6-NEXT:    s_mov_b32 s5, s1
-; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v5, v3, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v7, vcc
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
 ; GFX6-NEXT:    v_mul_lo_u32 v2, s2, v1
 ; GFX6-NEXT:    v_mul_hi_u32 v3, s2, v0
 ; GFX6-NEXT:    v_mul_hi_u32 v4, s2, v1
@@ -7212,6 +7184,8 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
 ; GFX6-NEXT:    v_mul_lo_u32 v4, s3, v0
 ; GFX6-NEXT:    v_mul_hi_u32 v0, s3, v0
+; GFX6-NEXT:    s_mov_b32 s4, s0
+; GFX6-NEXT:    s_mov_b32 s5, s1
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; GFX6-NEXT:    v_addc_u32_e32 v0, vcc, v3, v0, vcc
 ; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
@@ -7221,6 +7195,7 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX6-NEXT:    v_mul_hi_u32 v3, v0, s9
 ; GFX6-NEXT:    v_mul_lo_u32 v4, v1, s9
 ; GFX6-NEXT:    v_mov_b32_e32 v5, 0x11f
+; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; GFX6-NEXT:    v_mul_lo_u32 v3, v0, s9
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
@@ -7253,6 +7228,7 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, s8, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v6, v3, vcc
 ; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; GFX6-NEXT:    s_mov_b32 s6, -1
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -7260,44 +7236,14 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ;
 ; GFX9-LABEL: udiv_i64_oddk_denom:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0x4f176a73
-; GFX9-NEXT:    v_mov_b32_e32 v1, 0x4f800000
-; GFX9-NEXT:    v_madmk_f32 v0, v1, 0x438f8000, v0
-; GFX9-NEXT:    v_rcp_f32_e32 v0, v0
 ; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
-; GFX9-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
-; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX9-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX9-NEXT:    s_mul_i32 s1, s0, 0xfffffee0
-; GFX9-NEXT:    s_mul_hi_u32 s2, s0, 0x68958c89
-; GFX9-NEXT:    s_add_i32 s1, s2, s1
-; GFX9-NEXT:    v_readfirstlane_b32 s2, v1
-; GFX9-NEXT:    s_mul_i32 s3, s2, 0x68958c89
-; GFX9-NEXT:    s_add_i32 s1, s1, s3
-; GFX9-NEXT:    s_mul_i32 s9, s0, 0x68958c89
-; GFX9-NEXT:    s_mul_hi_u32 s3, s0, s1
-; GFX9-NEXT:    s_mul_i32 s8, s0, s1
-; GFX9-NEXT:    s_mul_hi_u32 s0, s0, s9
-; GFX9-NEXT:    s_add_u32 s0, s0, s8
-; GFX9-NEXT:    s_addc_u32 s3, 0, s3
-; GFX9-NEXT:    s_mul_hi_u32 s10, s2, s9
-; GFX9-NEXT:    s_mul_i32 s9, s2, s9
-; GFX9-NEXT:    s_add_u32 s0, s0, s9
-; GFX9-NEXT:    s_mul_hi_u32 s8, s2, s1
-; GFX9-NEXT:    s_addc_u32 s0, s3, s10
-; GFX9-NEXT:    s_addc_u32 s3, s8, 0
-; GFX9-NEXT:    s_mul_i32 s1, s2, s1
-; GFX9-NEXT:    s_add_u32 s0, s0, s1
-; GFX9-NEXT:    s_addc_u32 s1, 0, s3
+; GFX9-NEXT:    s_add_u32 s0, 3, 0
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0xe3e0f6
+; GFX9-NEXT:    s_addc_u32 s1, 0, 0
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s0, v0
 ; GFX9-NEXT:    s_cmp_lg_u64 vcc, 0
-; GFX9-NEXT:    s_addc_u32 s0, s2, s1
 ; GFX9-NEXT:    v_readfirstlane_b32 s2, v0
+; GFX9-NEXT:    s_addc_u32 s0, s1, 0
 ; GFX9-NEXT:    s_mul_i32 s3, s2, 0xfffffee0
 ; GFX9-NEXT:    s_mul_hi_u32 s8, s2, 0x68958c89
 ; GFX9-NEXT:    s_mul_i32 s1, s0, 0x68958c89
@@ -7385,6 +7331,7 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX9-NEXT:    v_mov_b32_e32 v4, s2
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s3
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v1, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[4:5]
@@ -7529,74 +7476,53 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out, <
 ;
 ; GFX6-LABEL: udiv_v2i64_mixed_pow2k_denom:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    v_mov_b32_e32 v0, 0x4f800000
-; GFX6-NEXT:    v_madak_f32 v0, 0, v0, 0x457ff000
-; GFX6-NEXT:    v_rcp_f32_e32 v0, v0
-; GFX6-NEXT:    s_movk_i32 s6, 0xf001
-; GFX6-NEXT:    s_movk_i32 s8, 0xfff
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
-; GFX6-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
-; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX6-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
-; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
-; GFX6-NEXT:    v_mul_hi_u32 v2, v0, s6
-; GFX6-NEXT:    v_mul_lo_u32 v4, v1, s6
-; GFX6-NEXT:    v_mul_lo_u32 v3, v0, s6
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_mov_b32 s2, 0x2ff2fc01
+; GFX6-NEXT:    v_bfrev_b32_e32 v0, 7
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_lshr_b64 s[0:1], s[0:1], 12
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v0
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; GFX6-NEXT:    v_mul_hi_u32 v5, v0, v3
-; GFX6-NEXT:    v_mul_lo_u32 v4, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v6, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v7, v1, v2
-; GFX6-NEXT:    v_mul_lo_u32 v2, v1, v2
-; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, 0, v6, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v6, v1, v3
-; GFX6-NEXT:    v_mul_hi_u32 v3, v1, v3
-; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v5, v3, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v7, vcc
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
-; GFX6-NEXT:    v_mul_hi_u32 v2, v0, s6
-; GFX6-NEXT:    v_mul_lo_u32 v3, v1, s6
-; GFX6-NEXT:    v_mul_lo_u32 v4, v0, s6
-; GFX6-NEXT:    s_mov_b32 s6, -1
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v0
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GFX6-NEXT:    v_mul_lo_u32 v3, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v5, v0, v4
-; GFX6-NEXT:    v_mul_hi_u32 v6, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v7, v1, v2
-; GFX6-NEXT:    v_mul_lo_u32 v2, v1, v2
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
-; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, 0, v6, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v6, v1, v4
-; GFX6-NEXT:    v_mul_hi_u32 v4, v1, v4
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v5, v4, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v7, vcc
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v2, s2, v1
-; GFX6-NEXT:    v_mul_hi_u32 v3, s2, v0
-; GFX6-NEXT:    v_mul_hi_u32 v4, s2, v1
-; GFX6-NEXT:    v_mul_hi_u32 v5, s3, v1
-; GFX6-NEXT:    v_mul_lo_u32 v1, s3, v1
+; GFX6-NEXT:    s_lshr_b64 s[4:5], s[4:5], 12
+; GFX6-NEXT:    s_add_u32 s2, 0xe037f, s2
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, s2, v0
+; GFX6-NEXT:    s_addc_u32 s3, 0, 0
+; GFX6-NEXT:    s_or_b32 s2, vcc_lo, vcc_hi
+; GFX6-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX6-NEXT:    s_movk_i32 s2, 0xf001
+; GFX6-NEXT:    v_mul_hi_u32 v1, v0, s2
+; GFX6-NEXT:    v_mul_lo_u32 v2, v0, s2
+; GFX6-NEXT:    s_addc_u32 s8, s3, 0x1000ff
+; GFX6-NEXT:    s_mul_i32 s3, s8, 0xfffff001
+; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v0
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, s3, v1
+; GFX6-NEXT:    v_mul_lo_u32 v3, v0, v1
+; GFX6-NEXT:    v_mul_hi_u32 v4, v0, v2
+; GFX6-NEXT:    v_mul_hi_u32 v5, v0, v1
+; GFX6-NEXT:    v_mul_hi_u32 v6, s8, v1
+; GFX6-NEXT:    v_mul_lo_u32 v1, s8, v1
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
+; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v5, vcc
+; GFX6-NEXT:    v_mul_lo_u32 v5, s8, v2
+; GFX6-NEXT:    v_mul_hi_u32 v2, s8, v2
+; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_mov_b32 s2, -1
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, v4, v2, vcc
+; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v6, vcc
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
+; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v3, vcc
+; GFX6-NEXT:    v_mov_b32_e32 v3, s8
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v3, v2, vcc
+; GFX6-NEXT:    v_mul_lo_u32 v2, s6, v1
+; GFX6-NEXT:    v_mul_hi_u32 v3, s6, v0
+; GFX6-NEXT:    v_mul_hi_u32 v4, s6, v1
+; GFX6-NEXT:    v_mul_hi_u32 v5, s7, v1
+; GFX6-NEXT:    v_mul_lo_u32 v1, s7, v1
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
 ; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v4, s3, v0
-; GFX6-NEXT:    v_mul_hi_u32 v0, s3, v0
+; GFX6-NEXT:    v_mul_lo_u32 v4, s7, v0
+; GFX6-NEXT:    v_mul_hi_u32 v0, s7, v0
+; GFX6-NEXT:    s_movk_i32 s8, 0xfff
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; GFX6-NEXT:    v_addc_u32_e32 v0, vcc, v3, v0, vcc
 ; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
@@ -7610,73 +7536,46 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out, <
 ; GFX6-NEXT:    v_add_i32_e32 v6, vcc, 2, v0
 ; GFX6-NEXT:    v_addc_u32_e32 v7, vcc, 0, v1, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GFX6-NEXT:    v_mov_b32_e32 v5, s3
-; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, s2, v8
+; GFX6-NEXT:    v_mov_b32_e32 v5, s7
+; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, s6, v8
 ; GFX6-NEXT:    v_subb_u32_e32 v4, vcc, v5, v4, vcc
 ; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, s8, v8
 ; GFX6-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v4, vcc
-; GFX6-NEXT:    s_movk_i32 s2, 0xffe
-; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v5
+; GFX6-NEXT:    s_movk_i32 s6, 0xffe
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v5
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
 ; GFX6-NEXT:    v_cndmask_b32_e32 v5, -1, v5, vcc
 ; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v8
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v8
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, -1, v5, vcc
 ; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX6-NEXT:    v_cndmask_b32_e32 v3, v1, v3, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v0, v2, vcc
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: udiv_v2i64_mixed_pow2k_denom:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0x457ff000
-; GFX9-NEXT:    v_mov_b32_e32 v1, 0x4f800000
-; GFX9-NEXT:    v_mac_f32_e32 v0, 0, v1
-; GFX9-NEXT:    v_rcp_f32_e32 v0, v0
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    s_mov_b32 s8, 0x2ff2fc01
+; GFX9-NEXT:    v_bfrev_b32_e32 v0, 7
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
-; GFX9-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
-; GFX9-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
-; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX9-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_lshr_b64 s[0:1], s[4:5], 12
-; GFX9-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX9-NEXT:    s_mul_hi_u32 s5, s4, 0xfffff001
-; GFX9-NEXT:    v_readfirstlane_b32 s8, v1
-; GFX9-NEXT:    s_sub_i32 s5, s5, s4
-; GFX9-NEXT:    s_mul_i32 s9, s8, 0xfffff001
-; GFX9-NEXT:    s_add_i32 s5, s5, s9
-; GFX9-NEXT:    s_mul_i32 s11, s4, 0xfffff001
-; GFX9-NEXT:    s_mul_hi_u32 s9, s4, s5
-; GFX9-NEXT:    s_mul_i32 s10, s4, s5
-; GFX9-NEXT:    s_mul_hi_u32 s4, s4, s11
-; GFX9-NEXT:    s_add_u32 s4, s4, s10
-; GFX9-NEXT:    s_addc_u32 s9, 0, s9
-; GFX9-NEXT:    s_mul_hi_u32 s12, s8, s11
-; GFX9-NEXT:    s_mul_i32 s11, s8, s11
-; GFX9-NEXT:    s_add_u32 s4, s4, s11
-; GFX9-NEXT:    s_mul_hi_u32 s10, s8, s5
-; GFX9-NEXT:    s_addc_u32 s4, s9, s12
-; GFX9-NEXT:    s_addc_u32 s9, s10, 0
-; GFX9-NEXT:    s_mul_i32 s5, s8, s5
-; GFX9-NEXT:    s_add_u32 s4, s4, s5
-; GFX9-NEXT:    s_addc_u32 s5, 0, s9
+; GFX9-NEXT:    s_add_u32 s4, 0xe037f, s8
+; GFX9-NEXT:    s_addc_u32 s5, 0, 0
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v0
 ; GFX9-NEXT:    s_cmp_lg_u64 vcc, 0
-; GFX9-NEXT:    s_addc_u32 s4, s8, s5
 ; GFX9-NEXT:    v_readfirstlane_b32 s8, v0
+; GFX9-NEXT:    s_addc_u32 s4, s5, 0x1000ff
 ; GFX9-NEXT:    s_mul_hi_u32 s9, s8, 0xfffff001
 ; GFX9-NEXT:    s_mul_i32 s5, s4, 0xfffff001
 ; GFX9-NEXT:    s_sub_i32 s9, s9, s8
@@ -7824,165 +7723,110 @@ define amdgpu_kernel void @urem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ;
 ; GFX6-LABEL: urem_i64_oddk_denom:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    v_mov_b32_e32 v0, 0x4f1761f8
-; GFX6-NEXT:    v_mov_b32_e32 v1, 0x4f800000
-; GFX6-NEXT:    v_madmk_f32 v0, v1, 0x438f8000, v0
-; GFX6-NEXT:    v_rcp_f32_e32 v0, v0
+; GFX6-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x9
+; GFX6-NEXT:    s_add_u32 s0, 4, 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, 0xe3e0fc
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, s0, v0
+; GFX6-NEXT:    s_addc_u32 s1, 0, 0
+; GFX6-NEXT:    s_or_b32 s0, vcc_lo, vcc_hi
+; GFX6-NEXT:    s_cmp_lg_u32 s0, 0
+; GFX6-NEXT:    s_mov_b32 s0, 0x689e0837
 ; GFX6-NEXT:    s_movk_i32 s2, 0xfee0
-; GFX6-NEXT:    s_mov_b32 s3, 0x689e0837
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
-; GFX6-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
-; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX6-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
-; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX6-NEXT:    v_mul_lo_u32 v1, v0, s2
+; GFX6-NEXT:    v_mul_hi_u32 v2, v0, s0
+; GFX6-NEXT:    s_addc_u32 s1, s1, 0
+; GFX6-NEXT:    s_mul_i32 s2, s1, 0x689e0837
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_mov_b32 s8, s4
-; GFX6-NEXT:    s_movk_i32 s4, 0x11f
-; GFX6-NEXT:    v_mul_lo_u32 v2, v0, s2
-; GFX6-NEXT:    v_mul_hi_u32 v3, v0, s3
-; GFX6-NEXT:    v_mul_lo_u32 v4, v1, s3
-; GFX6-NEXT:    v_mul_lo_u32 v5, v0, s3
-; GFX6-NEXT:    s_mov_b32 s12, 0x9761f7c9
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; GFX6-NEXT:    v_mul_lo_u32 v3, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v4, v0, v5
-; GFX6-NEXT:    v_mul_hi_u32 v6, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v7, v1, v2
-; GFX6-NEXT:    v_mul_lo_u32 v2, v1, v2
+; GFX6-NEXT:    s_mov_b32 s4, s8
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; GFX6-NEXT:    v_mul_lo_u32 v2, v0, s0
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, s2, v1
+; GFX6-NEXT:    v_mul_lo_u32 v3, v0, v1
+; GFX6-NEXT:    v_mul_hi_u32 v4, v0, v2
+; GFX6-NEXT:    v_mul_hi_u32 v5, v0, v1
+; GFX6-NEXT:    v_mul_hi_u32 v6, s1, v1
+; GFX6-NEXT:    v_mul_lo_u32 v1, s1, v1
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
-; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v6, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v6, v1, v5
-; GFX6-NEXT:    v_mul_hi_u32 v5, v1, v5
-; GFX6-NEXT:    s_mov_b32 s9, s5
-; GFX6-NEXT:    s_movk_i32 s5, 0x11e
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v4, v5, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v7, vcc
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v2, v0, s2
-; GFX6-NEXT:    v_mul_hi_u32 v3, v0, s3
-; GFX6-NEXT:    v_mul_lo_u32 v4, v1, s3
-; GFX6-NEXT:    s_mov_b32 s11, 0xf000
-; GFX6-NEXT:    s_mov_b32 s10, -1
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT:    v_mul_lo_u32 v3, v0, s3
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
-; GFX6-NEXT:    v_mul_lo_u32 v4, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v5, v0, v3
-; GFX6-NEXT:    v_mul_hi_u32 v6, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v7, v1, v2
-; GFX6-NEXT:    v_mul_lo_u32 v2, v1, v2
-; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, 0, v6, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v6, v1, v3
-; GFX6-NEXT:    v_mul_hi_u32 v3, v1, v3
-; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v5, v3, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v7, vcc
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v2, s6, v1
-; GFX6-NEXT:    v_mul_hi_u32 v3, s6, v0
-; GFX6-NEXT:    v_mul_hi_u32 v4, s6, v1
-; GFX6-NEXT:    v_mul_hi_u32 v5, s7, v1
-; GFX6-NEXT:    v_mul_lo_u32 v1, s7, v1
+; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v5, vcc
+; GFX6-NEXT:    v_mul_lo_u32 v5, s1, v2
+; GFX6-NEXT:    v_mul_hi_u32 v2, s1, v2
+; GFX6-NEXT:    s_movk_i32 s8, 0x11f
+; GFX6-NEXT:    s_mov_b32 s12, 0x9761f7c9
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, v4, v2, vcc
+; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v6, vcc
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
+; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v3, vcc
+; GFX6-NEXT:    v_mov_b32_e32 v3, s1
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v3, v2, vcc
+; GFX6-NEXT:    v_mul_lo_u32 v2, s10, v1
+; GFX6-NEXT:    v_mul_hi_u32 v3, s10, v0
+; GFX6-NEXT:    v_mul_hi_u32 v4, s10, v1
+; GFX6-NEXT:    v_mul_hi_u32 v5, s11, v1
+; GFX6-NEXT:    v_mul_lo_u32 v1, s11, v1
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
 ; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v4, s7, v0
-; GFX6-NEXT:    v_mul_hi_u32 v0, s7, v0
+; GFX6-NEXT:    v_mul_lo_u32 v4, s11, v0
+; GFX6-NEXT:    v_mul_hi_u32 v0, s11, v0
+; GFX6-NEXT:    s_mov_b32 s5, s9
+; GFX6-NEXT:    s_movk_i32 s9, 0x11e
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; GFX6-NEXT:    v_addc_u32_e32 v0, vcc, v3, v0, vcc
 ; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v2, v0, s4
+; GFX6-NEXT:    v_mul_lo_u32 v2, v0, s8
 ; GFX6-NEXT:    v_mul_hi_u32 v3, v0, s12
 ; GFX6-NEXT:    v_mul_lo_u32 v1, v1, s12
 ; GFX6-NEXT:    v_mul_lo_u32 v0, v0, s12
+; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s7, v1
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s11, v1
 ; GFX6-NEXT:    v_mov_b32_e32 v3, 0x11f
-; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s6, v0
+; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s10, v0
 ; GFX6-NEXT:    v_subb_u32_e64 v2, s[0:1], v2, v3, vcc
 ; GFX6-NEXT:    v_subrev_i32_e64 v4, s[0:1], s12, v0
 ; GFX6-NEXT:    v_subbrev_u32_e64 v5, s[2:3], 0, v2, s[0:1]
-; GFX6-NEXT:    v_cmp_lt_u32_e64 s[2:3], s5, v5
-; GFX6-NEXT:    s_mov_b32 s6, 0x9761f7c8
+; GFX6-NEXT:    v_cmp_lt_u32_e64 s[2:3], s9, v5
+; GFX6-NEXT:    s_mov_b32 s10, 0x9761f7c8
 ; GFX6-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[2:3]
-; GFX6-NEXT:    v_cmp_lt_u32_e64 s[2:3], s6, v4
+; GFX6-NEXT:    v_cmp_lt_u32_e64 s[2:3], s10, v4
 ; GFX6-NEXT:    v_subb_u32_e64 v2, s[0:1], v2, v3, s[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[2:3]
-; GFX6-NEXT:    v_cmp_eq_u32_e64 s[2:3], s4, v5
+; GFX6-NEXT:    v_cmp_eq_u32_e64 s[2:3], s8, v5
 ; GFX6-NEXT:    v_subrev_i32_e64 v3, s[0:1], s12, v4
 ; GFX6-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[2:3]
 ; GFX6-NEXT:    v_subbrev_u32_e64 v2, s[0:1], 0, v2, s[0:1]
 ; GFX6-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v6
 ; GFX6-NEXT:    v_cndmask_b32_e64 v3, v4, v3, s[0:1]
-; GFX6-NEXT:    v_mov_b32_e32 v4, s7
+; GFX6-NEXT:    v_mov_b32_e32 v4, s11
 ; GFX6-NEXT:    v_subb_u32_e32 v1, vcc, v4, v1, vcc
-; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s5, v1
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s9, v1
 ; GFX6-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
-; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v0
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s10, v0
 ; GFX6-NEXT:    v_cndmask_b32_e64 v2, v5, v2, s[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, s8, v1
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
 ; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; GFX6-NEXT:    s_mov_b32 s6, -1
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[8:11], 0
+; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: urem_i64_oddk_denom:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0x4f1761f8
-; GFX9-NEXT:    v_mov_b32_e32 v1, 0x4f800000
-; GFX9-NEXT:    v_madmk_f32 v0, v1, 0x438f8000, v0
-; GFX9-NEXT:    v_rcp_f32_e32 v0, v0
 ; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; GFX9-NEXT:    s_mov_b32 s12, 0x9761f7c8
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
-; GFX9-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
-; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX9-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX9-NEXT:    s_mul_i32 s1, s0, 0xfffffee0
-; GFX9-NEXT:    s_mul_hi_u32 s2, s0, 0x689e0837
-; GFX9-NEXT:    s_add_i32 s1, s2, s1
-; GFX9-NEXT:    v_readfirstlane_b32 s2, v1
-; GFX9-NEXT:    s_mul_i32 s3, s2, 0x689e0837
-; GFX9-NEXT:    s_add_i32 s1, s1, s3
-; GFX9-NEXT:    s_mul_i32 s9, s0, 0x689e0837
-; GFX9-NEXT:    s_mul_hi_u32 s3, s0, s1
-; GFX9-NEXT:    s_mul_i32 s8, s0, s1
-; GFX9-NEXT:    s_mul_hi_u32 s0, s0, s9
-; GFX9-NEXT:    s_add_u32 s0, s0, s8
-; GFX9-NEXT:    s_addc_u32 s3, 0, s3
-; GFX9-NEXT:    s_mul_hi_u32 s10, s2, s9
-; GFX9-NEXT:    s_mul_i32 s9, s2, s9
-; GFX9-NEXT:    s_add_u32 s0, s0, s9
-; GFX9-NEXT:    s_mul_hi_u32 s8, s2, s1
-; GFX9-NEXT:    s_addc_u32 s0, s3, s10
-; GFX9-NEXT:    s_addc_u32 s3, s8, 0
-; GFX9-NEXT:    s_mul_i32 s1, s2, s1
-; GFX9-NEXT:    s_add_u32 s0, s0, s1
-; GFX9-NEXT:    s_addc_u32 s1, 0, s3
+; GFX9-NEXT:    s_add_u32 s0, 4, 0
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0xe3e0fc
+; GFX9-NEXT:    s_addc_u32 s1, 0, 0
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s0, v0
 ; GFX9-NEXT:    s_cmp_lg_u64 vcc, 0
-; GFX9-NEXT:    s_addc_u32 s0, s2, s1
 ; GFX9-NEXT:    v_readfirstlane_b32 s2, v0
+; GFX9-NEXT:    s_addc_u32 s0, s1, 0
 ; GFX9-NEXT:    s_mul_i32 s3, s2, 0xfffffee0
 ; GFX9-NEXT:    s_mul_hi_u32 s8, s2, 0x689e0837
 ; GFX9-NEXT:    s_mul_i32 s1, s0, 0x689e0837
@@ -8038,6 +7882,7 @@ define amdgpu_kernel void @urem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX9-NEXT:    s_cmp_lg_u64 s[0:1], 0
 ; GFX9-NEXT:    s_subb_u32 s10, s6, 0
 ; GFX9-NEXT:    s_cmpk_gt_u32 s10, 0x11e
+; GFX9-NEXT:    s_mov_b32 s12, 0x9761f7c8
 ; GFX9-NEXT:    s_cselect_b32 s11, -1, 0
 ; GFX9-NEXT:    v_cmp_lt_u32_e64 s[2:3], s12, v1
 ; GFX9-NEXT:    s_cmpk_eq_i32 s10, 0x11f
@@ -8067,6 +7912,7 @@ define amdgpu_kernel void @urem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v5, s0
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[4:5]
@@ -8274,69 +8120,44 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ;
 ; GFX6-LABEL: sdiv_i64_oddk_denom:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    v_mov_b32_e32 v0, 0x4f800000
-; GFX6-NEXT:    v_madak_f32 v0, 0, v0, 0x4996c7d8
-; GFX6-NEXT:    v_rcp_f32_e32 v0, v0
-; GFX6-NEXT:    s_mov_b32 s8, 0xffed2705
 ; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
-; GFX6-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
-; GFX6-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
-; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX6-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
-; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX6-NEXT:    s_mov_b32 s0, 0x33fe64
+; GFX6-NEXT:    s_add_u32 s1, 0x396, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, 0x28100000
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, s1, v0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_mov_b32 s0, s4
-; GFX6-NEXT:    s_mov_b32 s4, 0x12d8fb
-; GFX6-NEXT:    v_mul_lo_u32 v2, v1, s8
-; GFX6-NEXT:    v_mul_hi_u32 v3, v0, s8
-; GFX6-NEXT:    v_mul_lo_u32 v4, v0, s8
-; GFX6-NEXT:    s_mov_b32 s2, -1
-; GFX6-NEXT:    s_mov_b32 s1, s5
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v0
-; GFX6-NEXT:    v_mul_hi_u32 v3, v0, v4
-; GFX6-NEXT:    v_mul_lo_u32 v5, v0, v2
+; GFX6-NEXT:    s_addc_u32 s4, 0, 0
+; GFX6-NEXT:    s_or_b32 s1, vcc_lo, vcc_hi
+; GFX6-NEXT:    s_cmp_lg_u32 s1, 0
+; GFX6-NEXT:    s_mov_b32 s1, 0xffed2705
+; GFX6-NEXT:    v_mul_hi_u32 v1, v0, s1
+; GFX6-NEXT:    s_addc_u32 s4, s4, 0xd95
+; GFX6-NEXT:    v_mul_lo_u32 v2, v0, s1
+; GFX6-NEXT:    s_mul_i32 s8, s4, 0xffed2705
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, s8, v1
+; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v0
+; GFX6-NEXT:    v_mul_lo_u32 v5, v0, v1
 ; GFX6-NEXT:    v_mul_hi_u32 v6, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v7, v1, v2
-; GFX6-NEXT:    v_mul_lo_u32 v2, v1, v2
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, 0, v6, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v6, v1, v4
-; GFX6-NEXT:    v_mul_hi_u32 v4, v1, v4
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v5, v4, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v7, vcc
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v2, v1, s8
-; GFX6-NEXT:    v_mul_hi_u32 v3, v0, s8
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT:    v_mul_lo_u32 v3, v0, s8
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v0
-; GFX6-NEXT:    v_mul_lo_u32 v6, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v7, v0, v3
-; GFX6-NEXT:    v_mul_hi_u32 v8, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v5, v1, v3
-; GFX6-NEXT:    v_mul_lo_u32 v3, v1, v3
-; GFX6-NEXT:    v_mul_hi_u32 v4, v1, v2
-; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; GFX6-NEXT:    v_addc_u32_e32 v7, vcc, 0, v8, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v2, v1, v2
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v7, v5, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; GFX6-NEXT:    v_mul_hi_u32 v7, v0, v1
+; GFX6-NEXT:    v_mul_hi_u32 v4, s4, v2
+; GFX6-NEXT:    v_mul_lo_u32 v2, s4, v2
+; GFX6-NEXT:    v_mul_hi_u32 v3, s4, v1
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; GFX6-NEXT:    v_addc_u32_e32 v6, vcc, 0, v7, vcc
+; GFX6-NEXT:    v_mul_lo_u32 v1, s4, v1
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
+; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, v6, v4, vcc
+; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
 ; GFX6-NEXT:    s_ashr_i32 s8, s7, 31
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
+; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v3, vcc
 ; GFX6-NEXT:    s_add_u32 s6, s6, s8
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GFX6-NEXT:    v_mov_b32_e32 v3, s4
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; GFX6-NEXT:    s_mov_b32 s9, s8
 ; GFX6-NEXT:    s_addc_u32 s7, s7, s8
-; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v3, v2, vcc
 ; GFX6-NEXT:    s_xor_b64 s[6:7], s[6:7], s[8:9]
 ; GFX6-NEXT:    v_mul_lo_u32 v2, s6, v1
 ; GFX6-NEXT:    v_mul_hi_u32 v3, s6, v0
@@ -8347,6 +8168,8 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
 ; GFX6-NEXT:    v_mul_lo_u32 v4, s7, v0
 ; GFX6-NEXT:    v_mul_hi_u32 v0, s7, v0
+; GFX6-NEXT:    s_mov_b32 s4, 0x12d8fb
+; GFX6-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; GFX6-NEXT:    v_addc_u32_e32 v0, vcc, v3, v0, vcc
 ; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
@@ -8377,55 +8200,28 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, -1, v5, vcc
-; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
-; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX6-NEXT:    v_xor_b32_e32 v0, s8, v0
-; GFX6-NEXT:    v_xor_b32_e32 v1, s8, v1
-; GFX6-NEXT:    v_mov_b32_e32 v2, s8
-; GFX6-NEXT:    v_subrev_i32_e32 v0, vcc, s8, v0
-; GFX6-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
-; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
-; GFX6-NEXT:    s_endpgm
-;
-; GFX9-LABEL: sdiv_i64_oddk_denom:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0x4996c7d8
-; GFX9-NEXT:    v_mov_b32_e32 v1, 0x4f800000
-; GFX9-NEXT:    v_mac_f32_e32 v0, 0, v1
-; GFX9-NEXT:    v_rcp_f32_e32 v0, v0
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
-; GFX9-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
-; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX9-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX9-NEXT:    v_readfirstlane_b32 s4, v1
-; GFX9-NEXT:    v_readfirstlane_b32 s5, v0
-; GFX9-NEXT:    s_mul_hi_u32 s6, s5, 0xffed2705
-; GFX9-NEXT:    s_mul_i32 s7, s4, 0xffed2705
-; GFX9-NEXT:    s_add_i32 s6, s6, s7
-; GFX9-NEXT:    s_sub_i32 s6, s6, s5
-; GFX9-NEXT:    s_mul_i32 s9, s5, 0xffed2705
-; GFX9-NEXT:    s_mul_hi_u32 s7, s5, s6
-; GFX9-NEXT:    s_mul_i32 s8, s5, s6
-; GFX9-NEXT:    s_mul_hi_u32 s5, s5, s9
-; GFX9-NEXT:    s_add_u32 s5, s5, s8
-; GFX9-NEXT:    s_addc_u32 s7, 0, s7
-; GFX9-NEXT:    s_mul_hi_u32 s10, s4, s9
-; GFX9-NEXT:    s_mul_i32 s9, s4, s9
-; GFX9-NEXT:    s_add_u32 s5, s5, s9
-; GFX9-NEXT:    s_mul_hi_u32 s8, s4, s6
-; GFX9-NEXT:    s_addc_u32 s5, s7, s10
-; GFX9-NEXT:    s_addc_u32 s7, s8, 0
-; GFX9-NEXT:    s_mul_i32 s6, s4, s6
-; GFX9-NEXT:    s_add_u32 s5, s5, s6
-; GFX9-NEXT:    s_addc_u32 s6, 0, s7
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s5, v0
+; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
+; GFX6-NEXT:    v_xor_b32_e32 v0, s8, v0
+; GFX6-NEXT:    v_xor_b32_e32 v1, s8, v1
+; GFX6-NEXT:    v_mov_b32_e32 v2, s8
+; GFX6-NEXT:    v_subrev_i32_e32 v0, vcc, s8, v0
+; GFX6-NEXT:    s_mov_b32 s2, -1
+; GFX6-NEXT:    s_mov_b32 s1, s5
+; GFX6-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
+; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX6-NEXT:    s_endpgm
+;
+; GFX9-LABEL: sdiv_i64_oddk_denom:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_mov_b32 s4, 0x33fe64
+; GFX9-NEXT:    s_add_u32 s4, 0x396, s4
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x28100000
+; GFX9-NEXT:    s_addc_u32 s5, 0, 0
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v0
 ; GFX9-NEXT:    s_cmp_lg_u64 vcc, 0
-; GFX9-NEXT:    s_addc_u32 s4, s4, s6
+; GFX9-NEXT:    s_addc_u32 s4, s5, 0xd95
 ; GFX9-NEXT:    v_readfirstlane_b32 s6, v0
 ; GFX9-NEXT:    s_mul_i32 s5, s4, 0xffed2705
 ; GFX9-NEXT:    s_mul_hi_u32 s7, s6, 0xffed2705
@@ -8440,6 +8236,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX9-NEXT:    s_mul_i32 s10, s4, s8
 ; GFX9-NEXT:    s_addc_u32 s8, 0, s11
 ; GFX9-NEXT:    s_add_u32 s6, s6, s10
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX9-NEXT:    s_mul_hi_u32 s7, s4, s5
 ; GFX9-NEXT:    s_addc_u32 s6, s8, s9
 ; GFX9-NEXT:    s_addc_u32 s7, s7, 0
@@ -8455,65 +8252,59 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX9-NEXT:    s_mov_b32 s5, s4
 ; GFX9-NEXT:    s_addc_u32 s3, s3, s4
 ; GFX9-NEXT:    s_xor_b64 s[2:3], s[2:3], s[4:5]
-; GFX9-NEXT:    v_readfirstlane_b32 s8, v0
-; GFX9-NEXT:    s_mul_i32 s7, s2, s6
-; GFX9-NEXT:    s_mul_hi_u32 s9, s2, s8
-; GFX9-NEXT:    s_mul_hi_u32 s5, s2, s6
-; GFX9-NEXT:    s_add_u32 s7, s9, s7
-; GFX9-NEXT:    s_addc_u32 s5, 0, s5
-; GFX9-NEXT:    s_mul_hi_u32 s10, s3, s8
-; GFX9-NEXT:    s_mul_i32 s8, s3, s8
-; GFX9-NEXT:    s_add_u32 s7, s7, s8
-; GFX9-NEXT:    s_mul_hi_u32 s9, s3, s6
-; GFX9-NEXT:    s_addc_u32 s5, s5, s10
-; GFX9-NEXT:    s_addc_u32 s7, s9, 0
+; GFX9-NEXT:    v_readfirstlane_b32 s9, v0
+; GFX9-NEXT:    s_mul_i32 s8, s2, s6
+; GFX9-NEXT:    s_mul_hi_u32 s10, s2, s9
+; GFX9-NEXT:    s_mul_hi_u32 s7, s2, s6
+; GFX9-NEXT:    s_add_u32 s8, s10, s8
+; GFX9-NEXT:    s_addc_u32 s7, 0, s7
+; GFX9-NEXT:    s_mul_hi_u32 s11, s3, s9
+; GFX9-NEXT:    s_mul_i32 s9, s3, s9
+; GFX9-NEXT:    s_add_u32 s8, s8, s9
+; GFX9-NEXT:    s_mul_hi_u32 s10, s3, s6
+; GFX9-NEXT:    s_addc_u32 s7, s7, s11
+; GFX9-NEXT:    s_addc_u32 s8, s10, 0
 ; GFX9-NEXT:    s_mul_i32 s6, s3, s6
-; GFX9-NEXT:    s_add_u32 s5, s5, s6
-; GFX9-NEXT:    s_addc_u32 s6, 0, s7
-; GFX9-NEXT:    s_add_u32 s7, s5, 1
-; GFX9-NEXT:    s_addc_u32 s8, s6, 0
-; GFX9-NEXT:    s_add_u32 s9, s5, 2
-; GFX9-NEXT:    s_mul_i32 s12, s6, 0x12d8fb
-; GFX9-NEXT:    s_mul_hi_u32 s13, s5, 0x12d8fb
-; GFX9-NEXT:    s_addc_u32 s10, s6, 0
-; GFX9-NEXT:    s_add_i32 s13, s13, s12
-; GFX9-NEXT:    s_mul_i32 s12, s5, 0x12d8fb
-; GFX9-NEXT:    v_mov_b32_e32 v0, s12
+; GFX9-NEXT:    s_add_u32 s6, s7, s6
+; GFX9-NEXT:    s_addc_u32 s7, 0, s8
+; GFX9-NEXT:    s_add_u32 s8, s6, 1
+; GFX9-NEXT:    s_addc_u32 s9, s7, 0
+; GFX9-NEXT:    s_add_u32 s10, s6, 2
+; GFX9-NEXT:    s_mul_i32 s13, s7, 0x12d8fb
+; GFX9-NEXT:    s_mul_hi_u32 s14, s6, 0x12d8fb
+; GFX9-NEXT:    s_addc_u32 s11, s7, 0
+; GFX9-NEXT:    s_add_i32 s14, s14, s13
+; GFX9-NEXT:    s_mul_i32 s13, s6, 0x12d8fb
+; GFX9-NEXT:    v_mov_b32_e32 v0, s13
 ; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, s2, v0
-; GFX9-NEXT:    s_mov_b32 s11, 0x12d8fb
+; GFX9-NEXT:    s_mov_b32 s12, 0x12d8fb
 ; GFX9-NEXT:    s_cmp_lg_u64 vcc, 0
-; GFX9-NEXT:    s_subb_u32 s2, s3, s13
-; GFX9-NEXT:    v_subrev_co_u32_e32 v1, vcc, s11, v0
+; GFX9-NEXT:    s_subb_u32 s2, s3, s14
+; GFX9-NEXT:    v_subrev_co_u32_e32 v1, vcc, s12, v0
 ; GFX9-NEXT:    s_cmp_lg_u64 vcc, 0
 ; GFX9-NEXT:    s_subb_u32 s3, s2, 0
-; GFX9-NEXT:    s_mov_b32 s11, 0x12d8fa
-; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s11, v1
+; GFX9-NEXT:    v_readfirstlane_b32 s12, v1
+; GFX9-NEXT:    s_cmp_gt_u32 s12, 0x12d8fa
+; GFX9-NEXT:    s_cselect_b32 s12, -1, 0
 ; GFX9-NEXT:    s_cmp_eq_u32 s3, 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
-; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, -1, v1, vcc
-; GFX9-NEXT:    v_mov_b32_e32 v3, s7
-; GFX9-NEXT:    v_mov_b32_e32 v4, s9
-; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
-; GFX9-NEXT:    v_mov_b32_e32 v3, s8
-; GFX9-NEXT:    v_mov_b32_e32 v4, s10
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s11, v0
+; GFX9-NEXT:    s_cselect_b32 s3, s12, -1
+; GFX9-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX9-NEXT:    s_cselect_b32 s3, s11, s9
+; GFX9-NEXT:    v_readfirstlane_b32 s9, v0
+; GFX9-NEXT:    s_cselect_b32 s8, s10, s8
+; GFX9-NEXT:    s_cmp_gt_u32 s9, 0x12d8fa
+; GFX9-NEXT:    s_cselect_b32 s9, -1, 0
 ; GFX9-NEXT:    s_cmp_eq_u32 s2, 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, -1, vcc
-; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
-; GFX9-NEXT:    v_mov_b32_e32 v4, s6
-; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v4, v3, vcc
-; GFX9-NEXT:    v_mov_b32_e32 v3, s5
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
-; GFX9-NEXT:    v_xor_b32_e32 v1, s4, v1
-; GFX9-NEXT:    v_xor_b32_e32 v3, s4, v0
-; GFX9-NEXT:    v_mov_b32_e32 v4, s4
-; GFX9-NEXT:    v_subrev_co_u32_e32 v0, vcc, s4, v1
-; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v3, v4, vcc
+; GFX9-NEXT:    s_cselect_b32 s2, s9, -1
+; GFX9-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX9-NEXT:    s_cselect_b32 s3, s3, s7
+; GFX9-NEXT:    s_cselect_b32 s2, s8, s6
+; GFX9-NEXT:    s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX9-NEXT:    s_sub_u32 s2, s2, s4
+; GFX9-NEXT:    s_subb_u32 s3, s3, s4
+; GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9-NEXT:    s_endpgm
   %r = sdiv i64 %x, 1235195
@@ -8940,84 +8731,62 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out,
 ;
 ; GFX6-LABEL: ssdiv_v2i64_mixed_pow2k_denom:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    v_mov_b32_e32 v0, 0x457ff000
-; GFX6-NEXT:    v_mov_b32_e32 v1, 0x4f800000
-; GFX6-NEXT:    v_mac_f32_e32 v0, 0, v1
-; GFX6-NEXT:    v_rcp_f32_e32 v0, v0
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
-; GFX6-NEXT:    s_movk_i32 s6, 0xf001
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
-; GFX6-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
-; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX6-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
-; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_mov_b32 s2, 0x2ff2fc01
+; GFX6-NEXT:    v_bfrev_b32_e32 v0, 7
+; GFX6-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_ashr_i32 s8, s1, 31
+; GFX6-NEXT:    s_ashr_i32 s8, s5, 31
 ; GFX6-NEXT:    s_lshr_b32 s8, s8, 20
-; GFX6-NEXT:    v_mul_hi_u32 v2, v0, s6
-; GFX6-NEXT:    v_mul_lo_u32 v3, v1, s6
-; GFX6-NEXT:    s_add_u32 s0, s0, s8
-; GFX6-NEXT:    s_addc_u32 s1, s1, 0
-; GFX6-NEXT:    s_ashr_i64 s[0:1], s[0:1], 12
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GFX6-NEXT:    v_mul_lo_u32 v3, v0, s6
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v0
-; GFX6-NEXT:    v_mul_lo_u32 v4, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v5, v0, v3
+; GFX6-NEXT:    s_add_u32 s4, s4, s8
+; GFX6-NEXT:    s_addc_u32 s5, s5, 0
+; GFX6-NEXT:    s_ashr_i64 s[4:5], s[4:5], 12
+; GFX6-NEXT:    s_add_u32 s2, 0xe037f, s2
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, s2, v0
+; GFX6-NEXT:    s_addc_u32 s8, 0, 0
+; GFX6-NEXT:    s_or_b32 s2, vcc_lo, vcc_hi
+; GFX6-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX6-NEXT:    s_movk_i32 s2, 0xf001
+; GFX6-NEXT:    v_mul_hi_u32 v1, v0, s2
+; GFX6-NEXT:    s_addc_u32 s8, s8, 0x1000ff
+; GFX6-NEXT:    v_mul_lo_u32 v2, v0, s2
+; GFX6-NEXT:    s_mul_i32 s9, s8, 0xfffff001
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, s9, v1
+; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v0
+; GFX6-NEXT:    v_mul_lo_u32 v5, v0, v1
 ; GFX6-NEXT:    v_mul_hi_u32 v6, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v7, v1, v2
-; GFX6-NEXT:    v_mul_lo_u32 v2, v1, v2
-; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, 0, v6, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v6, v1, v3
-; GFX6-NEXT:    v_mul_hi_u32 v3, v1, v3
-; GFX6-NEXT:    s_ashr_i32 s8, s3, 31
-; GFX6-NEXT:    s_add_u32 s2, s2, s8
-; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v5, v3, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v7, vcc
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v2, v1, s6
-; GFX6-NEXT:    v_mul_hi_u32 v3, v0, s6
+; GFX6-NEXT:    v_mul_hi_u32 v7, v0, v1
+; GFX6-NEXT:    v_mul_hi_u32 v4, s8, v2
+; GFX6-NEXT:    v_mul_lo_u32 v2, s8, v2
+; GFX6-NEXT:    v_mul_hi_u32 v3, s8, v1
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; GFX6-NEXT:    v_addc_u32_e32 v6, vcc, 0, v7, vcc
+; GFX6-NEXT:    v_mul_lo_u32 v1, s8, v1
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
+; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, v6, v4, vcc
+; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
+; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v3, vcc
+; GFX6-NEXT:    v_mov_b32_e32 v3, s8
+; GFX6-NEXT:    s_ashr_i32 s8, s7, 31
+; GFX6-NEXT:    s_add_u32 s6, s6, s8
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; GFX6-NEXT:    s_mov_b32 s9, s8
-; GFX6-NEXT:    s_addc_u32 s3, s3, s8
-; GFX6-NEXT:    s_xor_b64 s[2:3], s[2:3], s[8:9]
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT:    v_mul_lo_u32 v3, v0, s6
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v0
-; GFX6-NEXT:    v_mul_lo_u32 v6, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v7, v0, v3
-; GFX6-NEXT:    v_mul_hi_u32 v8, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v5, v1, v3
-; GFX6-NEXT:    v_mul_lo_u32 v3, v1, v3
-; GFX6-NEXT:    v_mul_hi_u32 v4, v1, v2
-; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; GFX6-NEXT:    v_addc_u32_e32 v7, vcc, 0, v8, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v2, v1, v2
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v7, v5, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v2, s2, v1
-; GFX6-NEXT:    v_mul_hi_u32 v3, s2, v0
-; GFX6-NEXT:    v_mul_hi_u32 v4, s2, v1
-; GFX6-NEXT:    v_mul_hi_u32 v5, s3, v1
-; GFX6-NEXT:    v_mul_lo_u32 v1, s3, v1
+; GFX6-NEXT:    s_addc_u32 s7, s7, s8
+; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v3, v2, vcc
+; GFX6-NEXT:    s_xor_b64 s[6:7], s[6:7], s[8:9]
+; GFX6-NEXT:    v_mul_lo_u32 v2, s6, v1
+; GFX6-NEXT:    v_mul_hi_u32 v3, s6, v0
+; GFX6-NEXT:    v_mul_hi_u32 v4, s6, v1
+; GFX6-NEXT:    v_mul_hi_u32 v5, s7, v1
+; GFX6-NEXT:    v_mul_lo_u32 v1, s7, v1
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
 ; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v4, s3, v0
-; GFX6-NEXT:    v_mul_hi_u32 v0, s3, v0
+; GFX6-NEXT:    v_mul_lo_u32 v4, s7, v0
+; GFX6-NEXT:    v_mul_hi_u32 v0, s7, v0
 ; GFX6-NEXT:    s_movk_i32 s9, 0xfff
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; GFX6-NEXT:    v_addc_u32_e32 v0, vcc, v3, v0, vcc
 ; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
@@ -9031,20 +8800,20 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out,
 ; GFX6-NEXT:    v_add_i32_e32 v6, vcc, 2, v0
 ; GFX6-NEXT:    v_addc_u32_e32 v7, vcc, 0, v1, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
-; GFX6-NEXT:    v_mov_b32_e32 v5, s3
-; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, s2, v8
+; GFX6-NEXT:    v_mov_b32_e32 v5, s7
+; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, s6, v8
 ; GFX6-NEXT:    v_subb_u32_e32 v4, vcc, v5, v4, vcc
 ; GFX6-NEXT:    v_subrev_i32_e32 v5, vcc, s9, v8
 ; GFX6-NEXT:    v_subbrev_u32_e32 v9, vcc, 0, v4, vcc
-; GFX6-NEXT:    s_movk_i32 s2, 0xffe
-; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v5
+; GFX6-NEXT:    s_movk_i32 s6, 0xffe
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v5
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
 ; GFX6-NEXT:    v_cndmask_b32_e32 v5, -1, v5, vcc
 ; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s2, v8
+; GFX6-NEXT:    v_cmp_lt_u32_e32 vcc, s6, v8
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
 ; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
 ; GFX6-NEXT:    v_cndmask_b32_e32 v4, -1, v5, vcc
@@ -9056,57 +8825,30 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out,
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s8
 ; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, s8, v0
 ; GFX6-NEXT:    v_subb_u32_e32 v3, vcc, v1, v3, vcc
-; GFX6-NEXT:    v_mov_b32_e32 v0, s0
-; GFX6-NEXT:    v_mov_b32_e32 v1, s1
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; GFX6-NEXT:    v_mov_b32_e32 v0, s4
+; GFX6-NEXT:    v_mov_b32_e32 v1, s5
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: ssdiv_v2i64_mixed_pow2k_denom:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v1, 0x457ff000
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0x4f800000
-; GFX9-NEXT:    v_mac_f32_e32 v1, 0, v2
-; GFX9-NEXT:    v_rcp_f32_e32 v1, v1
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
-; GFX9-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v1
-; GFX9-NEXT:    v_trunc_f32_e32 v2, v2
-; GFX9-NEXT:    v_madmk_f32 v1, v2, 0xcf800000, v1
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    s_mov_b32 s8, 0x2ff2fc01
+; GFX9-NEXT:    v_bfrev_b32_e32 v0, 7
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_ashr_i32 s0, s5, 31
 ; GFX9-NEXT:    s_lshr_b32 s0, s0, 20
 ; GFX9-NEXT:    s_add_u32 s0, s4, s0
-; GFX9-NEXT:    v_readfirstlane_b32 s4, v1
-; GFX9-NEXT:    v_readfirstlane_b32 s8, v2
 ; GFX9-NEXT:    s_addc_u32 s1, s5, 0
-; GFX9-NEXT:    s_mul_hi_u32 s5, s4, 0xfffff001
-; GFX9-NEXT:    s_mul_i32 s9, s8, 0xfffff001
-; GFX9-NEXT:    s_add_i32 s5, s5, s9
-; GFX9-NEXT:    s_sub_i32 s5, s5, s4
-; GFX9-NEXT:    s_mul_i32 s11, s4, 0xfffff001
 ; GFX9-NEXT:    s_ashr_i64 s[0:1], s[0:1], 12
-; GFX9-NEXT:    s_mul_hi_u32 s9, s4, s5
-; GFX9-NEXT:    s_mul_i32 s10, s4, s5
-; GFX9-NEXT:    s_mul_hi_u32 s4, s4, s11
-; GFX9-NEXT:    s_add_u32 s4, s4, s10
-; GFX9-NEXT:    s_addc_u32 s9, 0, s9
-; GFX9-NEXT:    s_mul_hi_u32 s12, s8, s11
-; GFX9-NEXT:    s_mul_i32 s11, s8, s11
-; GFX9-NEXT:    s_add_u32 s4, s4, s11
-; GFX9-NEXT:    s_mul_hi_u32 s10, s8, s5
-; GFX9-NEXT:    s_addc_u32 s4, s9, s12
-; GFX9-NEXT:    s_addc_u32 s9, s10, 0
-; GFX9-NEXT:    s_mul_i32 s5, s8, s5
-; GFX9-NEXT:    s_add_u32 s4, s4, s5
-; GFX9-NEXT:    s_addc_u32 s5, 0, s9
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, s4, v1
+; GFX9-NEXT:    s_add_u32 s4, 0xe037f, s8
+; GFX9-NEXT:    s_addc_u32 s5, 0, 0
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v0
 ; GFX9-NEXT:    s_cmp_lg_u64 vcc, 0
-; GFX9-NEXT:    s_addc_u32 s4, s8, s5
-; GFX9-NEXT:    v_readfirstlane_b32 s8, v1
+; GFX9-NEXT:    s_addc_u32 s4, s5, 0x1000ff
+; GFX9-NEXT:    v_readfirstlane_b32 s8, v0
 ; GFX9-NEXT:    s_mul_i32 s5, s4, 0xfffff001
 ; GFX9-NEXT:    s_mul_hi_u32 s9, s8, 0xfffff001
 ; GFX9-NEXT:    s_add_i32 s9, s9, s5
@@ -9126,7 +8868,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out,
 ; GFX9-NEXT:    s_mul_i32 s5, s4, s5
 ; GFX9-NEXT:    s_add_u32 s5, s8, s5
 ; GFX9-NEXT:    s_addc_u32 s8, 0, s9
-; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, s5, v1
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s5, v0
 ; GFX9-NEXT:    s_cmp_lg_u64 vcc, 0
 ; GFX9-NEXT:    s_addc_u32 s8, s4, s8
 ; GFX9-NEXT:    s_ashr_i32 s4, s7, 31
@@ -9134,68 +8876,61 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out,
 ; GFX9-NEXT:    s_mov_b32 s5, s4
 ; GFX9-NEXT:    s_addc_u32 s7, s7, s4
 ; GFX9-NEXT:    s_xor_b64 s[6:7], s[6:7], s[4:5]
-; GFX9-NEXT:    v_readfirstlane_b32 s10, v1
-; GFX9-NEXT:    s_mul_i32 s9, s6, s8
-; GFX9-NEXT:    s_mul_hi_u32 s11, s6, s10
-; GFX9-NEXT:    s_mul_hi_u32 s5, s6, s8
-; GFX9-NEXT:    s_add_u32 s9, s11, s9
-; GFX9-NEXT:    s_addc_u32 s5, 0, s5
-; GFX9-NEXT:    s_mul_hi_u32 s12, s7, s10
-; GFX9-NEXT:    s_mul_i32 s10, s7, s10
-; GFX9-NEXT:    s_add_u32 s9, s9, s10
-; GFX9-NEXT:    s_mul_hi_u32 s11, s7, s8
-; GFX9-NEXT:    s_addc_u32 s5, s5, s12
-; GFX9-NEXT:    s_addc_u32 s9, s11, 0
+; GFX9-NEXT:    v_readfirstlane_b32 s11, v0
+; GFX9-NEXT:    s_mul_i32 s10, s6, s8
+; GFX9-NEXT:    s_mul_hi_u32 s12, s6, s11
+; GFX9-NEXT:    s_mul_hi_u32 s9, s6, s8
+; GFX9-NEXT:    s_add_u32 s10, s12, s10
+; GFX9-NEXT:    s_addc_u32 s9, 0, s9
+; GFX9-NEXT:    s_mul_hi_u32 s13, s7, s11
+; GFX9-NEXT:    s_mul_i32 s11, s7, s11
+; GFX9-NEXT:    s_add_u32 s10, s10, s11
+; GFX9-NEXT:    s_mul_hi_u32 s12, s7, s8
+; GFX9-NEXT:    s_addc_u32 s9, s9, s13
+; GFX9-NEXT:    s_addc_u32 s10, s12, 0
 ; GFX9-NEXT:    s_mul_i32 s8, s7, s8
-; GFX9-NEXT:    s_add_u32 s5, s5, s8
-; GFX9-NEXT:    s_addc_u32 s8, 0, s9
-; GFX9-NEXT:    s_add_u32 s9, s5, 1
-; GFX9-NEXT:    s_addc_u32 s10, s8, 0
-; GFX9-NEXT:    s_add_u32 s11, s5, 2
-; GFX9-NEXT:    s_mul_i32 s14, s8, 0xfff
-; GFX9-NEXT:    s_mul_hi_u32 s15, s5, 0xfff
-; GFX9-NEXT:    s_addc_u32 s12, s8, 0
-; GFX9-NEXT:    s_add_i32 s15, s15, s14
-; GFX9-NEXT:    s_mul_i32 s14, s5, 0xfff
-; GFX9-NEXT:    v_mov_b32_e32 v1, s14
-; GFX9-NEXT:    v_sub_co_u32_e32 v1, vcc, s6, v1
-; GFX9-NEXT:    s_movk_i32 s13, 0xfff
+; GFX9-NEXT:    s_add_u32 s8, s9, s8
+; GFX9-NEXT:    s_addc_u32 s9, 0, s10
+; GFX9-NEXT:    s_add_u32 s10, s8, 1
+; GFX9-NEXT:    s_addc_u32 s11, s9, 0
+; GFX9-NEXT:    s_add_u32 s12, s8, 2
+; GFX9-NEXT:    s_mul_i32 s15, s9, 0xfff
+; GFX9-NEXT:    s_mul_hi_u32 s16, s8, 0xfff
+; GFX9-NEXT:    s_addc_u32 s13, s9, 0
+; GFX9-NEXT:    s_add_i32 s16, s16, s15
+; GFX9-NEXT:    s_mul_i32 s15, s8, 0xfff
+; GFX9-NEXT:    v_mov_b32_e32 v0, s15
+; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, s6, v0
+; GFX9-NEXT:    s_movk_i32 s14, 0xfff
 ; GFX9-NEXT:    s_cmp_lg_u64 vcc, 0
-; GFX9-NEXT:    s_subb_u32 s6, s7, s15
-; GFX9-NEXT:    v_subrev_co_u32_e32 v2, vcc, s13, v1
+; GFX9-NEXT:    s_subb_u32 s6, s7, s16
+; GFX9-NEXT:    v_subrev_co_u32_e32 v1, vcc, s14, v0
 ; GFX9-NEXT:    s_cmp_lg_u64 vcc, 0
 ; GFX9-NEXT:    s_subb_u32 s7, s6, 0
-; GFX9-NEXT:    s_movk_i32 s13, 0xffe
-; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s13, v2
+; GFX9-NEXT:    v_readfirstlane_b32 s14, v1
+; GFX9-NEXT:    s_cmpk_gt_u32 s14, 0xffe
+; GFX9-NEXT:    s_cselect_b32 s14, -1, 0
 ; GFX9-NEXT:    s_cmp_eq_u32 s7, 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, -1, vcc
-; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, -1, v2, vcc
-; GFX9-NEXT:    v_mov_b32_e32 v3, s9
-; GFX9-NEXT:    v_mov_b32_e32 v4, s11
-; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
-; GFX9-NEXT:    v_mov_b32_e32 v3, s10
-; GFX9-NEXT:    v_mov_b32_e32 v4, s12
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, s13, v1
+; GFX9-NEXT:    s_cselect_b32 s7, s14, -1
+; GFX9-NEXT:    s_cmp_lg_u32 s7, 0
+; GFX9-NEXT:    s_cselect_b32 s7, s13, s11
+; GFX9-NEXT:    v_readfirstlane_b32 s11, v0
+; GFX9-NEXT:    s_cselect_b32 s10, s12, s10
+; GFX9-NEXT:    s_cmpk_gt_u32 s11, 0xffe
+; GFX9-NEXT:    s_cselect_b32 s11, -1, 0
 ; GFX9-NEXT:    s_cmp_eq_u32 s6, 0
-; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc
-; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, -1, v1, vcc
-; GFX9-NEXT:    v_mov_b32_e32 v4, s8
-; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v4, v3, vcc
-; GFX9-NEXT:    v_mov_b32_e32 v3, s5
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT:    v_xor_b32_e32 v2, s4, v2
-; GFX9-NEXT:    v_xor_b32_e32 v1, s4, v1
-; GFX9-NEXT:    v_mov_b32_e32 v4, s4
-; GFX9-NEXT:    v_subrev_co_u32_e32 v3, vcc, s4, v2
-; GFX9-NEXT:    v_subb_co_u32_e32 v4, vcc, v1, v4, vcc
-; GFX9-NEXT:    v_mov_b32_e32 v1, s0
-; GFX9-NEXT:    v_mov_b32_e32 v2, s1
-; GFX9-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3]
+; GFX9-NEXT:    s_cselect_b32 s6, s11, -1
+; GFX9-NEXT:    s_cmp_lg_u32 s6, 0
+; GFX9-NEXT:    s_cselect_b32 s7, s7, s9
+; GFX9-NEXT:    s_cselect_b32 s6, s10, s8
+; GFX9-NEXT:    s_xor_b64 s[6:7], s[6:7], s[4:5]
+; GFX9-NEXT:    s_sub_u32 s5, s6, s4
+; GFX9-NEXT:    s_subb_u32 s4, s7, s4
+; GFX9-NEXT:    v_mov_b32_e32 v0, s0
+; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    v_mov_b32_e32 v2, s5
+; GFX9-NEXT:    v_mov_b32_e32 v3, s4
+; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[2:3]
 ; GFX9-NEXT:    s_endpgm
   %r = sdiv <2 x i64> %x, <i64 4096, i64 4095>
   store <2 x i64> %r, ptr addrspace(1) %out
@@ -9780,66 +9515,43 @@ define amdgpu_kernel void @srem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ;
 ; GFX6-LABEL: srem_i64_oddk_denom:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    v_mov_b32_e32 v0, 0x4f800000
-; GFX6-NEXT:    v_madak_f32 v0, 0, v0, 0x4996c7d8
-; GFX6-NEXT:    v_rcp_f32_e32 v0, v0
-; GFX6-NEXT:    s_mov_b32 s8, 0xffed2705
 ; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
-; GFX6-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
-; GFX6-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
-; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX6-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
-; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX6-NEXT:    s_mov_b32 s2, -1
-; GFX6-NEXT:    v_mul_lo_u32 v2, v1, s8
-; GFX6-NEXT:    v_mul_hi_u32 v3, v0, s8
-; GFX6-NEXT:    v_mul_lo_u32 v4, v0, s8
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v0
-; GFX6-NEXT:    v_mul_hi_u32 v3, v0, v4
-; GFX6-NEXT:    v_mul_lo_u32 v5, v0, v2
+; GFX6-NEXT:    s_mov_b32 s0, 0x33fe64
+; GFX6-NEXT:    s_add_u32 s0, 0x396, s0
+; GFX6-NEXT:    v_mov_b32_e32 v0, 0x28100000
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, s0, v0
+; GFX6-NEXT:    s_addc_u32 s1, 0, 0
+; GFX6-NEXT:    s_or_b32 s0, vcc_lo, vcc_hi
+; GFX6-NEXT:    s_cmp_lg_u32 s0, 0
+; GFX6-NEXT:    s_mov_b32 s0, 0xffed2705
+; GFX6-NEXT:    v_mul_hi_u32 v1, v0, s0
+; GFX6-NEXT:    s_addc_u32 s1, s1, 0xd95
+; GFX6-NEXT:    v_mul_lo_u32 v2, v0, s0
+; GFX6-NEXT:    s_mul_i32 s8, s1, 0xffed2705
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, s8, v1
+; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v0
+; GFX6-NEXT:    v_mul_lo_u32 v5, v0, v1
 ; GFX6-NEXT:    v_mul_hi_u32 v6, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v7, v1, v2
-; GFX6-NEXT:    v_mul_lo_u32 v2, v1, v2
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; GFX6-NEXT:    v_addc_u32_e32 v5, vcc, 0, v6, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v6, v1, v4
-; GFX6-NEXT:    v_mul_hi_u32 v4, v1, v4
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v5, v4, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v7, vcc
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v2, v1, s8
-; GFX6-NEXT:    v_mul_hi_u32 v3, v0, s8
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
-; GFX6-NEXT:    v_mul_lo_u32 v3, v0, s8
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v0
-; GFX6-NEXT:    v_mul_lo_u32 v6, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v7, v0, v3
-; GFX6-NEXT:    v_mul_hi_u32 v8, v0, v2
-; GFX6-NEXT:    v_mul_hi_u32 v5, v1, v3
-; GFX6-NEXT:    v_mul_lo_u32 v3, v1, v3
-; GFX6-NEXT:    v_mul_hi_u32 v4, v1, v2
-; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; GFX6-NEXT:    v_addc_u32_e32 v7, vcc, 0, v8, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v2, v1, v2
-; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v7, v5, vcc
-; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; GFX6-NEXT:    v_mul_hi_u32 v7, v0, v1
+; GFX6-NEXT:    v_mul_hi_u32 v4, s1, v2
+; GFX6-NEXT:    v_mul_lo_u32 v2, s1, v2
+; GFX6-NEXT:    v_mul_hi_u32 v3, s1, v1
+; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; GFX6-NEXT:    v_addc_u32_e32 v6, vcc, 0, v7, vcc
+; GFX6-NEXT:    v_mul_lo_u32 v1, s1, v1
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
+; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, v6, v4, vcc
+; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_ashr_i32 s8, s7, 31
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
+; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v3, vcc
 ; GFX6-NEXT:    s_add_u32 s0, s6, s8
-; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GFX6-NEXT:    v_mov_b32_e32 v3, s1
+; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; GFX6-NEXT:    s_mov_b32 s9, s8
 ; GFX6-NEXT:    s_addc_u32 s1, s7, s8
-; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v3, v2, vcc
 ; GFX6-NEXT:    s_xor_b64 s[6:7], s[0:1], s[8:9]
 ; GFX6-NEXT:    v_mul_lo_u32 v2, s6, v1
 ; GFX6-NEXT:    v_mul_hi_u32 v3, s6, v0
@@ -9860,7 +9572,8 @@ define amdgpu_kernel void @srem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX6-NEXT:    v_mul_lo_u32 v1, v1, s4
 ; GFX6-NEXT:    v_mul_hi_u32 v2, v0, s4
 ; GFX6-NEXT:    v_mul_lo_u32 v0, v0, s4
-; GFX6-NEXT:    s_mov_b32 s1, s5
+; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s7
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s6, v0
@@ -9888,48 +9601,20 @@ define amdgpu_kernel void @srem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX6-NEXT:    v_xor_b32_e32 v1, s8, v1
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX6-NEXT:    v_subrev_i32_e32 v0, vcc, s8, v0
+; GFX6-NEXT:    s_mov_b32 s1, s5
 ; GFX6-NEXT:    v_subb_u32_e32 v1, vcc, v1, v2, vcc
 ; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: srem_i64_oddk_denom:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0x4996c7d8
-; GFX9-NEXT:    v_mov_b32_e32 v1, 0x4f800000
-; GFX9-NEXT:    v_mac_f32_e32 v0, 0, v1
-; GFX9-NEXT:    v_rcp_f32_e32 v0, v0
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
-; GFX9-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
-; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX9-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX9-NEXT:    v_readfirstlane_b32 s4, v1
-; GFX9-NEXT:    v_readfirstlane_b32 s5, v0
-; GFX9-NEXT:    s_mul_hi_u32 s6, s5, 0xffed2705
-; GFX9-NEXT:    s_mul_i32 s7, s4, 0xffed2705
-; GFX9-NEXT:    s_add_i32 s6, s6, s7
-; GFX9-NEXT:    s_sub_i32 s6, s6, s5
-; GFX9-NEXT:    s_mul_i32 s9, s5, 0xffed2705
-; GFX9-NEXT:    s_mul_hi_u32 s7, s5, s6
-; GFX9-NEXT:    s_mul_i32 s8, s5, s6
-; GFX9-NEXT:    s_mul_hi_u32 s5, s5, s9
-; GFX9-NEXT:    s_add_u32 s5, s5, s8
-; GFX9-NEXT:    s_addc_u32 s7, 0, s7
-; GFX9-NEXT:    s_mul_hi_u32 s10, s4, s9
-; GFX9-NEXT:    s_mul_i32 s9, s4, s9
-; GFX9-NEXT:    s_add_u32 s5, s5, s9
-; GFX9-NEXT:    s_mul_hi_u32 s8, s4, s6
-; GFX9-NEXT:    s_addc_u32 s5, s7, s10
-; GFX9-NEXT:    s_addc_u32 s7, s8, 0
-; GFX9-NEXT:    s_mul_i32 s6, s4, s6
-; GFX9-NEXT:    s_add_u32 s5, s5, s6
-; GFX9-NEXT:    s_addc_u32 s6, 0, s7
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s5, v0
+; GFX9-NEXT:    s_mov_b32 s4, 0x33fe64
+; GFX9-NEXT:    s_add_u32 s4, 0x396, s4
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0x28100000
+; GFX9-NEXT:    s_addc_u32 s5, 0, 0
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, s4, v0
 ; GFX9-NEXT:    s_cmp_lg_u64 vcc, 0
-; GFX9-NEXT:    s_addc_u32 s4, s4, s6
+; GFX9-NEXT:    s_addc_u32 s4, s5, 0xd95
 ; GFX9-NEXT:    v_readfirstlane_b32 s6, v0
 ; GFX9-NEXT:    s_mul_i32 s5, s4, 0xffed2705
 ; GFX9-NEXT:    s_mul_hi_u32 s7, s6, 0xffed2705
@@ -9944,6 +9629,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX9-NEXT:    s_mul_i32 s10, s4, s8
 ; GFX9-NEXT:    s_addc_u32 s8, 0, s11
 ; GFX9-NEXT:    s_add_u32 s6, s6, s10
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX9-NEXT:    s_mul_hi_u32 s7, s4, s5
 ; GFX9-NEXT:    s_addc_u32 s6, s8, s9
 ; GFX9-NEXT:    s_addc_u32 s7, s7, 0
@@ -10013,6 +9699,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX9-NEXT:    v_xor_b32_e32 v1, s4, v3
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s4
 ; GFX9-NEXT:    v_subrev_co_u32_e32 v0, vcc, s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
 ; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll
index e554f912ff64886..5d1db03a1a35bbd 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv.ll
@@ -2520,59 +2520,38 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
 ; SI-LABEL: v_test_udiv64_mulhi_fold:
 ; SI:       ; %bb.0:
 ; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    v_mov_b32_e32 v2, 0x4f800000
-; SI-NEXT:    v_madak_f32 v2, 0, v2, 0x47c35000
-; SI-NEXT:    v_rcp_f32_e32 v2, v2
+; SI-NEXT:    s_mov_b32 s4, 0x346d900
+; SI-NEXT:    s_add_u32 s4, 0x4237, s4
+; SI-NEXT:    v_mov_b32_e32 v2, 0xa9000000
+; SI-NEXT:    v_add_i32_e32 v2, vcc, s4, v2
+; SI-NEXT:    s_addc_u32 s5, 0, 0
+; SI-NEXT:    s_or_b32 s4, vcc_lo, vcc_hi
+; SI-NEXT:    s_cmp_lg_u32 s4, 0
 ; SI-NEXT:    s_mov_b32 s4, 0xfffe7960
-; SI-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
-; SI-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
-; SI-NEXT:    v_trunc_f32_e32 v3, v3
-; SI-NEXT:    v_madmk_f32 v2, v3, 0xcf800000, v2
-; SI-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; SI-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; SI-NEXT:    v_mul_hi_u32 v4, v2, s4
-; SI-NEXT:    v_mul_lo_u32 v6, v3, s4
-; SI-NEXT:    v_mul_lo_u32 v5, v2, s4
-; SI-NEXT:    v_sub_i32_e32 v4, vcc, v4, v2
-; SI-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; SI-NEXT:    v_mul_hi_u32 v7, v2, v5
-; SI-NEXT:    v_mul_lo_u32 v6, v2, v4
-; SI-NEXT:    v_mul_hi_u32 v8, v2, v4
-; SI-NEXT:    v_mul_hi_u32 v9, v3, v4
-; SI-NEXT:    v_mul_lo_u32 v4, v3, v4
-; SI-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; SI-NEXT:    v_addc_u32_e32 v7, vcc, 0, v8, vcc
-; SI-NEXT:    v_mul_lo_u32 v8, v3, v5
-; SI-NEXT:    v_mul_hi_u32 v5, v3, v5
-; SI-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
-; SI-NEXT:    v_addc_u32_e32 v5, vcc, v7, v5, vcc
-; SI-NEXT:    v_addc_u32_e32 v6, vcc, 0, v9, vcc
-; SI-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; SI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v6, vcc
-; SI-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; SI-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
-; SI-NEXT:    v_mul_hi_u32 v4, v2, s4
-; SI-NEXT:    v_mul_lo_u32 v5, v3, s4
-; SI-NEXT:    v_mul_lo_u32 v6, v2, s4
+; SI-NEXT:    v_mul_hi_u32 v3, v2, s4
+; SI-NEXT:    v_mul_lo_u32 v4, v2, s4
+; SI-NEXT:    s_addc_u32 s5, s5, 0xa7c5
+; SI-NEXT:    s_mul_i32 s6, s5, 0xfffe7960
+; SI-NEXT:    v_sub_i32_e32 v3, vcc, v3, v2
+; SI-NEXT:    v_add_i32_e32 v3, vcc, s6, v3
+; SI-NEXT:    v_mul_lo_u32 v5, v2, v3
+; SI-NEXT:    v_mul_hi_u32 v6, v2, v4
+; SI-NEXT:    v_mul_hi_u32 v7, v2, v3
+; SI-NEXT:    v_mul_hi_u32 v8, s5, v3
+; SI-NEXT:    v_mul_lo_u32 v3, s5, v3
+; SI-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
+; SI-NEXT:    v_addc_u32_e32 v6, vcc, 0, v7, vcc
+; SI-NEXT:    v_mul_lo_u32 v7, s5, v4
+; SI-NEXT:    v_mul_hi_u32 v4, s5, v4
 ; SI-NEXT:    s_mov_b32 s4, 0x186a0
-; SI-NEXT:    v_sub_i32_e32 v4, vcc, v4, v2
-; SI-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; SI-NEXT:    v_mul_lo_u32 v5, v2, v4
-; SI-NEXT:    v_mul_hi_u32 v7, v2, v6
-; SI-NEXT:    v_mul_hi_u32 v8, v2, v4
-; SI-NEXT:    v_mul_hi_u32 v9, v3, v4
-; SI-NEXT:    v_mul_lo_u32 v4, v3, v4
-; SI-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; SI-NEXT:    v_addc_u32_e32 v7, vcc, 0, v8, vcc
-; SI-NEXT:    v_mul_lo_u32 v8, v3, v6
-; SI-NEXT:    v_mul_hi_u32 v6, v3, v6
-; SI-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; SI-NEXT:    v_addc_u32_e32 v5, vcc, v7, v6, vcc
-; SI-NEXT:    v_addc_u32_e32 v6, vcc, 0, v9, vcc
-; SI-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; SI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v6, vcc
-; SI-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; SI-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; SI-NEXT:    v_add_i32_e32 v5, vcc, v5, v7
+; SI-NEXT:    v_addc_u32_e32 v4, vcc, v6, v4, vcc
+; SI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v8, vcc
+; SI-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
+; SI-NEXT:    v_addc_u32_e32 v4, vcc, 0, v5, vcc
+; SI-NEXT:    v_mov_b32_e32 v5, s5
+; SI-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
+; SI-NEXT:    v_addc_u32_e32 v3, vcc, v5, v4, vcc
 ; SI-NEXT:    v_mul_lo_u32 v4, v0, v3
 ; SI-NEXT:    v_mul_hi_u32 v5, v0, v2
 ; SI-NEXT:    v_mul_hi_u32 v6, v0, v3
@@ -2687,39 +2666,20 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
 ; GCN-LABEL: v_test_udiv64_mulhi_fold:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v2, 0x4f800000
-; GCN-NEXT:    v_madak_f32 v2, 0, v2, 0x47c35000
-; GCN-NEXT:    v_rcp_f32_e32 v2, v2
+; GCN-NEXT:    v_mov_b32_e32 v4, 0xa7c5
+; GCN-NEXT:    v_mul_u32_u24_e32 v3, 0x500, v4
+; GCN-NEXT:    v_mul_hi_u32_u24_e32 v2, 0x500, v4
+; GCN-NEXT:    v_add_u32_e32 v3, vcc, 0x4237, v3
+; GCN-NEXT:    v_addc_u32_e32 v5, vcc, 0, v2, vcc
+; GCN-NEXT:    v_add_u32_e32 v6, vcc, 0xa9000000, v3
 ; GCN-NEXT:    s_mov_b32 s6, 0xfffe7960
-; GCN-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
-; GCN-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
-; GCN-NEXT:    v_trunc_f32_e32 v3, v3
-; GCN-NEXT:    v_madmk_f32 v2, v3, 0xcf800000, v2
-; GCN-NEXT:    v_cvt_u32_f32_e32 v6, v2
-; GCN-NEXT:    v_cvt_u32_f32_e32 v7, v3
-; GCN-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0
-; GCN-NEXT:    v_mul_lo_u32 v4, v7, s6
-; GCN-NEXT:    v_sub_u32_e32 v3, vcc, v3, v6
-; GCN-NEXT:    v_add_u32_e32 v8, vcc, v3, v4
-; GCN-NEXT:    v_mul_hi_u32 v5, v6, v2
-; GCN-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v8, 0
-; GCN-NEXT:    v_add_u32_e32 v9, vcc, v5, v3
-; GCN-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v2, 0
-; GCN-NEXT:    v_addc_u32_e32 v10, vcc, 0, v4, vcc
-; GCN-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v7, v8, 0
-; GCN-NEXT:    v_add_u32_e32 v2, vcc, v9, v2
-; GCN-NEXT:    v_addc_u32_e32 v2, vcc, v10, v3, vcc
-; GCN-NEXT:    v_addc_u32_e32 v3, vcc, 0, v5, vcc
-; GCN-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
-; GCN-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
-; GCN-NEXT:    v_add_u32_e32 v6, vcc, v6, v2
-; GCN-NEXT:    v_addc_u32_e32 v7, vcc, v7, v3, vcc
 ; GCN-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0
+; GCN-NEXT:    v_addc_u32_e32 v7, vcc, v5, v4, vcc
 ; GCN-NEXT:    v_mul_lo_u32 v4, v7, s6
 ; GCN-NEXT:    v_sub_u32_e32 v3, vcc, v3, v6
+; GCN-NEXT:    v_mul_hi_u32 v8, v6, v2
 ; GCN-NEXT:    v_add_u32_e32 v5, vcc, v4, v3
 ; GCN-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0
-; GCN-NEXT:    v_mul_hi_u32 v8, v6, v2
 ; GCN-NEXT:    v_add_u32_e32 v8, vcc, v8, v3
 ; GCN-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v2, 0
 ; GCN-NEXT:    v_addc_u32_e32 v9, vcc, 0, v4, vcc
diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll
index e23f3cfad89bc88..7aa36a8b377bff5 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll
@@ -1326,64 +1326,37 @@ define i64 @v_test_udiv_pow2_k_den_i64(i64 %x) {
 define amdgpu_kernel void @s_test_udiv_k_den_i64(ptr addrspace(1) %out, i64 %x) {
 ; GCN-LABEL: s_test_udiv_k_den_i64:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    v_mov_b32_e32 v0, 0x4f800000
-; GCN-NEXT:    v_madak_f32 v0, 0, v0, 0x41c00000
-; GCN-NEXT:    v_rcp_f32_e32 v0, v0
-; GCN-NEXT:    s_movk_i32 s8, 0xffe8
 ; GCN-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GCN-NEXT:    s_mov_b32 s3, 0xf000
-; GCN-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
-; GCN-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
-; GCN-NEXT:    v_trunc_f32_e32 v1, v1
-; GCN-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
-; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GCN-NEXT:    s_mov_b32 s2, -1
+; GCN-NEXT:    s_add_u32 s1, 0, 0xaaaa0000
+; GCN-NEXT:    v_mov_b32_e32 v0, 0xffffffe8
+; GCN-NEXT:    v_mul_hi_u32 v0, s1, v0
+; GCN-NEXT:    s_addc_u32 s8, 0, 42
+; GCN-NEXT:    s_add_i32 s8, s8, 0xaaaaa80
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_mov_b32 s0, s4
-; GCN-NEXT:    v_mul_hi_u32 v2, v0, s8
-; GCN-NEXT:    v_mul_lo_u32 v4, v1, s8
-; GCN-NEXT:    v_mul_lo_u32 v3, v0, s8
-; GCN-NEXT:    s_mov_b32 s1, s5
-; GCN-NEXT:    v_sub_i32_e32 v2, vcc, v2, v0
-; GCN-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; GCN-NEXT:    v_mul_hi_u32 v5, v0, v3
-; GCN-NEXT:    v_mul_lo_u32 v4, v0, v2
-; GCN-NEXT:    v_mul_hi_u32 v6, v0, v2
-; GCN-NEXT:    v_mul_hi_u32 v7, v1, v2
-; GCN-NEXT:    v_mul_lo_u32 v2, v1, v2
-; GCN-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GCN-NEXT:    v_addc_u32_e32 v5, vcc, 0, v6, vcc
-; GCN-NEXT:    v_mul_lo_u32 v6, v1, v3
-; GCN-NEXT:    v_mul_hi_u32 v3, v1, v3
-; GCN-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; GCN-NEXT:    v_addc_u32_e32 v3, vcc, v5, v3, vcc
-; GCN-NEXT:    v_addc_u32_e32 v4, vcc, 0, v7, vcc
-; GCN-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GCN-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GCN-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
-; GCN-NEXT:    v_mul_hi_u32 v2, v0, s8
-; GCN-NEXT:    v_mul_lo_u32 v3, v1, s8
-; GCN-NEXT:    v_mul_lo_u32 v4, v0, s8
-; GCN-NEXT:    v_sub_i32_e32 v2, vcc, v2, v0
-; GCN-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GCN-NEXT:    v_mul_lo_u32 v3, v0, v2
-; GCN-NEXT:    v_mul_hi_u32 v5, v0, v4
-; GCN-NEXT:    v_mul_hi_u32 v6, v0, v2
-; GCN-NEXT:    v_mul_hi_u32 v7, v1, v2
-; GCN-NEXT:    v_mul_lo_u32 v2, v1, v2
-; GCN-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
-; GCN-NEXT:    v_addc_u32_e32 v5, vcc, 0, v6, vcc
-; GCN-NEXT:    v_mul_lo_u32 v6, v1, v4
-; GCN-NEXT:    v_mul_hi_u32 v4, v1, v4
-; GCN-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; GCN-NEXT:    v_addc_u32_e32 v3, vcc, v5, v4, vcc
-; GCN-NEXT:    v_addc_u32_e32 v4, vcc, 0, v7, vcc
-; GCN-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; GCN-NEXT:    s_mul_i32 s4, s1, 0xffffffe8
+; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s1, v0
+; GCN-NEXT:    s_mul_i32 s9, s8, 0xffffffe8
+; GCN-NEXT:    v_mov_b32_e32 v1, s4
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, s9, v0
+; GCN-NEXT:    v_mul_hi_u32 v2, s8, v1
+; GCN-NEXT:    v_mul_lo_u32 v3, s1, v0
+; GCN-NEXT:    v_mul_hi_u32 v1, s1, v1
+; GCN-NEXT:    v_mul_hi_u32 v4, s1, v0
+; GCN-NEXT:    s_mul_i32 s4, s8, s4
+; GCN-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
 ; GCN-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GCN-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GCN-NEXT:    v_mul_hi_u32 v4, s8, v0
+; GCN-NEXT:    v_mul_lo_u32 v0, s8, v0
+; GCN-NEXT:    v_add_i32_e32 v1, vcc, s4, v1
+; GCN-NEXT:    v_addc_u32_e32 v1, vcc, v3, v2, vcc
+; GCN-NEXT:    v_addc_u32_e32 v2, vcc, 0, v4, vcc
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
+; GCN-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
+; GCN-NEXT:    v_mov_b32_e32 v2, s8
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, s1, v0
+; GCN-NEXT:    v_addc_u32_e32 v1, vcc, v2, v1, vcc
 ; GCN-NEXT:    v_mul_lo_u32 v2, s6, v1
 ; GCN-NEXT:    v_mul_hi_u32 v3, s6, v0
 ; GCN-NEXT:    v_mul_hi_u32 v4, s6, v1
@@ -1393,6 +1366,8 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(ptr addrspace(1) %out, i64 %x)
 ; GCN-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
 ; GCN-NEXT:    v_mul_lo_u32 v4, s7, v0
 ; GCN-NEXT:    v_mul_hi_u32 v0, s7, v0
+; GCN-NEXT:    s_mov_b32 s2, -1
+; GCN-NEXT:    s_mov_b32 s1, s5
 ; GCN-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; GCN-NEXT:    v_addc_u32_e32 v0, vcc, v3, v0, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
@@ -1502,58 +1477,33 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) {
 ; GCN-LABEL: v_test_udiv_k_den_i64:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_mov_b32_e32 v2, 0x4f800000
-; GCN-NEXT:    v_madak_f32 v2, 0, v2, 0x41c00000
-; GCN-NEXT:    v_rcp_f32_e32 v2, v2
-; GCN-NEXT:    s_movk_i32 s4, 0xffe8
-; GCN-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
-; GCN-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
-; GCN-NEXT:    v_trunc_f32_e32 v3, v3
-; GCN-NEXT:    v_madmk_f32 v2, v3, 0xcf800000, v2
-; GCN-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; GCN-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GCN-NEXT:    v_mul_hi_u32 v4, v2, s4
-; GCN-NEXT:    v_mul_lo_u32 v6, v3, s4
-; GCN-NEXT:    v_mul_lo_u32 v5, v2, s4
-; GCN-NEXT:    v_sub_i32_e32 v4, vcc, v4, v2
-; GCN-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; GCN-NEXT:    v_mul_hi_u32 v7, v2, v5
-; GCN-NEXT:    v_mul_lo_u32 v6, v2, v4
-; GCN-NEXT:    v_mul_hi_u32 v8, v2, v4
-; GCN-NEXT:    v_mul_hi_u32 v9, v3, v4
-; GCN-NEXT:    v_mul_lo_u32 v4, v3, v4
-; GCN-NEXT:    v_add_i32_e32 v6, vcc, v7, v6
-; GCN-NEXT:    v_addc_u32_e32 v7, vcc, 0, v8, vcc
-; GCN-NEXT:    v_mul_lo_u32 v8, v3, v5
-; GCN-NEXT:    v_mul_hi_u32 v5, v3, v5
-; GCN-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
-; GCN-NEXT:    v_addc_u32_e32 v5, vcc, v7, v5, vcc
-; GCN-NEXT:    v_addc_u32_e32 v6, vcc, 0, v9, vcc
-; GCN-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GCN-NEXT:    v_addc_u32_e32 v5, vcc, 0, v6, vcc
-; GCN-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; GCN-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
-; GCN-NEXT:    v_mul_hi_u32 v4, v2, s4
-; GCN-NEXT:    v_mul_lo_u32 v5, v3, s4
-; GCN-NEXT:    v_mul_lo_u32 v6, v2, s4
-; GCN-NEXT:    v_sub_i32_e32 v4, vcc, v4, v2
-; GCN-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GCN-NEXT:    v_mul_lo_u32 v5, v2, v4
-; GCN-NEXT:    v_mul_hi_u32 v7, v2, v6
-; GCN-NEXT:    v_mul_hi_u32 v8, v2, v4
-; GCN-NEXT:    v_mul_hi_u32 v9, v3, v4
-; GCN-NEXT:    v_mul_lo_u32 v4, v3, v4
-; GCN-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
-; GCN-NEXT:    v_addc_u32_e32 v7, vcc, 0, v8, vcc
-; GCN-NEXT:    v_mul_lo_u32 v8, v3, v6
-; GCN-NEXT:    v_mul_hi_u32 v6, v3, v6
-; GCN-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
-; GCN-NEXT:    v_addc_u32_e32 v5, vcc, v7, v6, vcc
-; GCN-NEXT:    v_addc_u32_e32 v6, vcc, 0, v9, vcc
-; GCN-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
+; GCN-NEXT:    s_add_u32 s4, 0, 0xaaaa0000
+; GCN-NEXT:    v_mov_b32_e32 v2, 0xffffffe8
+; GCN-NEXT:    v_mul_hi_u32 v2, s4, v2
+; GCN-NEXT:    s_addc_u32 s5, 0, 42
+; GCN-NEXT:    s_add_i32 s5, s5, 0xaaaaa80
+; GCN-NEXT:    s_mul_i32 s6, s4, 0xffffffe8
+; GCN-NEXT:    v_subrev_i32_e32 v2, vcc, s4, v2
+; GCN-NEXT:    s_mul_i32 s7, s5, 0xffffffe8
+; GCN-NEXT:    v_mov_b32_e32 v3, s6
+; GCN-NEXT:    v_add_i32_e32 v2, vcc, s7, v2
+; GCN-NEXT:    v_mul_hi_u32 v4, s5, v3
+; GCN-NEXT:    v_mul_lo_u32 v5, s4, v2
+; GCN-NEXT:    v_mul_hi_u32 v3, s4, v3
+; GCN-NEXT:    v_mul_hi_u32 v6, s4, v2
+; GCN-NEXT:    s_mul_i32 s6, s5, s6
+; GCN-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
 ; GCN-NEXT:    v_addc_u32_e32 v5, vcc, 0, v6, vcc
-; GCN-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; GCN-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; GCN-NEXT:    v_mul_hi_u32 v6, s5, v2
+; GCN-NEXT:    v_mul_lo_u32 v2, s5, v2
+; GCN-NEXT:    v_add_i32_e32 v3, vcc, s6, v3
+; GCN-NEXT:    v_addc_u32_e32 v3, vcc, v5, v4, vcc
+; GCN-NEXT:    v_addc_u32_e32 v4, vcc, 0, v6, vcc
+; GCN-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; GCN-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
+; GCN-NEXT:    v_mov_b32_e32 v4, s5
+; GCN-NEXT:    v_add_i32_e32 v2, vcc, s4, v2
+; GCN-NEXT:    v_addc_u32_e32 v3, vcc, v4, v3, vcc
 ; GCN-NEXT:    v_mul_lo_u32 v4, v0, v3
 ; GCN-NEXT:    v_mul_hi_u32 v5, v0, v2
 ; GCN-NEXT:    v_mul_hi_u32 v6, v0, v3
diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll
index f68d14a32b929a5..91d09c01639ffcb 100644
--- a/llvm/test/CodeGen/AMDGPU/urem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/urem64.ll
@@ -894,64 +894,36 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x)
 define amdgpu_kernel void @s_test_urem_k_den_i64(ptr addrspace(1) %out, i64 %x) {
 ; GCN-LABEL: s_test_urem_k_den_i64:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    v_mov_b32_e32 v0, 0x4f800000
-; GCN-NEXT:    v_madak_f32 v0, 0, v0, 0x41c00000
-; GCN-NEXT:    v_rcp_f32_e32 v0, v0
-; GCN-NEXT:    s_movk_i32 s2, 0xffe8
 ; GCN-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
+; GCN-NEXT:    s_add_u32 s0, 0, 0xaaaa0000
+; GCN-NEXT:    v_mov_b32_e32 v0, 0xffffffe8
+; GCN-NEXT:    v_mul_hi_u32 v0, s0, v0
+; GCN-NEXT:    s_addc_u32 s1, 0, 42
+; GCN-NEXT:    s_add_i32 s1, s1, 0xaaaaa80
+; GCN-NEXT:    s_mul_i32 s8, s0, 0xffffffe8
+; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s0, v0
+; GCN-NEXT:    s_mul_i32 s9, s1, 0xffffffe8
+; GCN-NEXT:    v_mov_b32_e32 v1, s8
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, s9, v0
+; GCN-NEXT:    v_mul_hi_u32 v2, s1, v1
+; GCN-NEXT:    v_mul_lo_u32 v3, s0, v0
+; GCN-NEXT:    v_mul_hi_u32 v1, s0, v1
+; GCN-NEXT:    v_mul_hi_u32 v4, s0, v0
+; GCN-NEXT:    s_mul_i32 s8, s1, s8
 ; GCN-NEXT:    s_mov_b32 s3, 0xf000
-; GCN-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
-; GCN-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
-; GCN-NEXT:    v_trunc_f32_e32 v1, v1
-; GCN-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
-; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s0, s4
-; GCN-NEXT:    s_mov_b32 s1, s5
-; GCN-NEXT:    v_mul_hi_u32 v2, v0, s2
-; GCN-NEXT:    v_mul_lo_u32 v4, v1, s2
-; GCN-NEXT:    v_mul_lo_u32 v3, v0, s2
-; GCN-NEXT:    v_sub_i32_e32 v2, vcc, v2, v0
-; GCN-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; GCN-NEXT:    v_mul_hi_u32 v5, v0, v3
-; GCN-NEXT:    v_mul_lo_u32 v4, v0, v2
-; GCN-NEXT:    v_mul_hi_u32 v6, v0, v2
-; GCN-NEXT:    v_mul_hi_u32 v7, v1, v2
-; GCN-NEXT:    v_mul_lo_u32 v2, v1, v2
-; GCN-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
-; GCN-NEXT:    v_addc_u32_e32 v5, vcc, 0, v6, vcc
-; GCN-NEXT:    v_mul_lo_u32 v6, v1, v3
-; GCN-NEXT:    v_mul_hi_u32 v3, v1, v3
-; GCN-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
-; GCN-NEXT:    v_addc_u32_e32 v3, vcc, v5, v3, vcc
-; GCN-NEXT:    v_addc_u32_e32 v4, vcc, 0, v7, vcc
-; GCN-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
+; GCN-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
 ; GCN-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GCN-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
-; GCN-NEXT:    v_mul_hi_u32 v2, v0, s2
-; GCN-NEXT:    v_mul_lo_u32 v3, v1, s2
-; GCN-NEXT:    v_mul_lo_u32 v4, v0, s2
-; GCN-NEXT:    s_mov_b32 s2, -1
-; GCN-NEXT:    v_sub_i32_e32 v2, vcc, v2, v0
-; GCN-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GCN-NEXT:    v_mul_lo_u32 v3, v0, v2
-; GCN-NEXT:    v_mul_hi_u32 v5, v0, v4
-; GCN-NEXT:    v_mul_hi_u32 v6, v0, v2
-; GCN-NEXT:    v_mul_hi_u32 v7, v1, v2
-; GCN-NEXT:    v_mul_lo_u32 v2, v1, v2
-; GCN-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
-; GCN-NEXT:    v_addc_u32_e32 v5, vcc, 0, v6, vcc
-; GCN-NEXT:    v_mul_lo_u32 v6, v1, v4
-; GCN-NEXT:    v_mul_hi_u32 v4, v1, v4
-; GCN-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
-; GCN-NEXT:    v_addc_u32_e32 v3, vcc, v5, v4, vcc
-; GCN-NEXT:    v_addc_u32_e32 v4, vcc, 0, v7, vcc
-; GCN-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
-; GCN-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GCN-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GCN-NEXT:    v_mul_hi_u32 v4, s1, v0
+; GCN-NEXT:    v_mul_lo_u32 v0, s1, v0
+; GCN-NEXT:    v_add_i32_e32 v1, vcc, s8, v1
+; GCN-NEXT:    v_addc_u32_e32 v1, vcc, v3, v2, vcc
+; GCN-NEXT:    v_addc_u32_e32 v2, vcc, 0, v4, vcc
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
+; GCN-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
+; GCN-NEXT:    v_mov_b32_e32 v2, s1
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, s0, v0
+; GCN-NEXT:    v_addc_u32_e32 v1, vcc, v2, v1, vcc
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    v_mul_lo_u32 v2, s6, v1
 ; GCN-NEXT:    v_mul_hi_u32 v3, s6, v0
 ; GCN-NEXT:    v_mul_hi_u32 v4, s6, v1
@@ -961,6 +933,8 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(ptr addrspace(1) %out, i64 %x)
 ; GCN-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
 ; GCN-NEXT:    v_mul_lo_u32 v4, s7, v0
 ; GCN-NEXT:    v_mul_hi_u32 v0, s7, v0
+; GCN-NEXT:    s_mov_b32 s2, -1
+; GCN-NEXT:    s_mov_b32 s0, s4
 ; GCN-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; GCN-NEXT:    v_addc_u32_e32 v0, vcc, v3, v0, vcc
 ; GCN-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
@@ -969,6 +943,7 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(ptr addrspace(1) %out, i64 %x)
 ; GCN-NEXT:    v_mul_lo_u32 v1, v1, 24
 ; GCN-NEXT:    v_mul_hi_u32 v2, v0, 24
 ; GCN-NEXT:    v_mul_lo_u32 v0, v0, 24
+; GCN-NEXT:    s_mov_b32 s1, s5
 ; GCN-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
 ; GCN-NEXT:    v_mov_b32_e32 v2, s7
 ; GCN-NEXT:    v_sub_i32_e32 v0, vcc, s6, v0

>From 6959332804579b0e9273c89ecb2d2cdbaa73d04d Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Thu, 19 Oct 2023 10:16:20 -0400
Subject: [PATCH 03/76] [DIAG][msan] fix libc check string for dladdr1 call
 (#69359)

The check for dladdr1 for shared libc is too strict. Depending on how the system is setup we sometimes pick up the none generic lib name with the version string in it.

Update check to for libc to account for version string.
---
 compiler-rt/test/msan/dladdr1_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/test/msan/dladdr1_test.c b/compiler-rt/test/msan/dladdr1_test.c
index dc5f6fdf9f91728..534f5f9cc193a2f 100644
--- a/compiler-rt/test/msan/dladdr1_test.c
+++ b/compiler-rt/test/msan/dladdr1_test.c
@@ -60,7 +60,7 @@ int main(int argc, char *argv[]) {
       fflush(stdout);
       map_ptr = map_ptr->l_next;
     }
-    // CHECK: libc.so
+    // CHECK: libc{{[\-]*.*}}.so
     // CHECK: dladdr1_test
   }
 

>From 3fd5113cbac9ccb68ca52115f23074f2feb22ac4 Mon Sep 17 00:00:00 2001
From: lntue <35648136+lntue at users.noreply.github.com>
Date: Thu, 19 Oct 2023 10:30:11 -0400
Subject: [PATCH 04/76] [libc][math][NFC] Remove global scope constants
 declaration in math tests (#69558)

Clean up usage of `DECLARE_SPECIAL_CONSTANTS` in global scope.
---
 libc/src/__support/FPUtil/FPBits.h          | 38 +++++++++++----------
 libc/test/UnitTest/FPMatcher.h              | 10 ++++++
 libc/test/src/math/acosf_test.cpp           | 10 +++---
 libc/test/src/math/acoshf_test.cpp          | 14 ++++----
 libc/test/src/math/asinf_test.cpp           | 10 +++---
 libc/test/src/math/asinhf_test.cpp          | 14 ++++----
 libc/test/src/math/atanf_test.cpp           | 10 +++---
 libc/test/src/math/atanhf_test.cpp          | 10 +++---
 libc/test/src/math/cos_test.cpp             |  6 ++--
 libc/test/src/math/cosf_test.cpp            | 12 +++----
 libc/test/src/math/coshf_test.cpp           | 12 +++----
 libc/test/src/math/erff_test.cpp            | 10 +++---
 libc/test/src/math/exp10_test.cpp           | 10 +++---
 libc/test/src/math/exp10f_test.cpp          | 14 ++++----
 libc/test/src/math/exp2_test.cpp            | 10 +++---
 libc/test/src/math/exp2f_test.cpp           | 14 ++++----
 libc/test/src/math/exp_test.cpp             | 10 +++---
 libc/test/src/math/expf_test.cpp            | 14 ++++----
 libc/test/src/math/explogxf_test.cpp        | 10 +++---
 libc/test/src/math/expm1_test.cpp           |  8 ++---
 libc/test/src/math/expm1f_test.cpp          | 14 ++++----
 libc/test/src/math/inv_trigf_utils_test.cpp |  8 ++---
 libc/test/src/math/log10_test.cpp           | 12 +++----
 libc/test/src/math/log10f_test.cpp          | 10 +++---
 libc/test/src/math/log1p_test.cpp           | 12 +++----
 libc/test/src/math/log1pf_test.cpp          | 10 +++---
 libc/test/src/math/log2_test.cpp            | 12 +++----
 libc/test/src/math/log2f_test.cpp           | 10 +++---
 libc/test/src/math/log_test.cpp             | 12 +++----
 libc/test/src/math/logf_test.cpp            | 10 +++---
 libc/test/src/math/sin_test.cpp             |  6 ++--
 libc/test/src/math/sincosf_test.cpp         | 13 ++++---
 libc/test/src/math/sinf_test.cpp            | 15 ++++----
 libc/test/src/math/sinhf_test.cpp           | 14 ++++----
 libc/test/src/math/smoke/acosf_test.cpp     |  6 ++--
 libc/test/src/math/smoke/acoshf_test.cpp    |  6 ++--
 libc/test/src/math/smoke/asinf_test.cpp     |  6 ++--
 libc/test/src/math/smoke/asinhf_test.cpp    |  6 ++--
 libc/test/src/math/smoke/atanf_test.cpp     |  6 ++--
 libc/test/src/math/smoke/atanhf_test.cpp    |  6 ++--
 libc/test/src/math/smoke/cosf_test.cpp      |  6 ++--
 libc/test/src/math/smoke/coshf_test.cpp     |  8 ++---
 libc/test/src/math/smoke/erff_test.cpp      |  6 ++--
 libc/test/src/math/smoke/exp10_test.cpp     |  6 ++--
 libc/test/src/math/smoke/exp10f_test.cpp    |  6 ++--
 libc/test/src/math/smoke/exp2_test.cpp      |  6 ++--
 libc/test/src/math/smoke/exp2f_test.cpp     |  6 ++--
 libc/test/src/math/smoke/exp_test.cpp       |  6 ++--
 libc/test/src/math/smoke/expf_test.cpp      |  6 ++--
 libc/test/src/math/smoke/expm1_test.cpp     |  8 ++---
 libc/test/src/math/smoke/expm1f_test.cpp    |  6 ++--
 libc/test/src/math/smoke/log10_test.cpp     |  6 ++--
 libc/test/src/math/smoke/log10f_test.cpp    |  4 +--
 libc/test/src/math/smoke/log1p_test.cpp     |  6 ++--
 libc/test/src/math/smoke/log1pf_test.cpp    |  4 +--
 libc/test/src/math/smoke/log2_test.cpp      |  6 ++--
 libc/test/src/math/smoke/log2f_test.cpp     |  4 +--
 libc/test/src/math/smoke/log_test.cpp       |  6 ++--
 libc/test/src/math/smoke/logf_test.cpp      |  4 +--
 libc/test/src/math/smoke/sincosf_test.cpp   |  6 ++--
 libc/test/src/math/smoke/sinf_test.cpp      |  6 ++--
 libc/test/src/math/smoke/sinhf_test.cpp     | 10 +++---
 libc/test/src/math/smoke/tanf_test.cpp      |  6 ++--
 libc/test/src/math/smoke/tanhf_test.cpp     |  6 ++--
 libc/test/src/math/tan_test.cpp             |  6 ++--
 libc/test/src/math/tanf_test.cpp            | 13 ++++---
 libc/test/src/math/tanhf_test.cpp           | 10 +++---
 67 files changed, 278 insertions(+), 335 deletions(-)

diff --git a/libc/src/__support/FPUtil/FPBits.h b/libc/src/__support/FPUtil/FPBits.h
index 8d3b87c70e3c0e1..b423ba41f11c4be 100644
--- a/libc/src/__support/FPUtil/FPBits.h
+++ b/libc/src/__support/FPUtil/FPBits.h
@@ -51,23 +51,23 @@ template <typename T> struct FPBits {
 
   UIntType bits;
 
-  LIBC_INLINE void set_mantissa(UIntType mantVal) {
+  LIBC_INLINE constexpr void set_mantissa(UIntType mantVal) {
     mantVal &= (FloatProp::MANTISSA_MASK);
     bits &= ~(FloatProp::MANTISSA_MASK);
     bits |= mantVal;
   }
 
-  LIBC_INLINE UIntType get_mantissa() const {
+  LIBC_INLINE constexpr UIntType get_mantissa() const {
     return bits & FloatProp::MANTISSA_MASK;
   }
 
-  LIBC_INLINE void set_unbiased_exponent(UIntType expVal) {
+  LIBC_INLINE constexpr void set_unbiased_exponent(UIntType expVal) {
     expVal = (expVal << (FloatProp::MANTISSA_WIDTH)) & FloatProp::EXPONENT_MASK;
     bits &= ~(FloatProp::EXPONENT_MASK);
     bits |= expVal;
   }
 
-  LIBC_INLINE uint16_t get_unbiased_exponent() const {
+  LIBC_INLINE constexpr uint16_t get_unbiased_exponent() const {
     return uint16_t((bits & FloatProp::EXPONENT_MASK) >>
                     (FloatProp::MANTISSA_WIDTH));
   }
@@ -81,13 +81,13 @@ template <typename T> struct FPBits {
            (FloatProp::MANTISSA_MASK & bits);
   }
 
-  LIBC_INLINE void set_sign(bool signVal) {
+  LIBC_INLINE constexpr void set_sign(bool signVal) {
     bits |= FloatProp::SIGN_MASK;
     if (!signVal)
       bits -= FloatProp::SIGN_MASK;
   }
 
-  LIBC_INLINE bool get_sign() const {
+  LIBC_INLINE constexpr bool get_sign() const {
     return (bits & FloatProp::SIGN_MASK) != 0;
   }
 
@@ -118,13 +118,15 @@ template <typename T> struct FPBits {
 
   LIBC_INLINE constexpr T get_val() const { return cpp::bit_cast<T>(bits); }
 
-  LIBC_INLINE void set_val(T value) { bits = cpp::bit_cast<UIntType>(value); }
+  LIBC_INLINE constexpr void set_val(T value) {
+    bits = cpp::bit_cast<UIntType>(value);
+  }
 
-  LIBC_INLINE explicit operator T() const { return get_val(); }
+  LIBC_INLINE constexpr explicit operator T() const { return get_val(); }
 
-  LIBC_INLINE UIntType uintval() const { return bits; }
+  LIBC_INLINE constexpr UIntType uintval() const { return bits; }
 
-  LIBC_INLINE int get_exponent() const {
+  LIBC_INLINE constexpr int get_exponent() const {
     return int(get_unbiased_exponent()) - EXPONENT_BIAS;
   }
 
@@ -134,7 +136,7 @@ template <typename T> struct FPBits {
   // values are calculated from the exponent, since just subtracting the bias
   // will give a slightly incorrect result. Additionally, zero has an exponent
   // of zero, and that should actually be treated as zero.
-  LIBC_INLINE int get_explicit_exponent() const {
+  LIBC_INLINE constexpr int get_explicit_exponent() const {
     const int unbiased_exp = int(get_unbiased_exponent());
     if (is_zero()) {
       return 0;
@@ -145,25 +147,25 @@ template <typename T> struct FPBits {
     }
   }
 
-  LIBC_INLINE bool is_zero() const {
+  LIBC_INLINE constexpr bool is_zero() const {
     // Remove sign bit by shift
     return (bits << 1) == 0;
   }
 
-  LIBC_INLINE bool is_inf() const {
+  LIBC_INLINE constexpr bool is_inf() const {
     return (bits & FloatProp::EXP_MANT_MASK) == FloatProp::EXPONENT_MASK;
   }
 
-  LIBC_INLINE bool is_nan() const {
+  LIBC_INLINE constexpr bool is_nan() const {
     return (bits & FloatProp::EXP_MANT_MASK) > FloatProp::EXPONENT_MASK;
   }
 
-  LIBC_INLINE bool is_quiet_nan() const {
+  LIBC_INLINE constexpr bool is_quiet_nan() const {
     return (bits & FloatProp::EXP_MANT_MASK) ==
            (FloatProp::EXPONENT_MASK | FloatProp::QUIET_NAN_MASK);
   }
 
-  LIBC_INLINE bool is_inf_or_nan() const {
+  LIBC_INLINE constexpr bool is_inf_or_nan() const {
     return (bits & FloatProp::EXPONENT_MASK) == FloatProp::EXPONENT_MASK;
   }
 
@@ -226,8 +228,8 @@ template <typename T> struct FPBits {
     return result;
   }
 
-  LIBC_INLINE static FPBits<T> create_value(bool sign, UIntType unbiased_exp,
-                                            UIntType mantissa) {
+  LIBC_INLINE static constexpr FPBits<T>
+  create_value(bool sign, UIntType unbiased_exp, UIntType mantissa) {
     FPBits<T> result;
     result.set_sign(sign);
     result.set_unbiased_exponent(unbiased_exp);
diff --git a/libc/test/UnitTest/FPMatcher.h b/libc/test/UnitTest/FPMatcher.h
index fb60916a0402f05..14c8a85ba7ad480 100644
--- a/libc/test/UnitTest/FPMatcher.h
+++ b/libc/test/UnitTest/FPMatcher.h
@@ -59,6 +59,16 @@ template <TestCond C, typename T> FPMatcher<T, C> getMatcher(T expectedValue) {
   return FPMatcher<T, C>(expectedValue);
 }
 
+template <typename T> struct FPTest : public Test {
+  using FPBits = LIBC_NAMESPACE::fputil::FPBits<T>;
+  using UIntType = typename FPBits::UIntType;
+  static constexpr T zero = T(FPBits::zero());
+  static constexpr T neg_zero = T(FPBits::neg_zero());
+  static constexpr T aNaN = T(FPBits::build_quiet_nan(1));
+  static constexpr T inf = T(FPBits::inf());
+  static constexpr T neg_inf = T(FPBits::neg_inf());
+};
+
 } // namespace testing
 } // namespace LIBC_NAMESPACE
 
diff --git a/libc/test/src/math/acosf_test.cpp b/libc/test/src/math/acosf_test.cpp
index cb1a07448b471fc..409cf2bc891332f 100644
--- a/libc/test/src/math/acosf_test.cpp
+++ b/libc/test/src/math/acosf_test.cpp
@@ -17,13 +17,11 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
-
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-DECLARE_SPECIAL_CONSTANTS(float)
+using LlvmLibcAcosfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-TEST(LlvmLibcAcosfTest, SpecialNumbers) {
+TEST_F(LlvmLibcAcosfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::acosf(aNaN));
@@ -45,7 +43,7 @@ TEST(LlvmLibcAcosfTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(EDOM);
 }
 
-TEST(LlvmLibcAcosfTest, InFloatRange) {
+TEST_F(LlvmLibcAcosfTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'000;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
@@ -57,7 +55,7 @@ TEST(LlvmLibcAcosfTest, InFloatRange) {
   }
 }
 
-TEST(LlvmLibcAcosfTest, SpecificBitPatterns) {
+TEST_F(LlvmLibcAcosfTest, SpecificBitPatterns) {
   constexpr int N = 13;
   constexpr uint32_t INPUTS[N] = {
       0x3f000000, // x = 0.5f
diff --git a/libc/test/src/math/acoshf_test.cpp b/libc/test/src/math/acoshf_test.cpp
index 846f5033fb39a09..fe8d76918d486b2 100644
--- a/libc/test/src/math/acoshf_test.cpp
+++ b/libc/test/src/math/acoshf_test.cpp
@@ -17,13 +17,11 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits_t = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcAcoshfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcAcoshfTest, SpecialNumbers) {
+TEST_F(LlvmLibcAcoshfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::acoshf(aNaN));
@@ -42,11 +40,11 @@ TEST(LlvmLibcAcoshfTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(EDOM);
 }
 
-TEST(LlvmLibcAcoshfTest, InFloatRange) {
+TEST_F(LlvmLibcAcoshfTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'000;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
-    float x = float(FPBits_t(v));
+    float x = float(FPBits(v));
     if (isnan(x) || isinf(x))
       continue;
     ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Acosh, x,
@@ -54,7 +52,7 @@ TEST(LlvmLibcAcoshfTest, InFloatRange) {
   }
 }
 
-TEST(LlvmLibcAcoshfTest, SpecificBitPatterns) {
+TEST_F(LlvmLibcAcoshfTest, SpecificBitPatterns) {
   constexpr int N = 12;
   constexpr uint32_t INPUTS[N] = {
       0x3f800000, // x = 1.0f
@@ -72,7 +70,7 @@ TEST(LlvmLibcAcoshfTest, SpecificBitPatterns) {
   };
 
   for (int i = 0; i < N; ++i) {
-    float x = float(FPBits_t(INPUTS[i]));
+    float x = float(FPBits(INPUTS[i]));
     EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Acosh, x,
                                    LIBC_NAMESPACE::acoshf(x), 0.5);
   }
diff --git a/libc/test/src/math/asinf_test.cpp b/libc/test/src/math/asinf_test.cpp
index 49dcd38e82bf59d..db9dd0d78404aad 100644
--- a/libc/test/src/math/asinf_test.cpp
+++ b/libc/test/src/math/asinf_test.cpp
@@ -18,13 +18,11 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcAsinfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcAsinfTest, SpecialNumbers) {
+TEST_F(LlvmLibcAsinfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::asinf(aNaN));
@@ -43,7 +41,7 @@ TEST(LlvmLibcAsinfTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(EDOM);
 }
 
-TEST(LlvmLibcAsinfTest, InFloatRange) {
+TEST_F(LlvmLibcAsinfTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'000;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
@@ -55,7 +53,7 @@ TEST(LlvmLibcAsinfTest, InFloatRange) {
   }
 }
 
-TEST(LlvmLibcAsinfTest, SpecificBitPatterns) {
+TEST_F(LlvmLibcAsinfTest, SpecificBitPatterns) {
   constexpr int N = 11;
   constexpr uint32_t INPUTS[N] = {
       0x3f000000, // x = 0.5f
diff --git a/libc/test/src/math/asinhf_test.cpp b/libc/test/src/math/asinhf_test.cpp
index 0bbd5db031e079f..2afb5b3a9ff8d22 100644
--- a/libc/test/src/math/asinhf_test.cpp
+++ b/libc/test/src/math/asinhf_test.cpp
@@ -17,13 +17,11 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits_t = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcAsinhfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcAsinhfTest, SpecialNumbers) {
+TEST_F(LlvmLibcAsinhfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::asinhf(aNaN));
@@ -42,11 +40,11 @@ TEST(LlvmLibcAsinhfTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(0);
 }
 
-TEST(LlvmLibcAsinhfTest, InFloatRange) {
+TEST_F(LlvmLibcAsinhfTest, InFloatRange) {
   constexpr uint32_t COUNT = 1'001;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
-    float x = float(FPBits_t(v));
+    float x = float(FPBits(v));
     if (isnan(x) || isinf(x))
       continue;
     ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Asinh, x,
@@ -56,7 +54,7 @@ TEST(LlvmLibcAsinhfTest, InFloatRange) {
   }
 }
 
-TEST(LlvmLibcAsinhfTest, SpecificBitPatterns) {
+TEST_F(LlvmLibcAsinhfTest, SpecificBitPatterns) {
   constexpr int N = 11;
   constexpr uint32_t INPUTS[N] = {
       0x45abaf26, // |x| = 0x1.575e4cp12f
@@ -73,7 +71,7 @@ TEST(LlvmLibcAsinhfTest, SpecificBitPatterns) {
   };
 
   for (int i = 0; i < N; ++i) {
-    float x = float(FPBits_t(INPUTS[i]));
+    float x = float(FPBits(INPUTS[i]));
     EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Asinh, x,
                                    LIBC_NAMESPACE::asinhf(x), 0.5);
     EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Asinh, -x,
diff --git a/libc/test/src/math/atanf_test.cpp b/libc/test/src/math/atanf_test.cpp
index c7eab7b66872396..cd4f7b89ee282e8 100644
--- a/libc/test/src/math/atanf_test.cpp
+++ b/libc/test/src/math/atanf_test.cpp
@@ -19,13 +19,11 @@
 
 #include <initializer_list>
 
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcAtanfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcAtanfTest, SpecialNumbers) {
+TEST_F(LlvmLibcAtanfTest, SpecialNumbers) {
   libc_errno = 0;
   LIBC_NAMESPACE::fputil::clear_except(FE_ALL_EXCEPT);
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::atanf(aNaN));
@@ -43,7 +41,7 @@ TEST(LlvmLibcAtanfTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(0);
 }
 
-TEST(LlvmLibcAtanfTest, InFloatRange) {
+TEST_F(LlvmLibcAtanfTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'000;
   const uint32_t STEP = FPBits(inf).uintval() / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
@@ -56,7 +54,7 @@ TEST(LlvmLibcAtanfTest, InFloatRange) {
 }
 
 // For small values, tanh(x) is x.
-TEST(LlvmLibcAtanfTest, SpecialValues) {
+TEST_F(LlvmLibcAtanfTest, SpecialValues) {
   for (uint32_t v : {0x3d8d6b23U, 0x3feefcfbU, 0xbd8d6b23U, 0xbfeefcfbU,
                      0x7F800000U, 0xFF800000U}) {
     float x = float(FPBits(v));
diff --git a/libc/test/src/math/atanhf_test.cpp b/libc/test/src/math/atanhf_test.cpp
index c1a8d1997187728..f62830ac9d4ef78 100644
--- a/libc/test/src/math/atanhf_test.cpp
+++ b/libc/test/src/math/atanhf_test.cpp
@@ -17,13 +17,11 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcAtanhfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcAtanhfTest, SpecialNumbers) {
+TEST_F(LlvmLibcAtanhfTest, SpecialNumbers) {
   libc_errno = 0;
   LIBC_NAMESPACE::fputil::clear_except(FE_ALL_EXCEPT);
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::atanhf(aNaN));
@@ -85,7 +83,7 @@ TEST(LlvmLibcAtanhfTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(EDOM);
 }
 
-TEST(LlvmLibcAtanhfTest, InFloatRange) {
+TEST_F(LlvmLibcAtanhfTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'000;
   const uint32_t STEP = FPBits(1.0f).uintval() / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
@@ -98,7 +96,7 @@ TEST(LlvmLibcAtanhfTest, InFloatRange) {
 }
 
 // For small values, atanh(x) is x.
-TEST(LlvmLibcAtanhfTest, SmallValues) {
+TEST_F(LlvmLibcAtanhfTest, SmallValues) {
   float x = float(FPBits(uint32_t(0x17800000)));
   float result = LIBC_NAMESPACE::atanhf(x);
   EXPECT_MPFR_MATCH(mpfr::Operation::Atanh, x, result, 0.5);
diff --git a/libc/test/src/math/cos_test.cpp b/libc/test/src/math/cos_test.cpp
index 6d8aeaf2230dae9..738d66034f18d21 100644
--- a/libc/test/src/math/cos_test.cpp
+++ b/libc/test/src/math/cos_test.cpp
@@ -13,11 +13,11 @@
 
 #include <math.h>
 
-namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+using LlvmLibcCosTest = LIBC_NAMESPACE::testing::FPTest<double>;
 
-DECLARE_SPECIAL_CONSTANTS(double)
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-TEST(LlvmLibccosTest, Range) {
+TEST_F(LlvmLibcCosTest, Range) {
   static constexpr double _2pi = 6.283185307179586;
   constexpr UIntType COUNT = 100'000;
   constexpr UIntType STEP = UIntType(-1) / COUNT;
diff --git a/libc/test/src/math/cosf_test.cpp b/libc/test/src/math/cosf_test.cpp
index 7b2fdf9f1b6bffa..5a16520439af07e 100644
--- a/libc/test/src/math/cosf_test.cpp
+++ b/libc/test/src/math/cosf_test.cpp
@@ -19,13 +19,11 @@
 #include <stdint.h>
 
 using LIBC_NAMESPACE::testing::SDCOMP26094_VALUES;
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcCosfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcCosfTest, SpecialNumbers) {
+TEST_F(LlvmLibcCosfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::cosf(aNaN));
@@ -44,7 +42,7 @@ TEST(LlvmLibcCosfTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(EDOM);
 }
 
-TEST(LlvmLibcCosfTest, InFloatRange) {
+TEST_F(LlvmLibcCosfTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'000;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
@@ -56,7 +54,7 @@ TEST(LlvmLibcCosfTest, InFloatRange) {
   }
 }
 
-TEST(LlvmLibcCosfTest, SpecificBitPatterns) {
+TEST_F(LlvmLibcCosfTest, SpecificBitPatterns) {
   constexpr int N = 42;
   constexpr uint32_t INPUTS[N] = {
       0x3f06'0a92U, // x = pi/6
@@ -114,7 +112,7 @@ TEST(LlvmLibcCosfTest, SpecificBitPatterns) {
 
 // SDCOMP-26094: check cosf in the cases for which the range reducer
 // returns values furthest beyond its nominal upper bound of pi/4.
-TEST(LlvmLibcCosfTest, SDCOMP_26094) {
+TEST_F(LlvmLibcCosfTest, SDCOMP_26094) {
   for (uint32_t v : SDCOMP26094_VALUES) {
     float x = float(FPBits(v));
     ASSERT_MPFR_MATCH(mpfr::Operation::Cos, x, LIBC_NAMESPACE::cosf(x), 0.5);
diff --git a/libc/test/src/math/coshf_test.cpp b/libc/test/src/math/coshf_test.cpp
index f45f35ad3ea0585..797cfec566ac821 100644
--- a/libc/test/src/math/coshf_test.cpp
+++ b/libc/test/src/math/coshf_test.cpp
@@ -18,13 +18,11 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcCoshfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcCoshfTest, SpecialNumbers) {
+TEST_F(LlvmLibcCoshfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::coshf(aNaN));
@@ -43,7 +41,7 @@ TEST(LlvmLibcCoshfTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(0);
 }
 
-TEST(LlvmLibcCoshfTest, Overflow) {
+TEST_F(LlvmLibcCoshfTest, Overflow) {
   libc_errno = 0;
   EXPECT_FP_EQ_WITH_EXCEPTION(
       inf, LIBC_NAMESPACE::coshf(float(FPBits(0x7f7fffffU))), FE_OVERFLOW);
@@ -58,7 +56,7 @@ TEST(LlvmLibcCoshfTest, Overflow) {
   EXPECT_MATH_ERRNO(ERANGE);
 }
 
-TEST(LlvmLibcCoshfTest, InFloatRange) {
+TEST_F(LlvmLibcCoshfTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'000;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
@@ -69,7 +67,7 @@ TEST(LlvmLibcCoshfTest, InFloatRange) {
   }
 }
 
-TEST(LlvmLibcCoshfTest, SmallValues) {
+TEST_F(LlvmLibcCoshfTest, SmallValues) {
   float x = float(FPBits(0x17800000U));
   float result = LIBC_NAMESPACE::coshf(x);
   EXPECT_MPFR_MATCH(mpfr::Operation::Cosh, x, result, 0.5);
diff --git a/libc/test/src/math/erff_test.cpp b/libc/test/src/math/erff_test.cpp
index 3cb24778d96a151..933f77c0b5a67d2 100644
--- a/libc/test/src/math/erff_test.cpp
+++ b/libc/test/src/math/erff_test.cpp
@@ -16,12 +16,12 @@
 #include <errno.h>
 #include <stdint.h>
 
+using LlvmLibcErffTest = LIBC_NAMESPACE::testing::FPTest<float>;
+
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 using LIBC_NAMESPACE::testing::tlog;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcErffTest, SpecialNumbers) {
+TEST_F(LlvmLibcErffTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::erff(aNaN));
   EXPECT_FP_EQ_ALL_ROUNDING(1.0f, LIBC_NAMESPACE::erff(inf));
   EXPECT_FP_EQ_ALL_ROUNDING(-1.0f, LIBC_NAMESPACE::erff(neg_inf));
@@ -29,7 +29,7 @@ TEST(LlvmLibcErffTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(neg_zero, LIBC_NAMESPACE::erff(neg_zero));
 }
 
-TEST(LlvmLibcErffTest, TrickyInputs) {
+TEST_F(LlvmLibcErffTest, TrickyInputs) {
   constexpr int N = 2;
   constexpr uint32_t INPUTS[N] = {
       0x3f65'9229U, // |x| = 0x1.cb2452p-1f
@@ -44,7 +44,7 @@ TEST(LlvmLibcErffTest, TrickyInputs) {
   }
 }
 
-TEST(LlvmLibcErffTest, InFloatRange) {
+TEST_F(LlvmLibcErffTest, InFloatRange) {
   constexpr uint32_t COUNT = 234561;
   constexpr uint32_t START = 0;           // 0
   constexpr uint32_t STOP = 0x4080'0000U; // 4.0f
diff --git a/libc/test/src/math/exp10_test.cpp b/libc/test/src/math/exp10_test.cpp
index 86d902eaad306b2..ec3925846dba4b1 100644
--- a/libc/test/src/math/exp10_test.cpp
+++ b/libc/test/src/math/exp10_test.cpp
@@ -17,12 +17,12 @@
 #include <errno.h>
 #include <stdint.h>
 
+using LlvmLibcExp10Test = LIBC_NAMESPACE::testing::FPTest<double>;
+
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 using LIBC_NAMESPACE::testing::tlog;
 
-DECLARE_SPECIAL_CONSTANTS(double)
-
-TEST(LlvmLibcExp10Test, SpecialNumbers) {
+TEST_F(LlvmLibcExp10Test, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::exp10(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::exp10(inf));
   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::exp10(neg_inf));
@@ -34,7 +34,7 @@ TEST(LlvmLibcExp10Test, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(1.0, LIBC_NAMESPACE::exp10(-0.0));
 }
 
-TEST(LlvmLibcExp10Test, TrickyInputs) {
+TEST_F(LlvmLibcExp10Test, TrickyInputs) {
   constexpr int N = 41;
   constexpr uint64_t INPUTS[N] = {
       0x40033093317082F8, 0x3FD79289C6E6A5C0,
@@ -85,7 +85,7 @@ TEST(LlvmLibcExp10Test, TrickyInputs) {
   }
 }
 
-TEST(LlvmLibcExp10Test, InDoubleRange) {
+TEST_F(LlvmLibcExp10Test, InDoubleRange) {
   constexpr uint64_t COUNT = 1'231;
   uint64_t START = LIBC_NAMESPACE::fputil::FPBits<double>(0.25).uintval();
   uint64_t STOP = LIBC_NAMESPACE::fputil::FPBits<double>(4.0).uintval();
diff --git a/libc/test/src/math/exp10f_test.cpp b/libc/test/src/math/exp10f_test.cpp
index ac8f5515347054c..e3151dafa942938 100644
--- a/libc/test/src/math/exp10f_test.cpp
+++ b/libc/test/src/math/exp10f_test.cpp
@@ -16,11 +16,11 @@
 
 #include <stdint.h>
 
-namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+using LlvmLibcExp10fTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-TEST(LlvmLibcExp10fTest, SpecialNumbers) {
+TEST_F(LlvmLibcExp10fTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::exp10f(aNaN));
@@ -39,7 +39,7 @@ TEST(LlvmLibcExp10fTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(0);
 }
 
-TEST(LlvmLibcExp10fTest, Overflow) {
+TEST_F(LlvmLibcExp10fTest, Overflow) {
   libc_errno = 0;
   EXPECT_FP_EQ_WITH_EXCEPTION(
       inf, LIBC_NAMESPACE::exp10f(float(FPBits(0x7f7fffffU))), FE_OVERFLOW);
@@ -54,7 +54,7 @@ TEST(LlvmLibcExp10fTest, Overflow) {
   EXPECT_MATH_ERRNO(ERANGE);
 }
 
-TEST(LlvmLibcExp10fTest, Underflow) {
+TEST_F(LlvmLibcExp10fTest, Underflow) {
   libc_errno = 0;
   EXPECT_FP_EQ_WITH_EXCEPTION(
       0.0f, LIBC_NAMESPACE::exp10f(float(FPBits(0xff7fffffU))), FE_UNDERFLOW);
@@ -71,7 +71,7 @@ TEST(LlvmLibcExp10fTest, Underflow) {
   EXPECT_MATH_ERRNO(ERANGE);
 }
 
-TEST(LlvmLibcExp10fTest, TrickyInputs) {
+TEST_F(LlvmLibcExp10fTest, TrickyInputs) {
   constexpr int N = 20;
   constexpr uint32_t INPUTS[N] = {
       0x325e5bd8, // x = 0x1.bcb7bp-27f
@@ -105,7 +105,7 @@ TEST(LlvmLibcExp10fTest, TrickyInputs) {
   }
 }
 
-TEST(LlvmLibcExp10fTest, InFloatRange) {
+TEST_F(LlvmLibcExp10fTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'000;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
diff --git a/libc/test/src/math/exp2_test.cpp b/libc/test/src/math/exp2_test.cpp
index 6a90c9ba911bd3f..539ec7b9368f4c7 100644
--- a/libc/test/src/math/exp2_test.cpp
+++ b/libc/test/src/math/exp2_test.cpp
@@ -17,12 +17,12 @@
 #include <errno.h>
 #include <stdint.h>
 
+using LlvmLibcExp2Test = LIBC_NAMESPACE::testing::FPTest<double>;
+
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 using LIBC_NAMESPACE::testing::tlog;
 
-DECLARE_SPECIAL_CONSTANTS(double)
-
-TEST(LlvmLibcExp2Test, SpecialNumbers) {
+TEST_F(LlvmLibcExp2Test, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::exp2(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::exp2(inf));
   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::exp2(neg_inf));
@@ -33,7 +33,7 @@ TEST(LlvmLibcExp2Test, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(1.0, LIBC_NAMESPACE::exp2(-0.0));
 }
 
-TEST(LlvmLibcExp2Test, TrickyInputs) {
+TEST_F(LlvmLibcExp2Test, TrickyInputs) {
   constexpr int N = 16;
   constexpr uint64_t INPUTS[N] = {
       0x3FD79289C6E6A5C0,
@@ -60,7 +60,7 @@ TEST(LlvmLibcExp2Test, TrickyInputs) {
   }
 }
 
-TEST(LlvmLibcExp2Test, InDoubleRange) {
+TEST_F(LlvmLibcExp2Test, InDoubleRange) {
   constexpr uint64_t COUNT = 1'231;
   uint64_t START = LIBC_NAMESPACE::fputil::FPBits<double>(0.25).uintval();
   uint64_t STOP = LIBC_NAMESPACE::fputil::FPBits<double>(4.0).uintval();
diff --git a/libc/test/src/math/exp2f_test.cpp b/libc/test/src/math/exp2f_test.cpp
index 987e1b9f59cbb7b..4c69c1d48d621c2 100644
--- a/libc/test/src/math/exp2f_test.cpp
+++ b/libc/test/src/math/exp2f_test.cpp
@@ -17,11 +17,11 @@
 
 #include <stdint.h>
 
-namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+using LlvmLibcExp2fTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-TEST(LlvmLibcExp2fTest, SpecialNumbers) {
+TEST_F(LlvmLibcExp2fTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::exp2f(aNaN));
@@ -40,7 +40,7 @@ TEST(LlvmLibcExp2fTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(0);
 }
 
-TEST(LlvmLibcExp2fTest, Overflow) {
+TEST_F(LlvmLibcExp2fTest, Overflow) {
   libc_errno = 0;
   EXPECT_FP_EQ_WITH_EXCEPTION(
       inf, LIBC_NAMESPACE::exp2f(float(FPBits(0x7f7fffffU))), FE_OVERFLOW);
@@ -55,7 +55,7 @@ TEST(LlvmLibcExp2fTest, Overflow) {
   EXPECT_MATH_ERRNO(ERANGE);
 }
 
-TEST(LlvmLibcExp2fTest, TrickyInputs) {
+TEST_F(LlvmLibcExp2fTest, TrickyInputs) {
   constexpr int N = 12;
   constexpr uint32_t INPUTS[N] = {
       0x3b429d37U, /*0x1.853a6ep-9f*/
@@ -80,7 +80,7 @@ TEST(LlvmLibcExp2fTest, TrickyInputs) {
   }
 }
 
-TEST(LlvmLibcExp2fTest, Underflow) {
+TEST_F(LlvmLibcExp2fTest, Underflow) {
   libc_errno = 0;
   EXPECT_FP_EQ_WITH_EXCEPTION(
       0.0f, LIBC_NAMESPACE::exp2f(float(FPBits(0xff7fffffU))), FE_UNDERFLOW);
@@ -102,7 +102,7 @@ TEST(LlvmLibcExp2fTest, Underflow) {
   EXPECT_MATH_ERRNO(ERANGE);
 }
 
-TEST(LlvmLibcExp2fTest, InFloatRange) {
+TEST_F(LlvmLibcExp2fTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'000;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
diff --git a/libc/test/src/math/exp_test.cpp b/libc/test/src/math/exp_test.cpp
index 06894bce70699e5..1de2d7507acc303 100644
--- a/libc/test/src/math/exp_test.cpp
+++ b/libc/test/src/math/exp_test.cpp
@@ -17,12 +17,12 @@
 #include <errno.h>
 #include <stdint.h>
 
+using LlvmLibcExpTest = LIBC_NAMESPACE::testing::FPTest<double>;
+
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 using LIBC_NAMESPACE::testing::tlog;
 
-DECLARE_SPECIAL_CONSTANTS(double)
-
-TEST(LlvmLibcExpTest, SpecialNumbers) {
+TEST_F(LlvmLibcExpTest, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::exp(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::exp(inf));
   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::exp(neg_inf));
@@ -33,7 +33,7 @@ TEST(LlvmLibcExpTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(1.0, LIBC_NAMESPACE::exp(-0.0));
 }
 
-TEST(LlvmLibcExpTest, TrickyInputs) {
+TEST_F(LlvmLibcExpTest, TrickyInputs) {
   constexpr int N = 14;
   constexpr uint64_t INPUTS[N] = {
       0x3FD79289C6E6A5C0,
@@ -58,7 +58,7 @@ TEST(LlvmLibcExpTest, TrickyInputs) {
   }
 }
 
-TEST(LlvmLibcExpTest, InDoubleRange) {
+TEST_F(LlvmLibcExpTest, InDoubleRange) {
   constexpr uint64_t COUNT = 1'231;
   uint64_t START = LIBC_NAMESPACE::fputil::FPBits<double>(0.25).uintval();
   uint64_t STOP = LIBC_NAMESPACE::fputil::FPBits<double>(4.0).uintval();
diff --git a/libc/test/src/math/expf_test.cpp b/libc/test/src/math/expf_test.cpp
index f015e46d50b6b2d..521eba705b69fd0 100644
--- a/libc/test/src/math/expf_test.cpp
+++ b/libc/test/src/math/expf_test.cpp
@@ -16,11 +16,11 @@
 
 #include <stdint.h>
 
-namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+using LlvmLibcExpfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-TEST(LlvmLibcExpfTest, SpecialNumbers) {
+TEST_F(LlvmLibcExpfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::expf(aNaN));
@@ -39,7 +39,7 @@ TEST(LlvmLibcExpfTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(0);
 }
 
-TEST(LlvmLibcExpfTest, Overflow) {
+TEST_F(LlvmLibcExpfTest, Overflow) {
   libc_errno = 0;
   EXPECT_FP_EQ_WITH_EXCEPTION(
       inf, LIBC_NAMESPACE::expf(float(FPBits(0x7f7fffffU))), FE_OVERFLOW);
@@ -54,7 +54,7 @@ TEST(LlvmLibcExpfTest, Overflow) {
   EXPECT_MATH_ERRNO(ERANGE);
 }
 
-TEST(LlvmLibcExpfTest, Underflow) {
+TEST_F(LlvmLibcExpfTest, Underflow) {
   libc_errno = 0;
   EXPECT_FP_EQ_WITH_EXCEPTION(
       0.0f, LIBC_NAMESPACE::expf(float(FPBits(0xff7fffffU))), FE_UNDERFLOW);
@@ -73,7 +73,7 @@ TEST(LlvmLibcExpfTest, Underflow) {
 
 // Test with inputs which are the borders of underflow/overflow but still
 // produce valid results without setting errno.
-TEST(LlvmLibcExpfTest, Borderline) {
+TEST_F(LlvmLibcExpfTest, Borderline) {
   float x;
 
   libc_errno = 0;
@@ -103,7 +103,7 @@ TEST(LlvmLibcExpfTest, Borderline) {
   EXPECT_MATH_ERRNO(0);
 }
 
-TEST(LlvmLibcExpfTest, InFloatRange) {
+TEST_F(LlvmLibcExpfTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'000;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
diff --git a/libc/test/src/math/explogxf_test.cpp b/libc/test/src/math/explogxf_test.cpp
index 7b402e17fbc7f3f..24f9f3c0f8e45e1 100644
--- a/libc/test/src/math/explogxf_test.cpp
+++ b/libc/test/src/math/explogxf_test.cpp
@@ -14,9 +14,9 @@
 #include "utils/MPFRWrapper/MPFRUtils.h"
 #include <math.h>
 
-namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+using LlvmLibcExplogfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
 constexpr int def_count = 100003;
 constexpr float def_prec = 0.500001f;
@@ -25,7 +25,7 @@ auto f_normal = [](float x) -> bool {
   return !(isnan(x) || isinf(x) || fabs(x) < 2E-38);
 };
 
-TEST(LlvmLibcExpxfTest, InFloatRange) {
+TEST_F(LlvmLibcExplogfTest, ExpInFloatRange) {
   auto fx = [](float x) -> float {
     auto result = LIBC_NAMESPACE::exp_b_range_reduc<LIBC_NAMESPACE::ExpBase>(x);
     double r = LIBC_NAMESPACE::ExpBase::powb_lo(result.lo);
@@ -39,12 +39,12 @@ TEST(LlvmLibcExpxfTest, InFloatRange) {
              def_prec);
 }
 
-TEST(LlvmLibcLog2xfTest, InFloatRange) {
+TEST_F(LlvmLibcExplogfTest, Log2InFloatRange) {
   CHECK_DATA(0.0f, inf, mpfr::Operation::Log2, LIBC_NAMESPACE::log2_eval,
              f_normal, def_count, def_prec);
 }
 
-TEST(LlvmLibcLogxfTest, InFloatRange) {
+TEST_F(LlvmLibcExplogfTest, LogInFloatRange) {
   CHECK_DATA(0.0f, inf, mpfr::Operation::Log, LIBC_NAMESPACE::log_eval,
              f_normal, def_count, def_prec);
 }
diff --git a/libc/test/src/math/expm1_test.cpp b/libc/test/src/math/expm1_test.cpp
index 76e715e00c40d4c..ad53ffb6e8af1af 100644
--- a/libc/test/src/math/expm1_test.cpp
+++ b/libc/test/src/math/expm1_test.cpp
@@ -17,12 +17,12 @@
 #include <errno.h>
 #include <stdint.h>
 
+using LlvmLibcExpm1Test = LIBC_NAMESPACE::testing::FPTest<double>;
+
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 using LIBC_NAMESPACE::testing::tlog;
 
-DECLARE_SPECIAL_CONSTANTS(double)
-
-TEST(LlvmLibcExpm1Test, TrickyInputs) {
+TEST_F(LlvmLibcExpm1Test, TrickyInputs) {
   constexpr int N = 21;
   constexpr uint64_t INPUTS[N] = {
       0x3FD79289C6E6A5C0, // x=0x1.79289c6e6a5cp-2
@@ -54,7 +54,7 @@ TEST(LlvmLibcExpm1Test, TrickyInputs) {
   }
 }
 
-TEST(LlvmLibcExpm1Test, InDoubleRange) {
+TEST_F(LlvmLibcExpm1Test, InDoubleRange) {
   constexpr uint64_t COUNT = 1'231;
   uint64_t START = LIBC_NAMESPACE::fputil::FPBits<double>(0.25).uintval();
   uint64_t STOP = LIBC_NAMESPACE::fputil::FPBits<double>(4.0).uintval();
diff --git a/libc/test/src/math/expm1f_test.cpp b/libc/test/src/math/expm1f_test.cpp
index f63c9f464c6d7d1..a9eaa4dbe5381a0 100644
--- a/libc/test/src/math/expm1f_test.cpp
+++ b/libc/test/src/math/expm1f_test.cpp
@@ -16,11 +16,11 @@
 
 #include <stdint.h>
 
-namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+using LlvmLibcExpm1fTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-TEST(LlvmLibcExpm1fTest, SpecialNumbers) {
+TEST_F(LlvmLibcExpm1fTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::expm1f(aNaN));
@@ -39,7 +39,7 @@ TEST(LlvmLibcExpm1fTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(0);
 }
 
-TEST(LlvmLibcExpm1fTest, Overflow) {
+TEST_F(LlvmLibcExpm1fTest, Overflow) {
   libc_errno = 0;
   EXPECT_FP_EQ_WITH_EXCEPTION(
       inf, LIBC_NAMESPACE::expm1f(float(FPBits(0x7f7fffffU))), FE_OVERFLOW);
@@ -54,7 +54,7 @@ TEST(LlvmLibcExpm1fTest, Overflow) {
   EXPECT_MATH_ERRNO(ERANGE);
 }
 
-TEST(LlvmLibcExpm1fTest, Underflow) {
+TEST_F(LlvmLibcExpm1fTest, Underflow) {
   libc_errno = 0;
   EXPECT_FP_EQ(-1.0f, LIBC_NAMESPACE::expm1f(float(FPBits(0xff7fffffU))));
 
@@ -67,7 +67,7 @@ TEST(LlvmLibcExpm1fTest, Underflow) {
 
 // Test with inputs which are the borders of underflow/overflow but still
 // produce valid results without setting errno.
-TEST(LlvmLibcExpm1fTest, Borderline) {
+TEST_F(LlvmLibcExpm1fTest, Borderline) {
   float x;
 
   libc_errno = 0;
@@ -112,7 +112,7 @@ TEST(LlvmLibcExpm1fTest, Borderline) {
   EXPECT_MATH_ERRNO(0);
 }
 
-TEST(LlvmLibcExpm1fTest, InFloatRange) {
+TEST_F(LlvmLibcExpm1fTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'000;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
diff --git a/libc/test/src/math/inv_trigf_utils_test.cpp b/libc/test/src/math/inv_trigf_utils_test.cpp
index 4f50027bf82a83a..23420edcd0ca1ab 100644
--- a/libc/test/src/math/inv_trigf_utils_test.cpp
+++ b/libc/test/src/math/inv_trigf_utils_test.cpp
@@ -14,21 +14,21 @@
 #include "utils/MPFRWrapper/MPFRUtils.h"
 #include <math.h>
 
-namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+using LlvmLibcAtanfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
 constexpr int def_count = 100003;
 constexpr float def_prec = 0.500001f;
 
 auto f_normal = [](float x) -> bool { return !(isnan(x) || isinf(x)); };
 
-TEST(LlvmLibcAtanfPosTest, InFloatRange) {
+TEST_F(LlvmLibcAtanfTest, InPositiveRange) {
   CHECK_DATA(0.0f, inf, mpfr::Operation::Atan, LIBC_NAMESPACE::atan_eval,
              f_normal, def_count, def_prec);
 }
 
-TEST(LlvmLibcAtanfNegTest, InFloatRange) {
+TEST_F(LlvmLibcAtanfTest, InNegativeRange) {
   CHECK_DATA(-0.0f, neg_inf, mpfr::Operation::Atan, LIBC_NAMESPACE::atan_eval,
              f_normal, def_count, def_prec);
 }
diff --git a/libc/test/src/math/log10_test.cpp b/libc/test/src/math/log10_test.cpp
index 46458c324673b02..72224c24718005c 100644
--- a/libc/test/src/math/log10_test.cpp
+++ b/libc/test/src/math/log10_test.cpp
@@ -17,12 +17,12 @@
 #include <errno.h>
 #include <stdint.h>
 
+using LlvmLibcLog10Test = LIBC_NAMESPACE::testing::FPTest<double>;
+
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 using LIBC_NAMESPACE::testing::tlog;
 
-DECLARE_SPECIAL_CONSTANTS(double)
-
-TEST(LlvmLibcLog10Test, SpecialNumbers) {
+TEST_F(LlvmLibcLog10Test, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::log10(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::log10(inf));
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log10(neg_inf), FE_INVALID);
@@ -34,7 +34,7 @@ TEST(LlvmLibcLog10Test, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::log10(1.0));
 }
 
-TEST(LlvmLibcLog10Test, TrickyInputs) {
+TEST_F(LlvmLibcLog10Test, TrickyInputs) {
   constexpr int N = 36;
   constexpr uint64_t INPUTS[N] = {
       0x3ff0000000000000, // x = 1.0
@@ -72,7 +72,7 @@ TEST(LlvmLibcLog10Test, TrickyInputs) {
   }
 }
 
-TEST(LlvmLibcLog10Test, AllExponents) {
+TEST_F(LlvmLibcLog10Test, AllExponents) {
   double x = 0x1.0p-1074;
   for (int i = -1074; i < 1024; ++i, x *= 2.0) {
     ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log10, x,
@@ -80,7 +80,7 @@ TEST(LlvmLibcLog10Test, AllExponents) {
   }
 }
 
-TEST(LlvmLibcLog10Test, InDoubleRange) {
+TEST_F(LlvmLibcLog10Test, InDoubleRange) {
   constexpr uint64_t COUNT = 1'001;
   constexpr uint64_t START = 0x3FD0'0000'0000'0000ULL; // 0.25
   constexpr uint64_t STOP = 0x4010'0000'0000'0000ULL;  // 4.0
diff --git a/libc/test/src/math/log10f_test.cpp b/libc/test/src/math/log10f_test.cpp
index 05c42b9265ad927..3448ea7570eec67 100644
--- a/libc/test/src/math/log10f_test.cpp
+++ b/libc/test/src/math/log10f_test.cpp
@@ -16,11 +16,11 @@
 #include <errno.h>
 #include <stdint.h>
 
-namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+using LlvmLibcLog10fTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-TEST(LlvmLibcLog10fTest, SpecialNumbers) {
+TEST_F(LlvmLibcLog10fTest, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::log10f(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::log10f(inf));
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log10f(neg_inf), FE_INVALID);
@@ -32,7 +32,7 @@ TEST(LlvmLibcLog10fTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::log10f(1.0f));
 }
 
-TEST(LlvmLibcLog10fTest, TrickyInputs) {
+TEST_F(LlvmLibcLog10fTest, TrickyInputs) {
   constexpr int N = 21;
   constexpr uint32_t INPUTS[N] = {
       0x3f800000U /*1.0f*/,
@@ -65,7 +65,7 @@ TEST(LlvmLibcLog10fTest, TrickyInputs) {
   }
 }
 
-TEST(LlvmLibcLog10fTest, InFloatRange) {
+TEST_F(LlvmLibcLog10fTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'000;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
diff --git a/libc/test/src/math/log1p_test.cpp b/libc/test/src/math/log1p_test.cpp
index b677e7b416e1a6c..5bec911937dca68 100644
--- a/libc/test/src/math/log1p_test.cpp
+++ b/libc/test/src/math/log1p_test.cpp
@@ -17,12 +17,12 @@
 #include <errno.h>
 #include <stdint.h>
 
+using LlvmLibcLog1pTest = LIBC_NAMESPACE::testing::FPTest<double>;
+
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 using LIBC_NAMESPACE::testing::tlog;
 
-DECLARE_SPECIAL_CONSTANTS(double)
-
-TEST(LlvmLibcLog1pTest, SpecialNumbers) {
+TEST_F(LlvmLibcLog1pTest, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::log1p(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::log1p(inf));
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log1p(neg_inf), FE_INVALID);
@@ -33,7 +33,7 @@ TEST(LlvmLibcLog1pTest, SpecialNumbers) {
                               FE_DIVBYZERO);
 }
 
-TEST(LlvmLibcLog1pTest, TrickyInputs) {
+TEST_F(LlvmLibcLog1pTest, TrickyInputs) {
   constexpr int N = 41;
   constexpr uint64_t INPUTS[N] = {
       0x3ff0000000000000, // x = 1.0
@@ -73,7 +73,7 @@ TEST(LlvmLibcLog1pTest, TrickyInputs) {
   }
 }
 
-TEST(LlvmLibcLog1pTest, AllExponents) {
+TEST_F(LlvmLibcLog1pTest, AllExponents) {
   double x = 0x1.0p-1074;
   for (int i = -1074; i < 1024; ++i, x *= 2.0) {
     ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log1p, x,
@@ -81,7 +81,7 @@ TEST(LlvmLibcLog1pTest, AllExponents) {
   }
 }
 
-TEST(LlvmLibcLog1pTest, InDoubleRange) {
+TEST_F(LlvmLibcLog1pTest, InDoubleRange) {
   constexpr uint64_t COUNT = 4501;
 
   auto test = [&](uint64_t start, uint64_t stop,
diff --git a/libc/test/src/math/log1pf_test.cpp b/libc/test/src/math/log1pf_test.cpp
index d8132e479e83dba..16cdc4704cb8a35 100644
--- a/libc/test/src/math/log1pf_test.cpp
+++ b/libc/test/src/math/log1pf_test.cpp
@@ -17,11 +17,11 @@
 #include <errno.h>
 #include <stdint.h>
 
-namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+using LlvmLibcLog1pfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-TEST(LlvmLibclog1pfTest, SpecialNumbers) {
+TEST_F(LlvmLibcLog1pfTest, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::log1pf(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::log1pf(inf));
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log1pf(neg_inf), FE_INVALID);
@@ -31,7 +31,7 @@ TEST(LlvmLibclog1pfTest, SpecialNumbers) {
                               FE_DIVBYZERO);
 }
 
-TEST(LlvmLibclog1pfTest, TrickyInputs) {
+TEST_F(LlvmLibcLog1pfTest, TrickyInputs) {
   constexpr int N = 27;
   constexpr uint32_t INPUTS[N] = {
       0x35c00006U, /*0x1.80000cp-20f*/
@@ -69,7 +69,7 @@ TEST(LlvmLibclog1pfTest, TrickyInputs) {
   }
 }
 
-TEST(LlvmLibclog1pfTest, InFloatRange) {
+TEST_F(LlvmLibcLog1pfTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'000;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
diff --git a/libc/test/src/math/log2_test.cpp b/libc/test/src/math/log2_test.cpp
index 42643fa7083fc13..b471b8eb540fe83 100644
--- a/libc/test/src/math/log2_test.cpp
+++ b/libc/test/src/math/log2_test.cpp
@@ -17,12 +17,12 @@
 #include <errno.h>
 #include <stdint.h>
 
+using LlvmLibcLog2Test = LIBC_NAMESPACE::testing::FPTest<double>;
+
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 using LIBC_NAMESPACE::testing::tlog;
 
-DECLARE_SPECIAL_CONSTANTS(double)
-
-TEST(LlvmLibcLog2Test, SpecialNumbers) {
+TEST_F(LlvmLibcLog2Test, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::log2(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::log2(inf));
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log2(neg_inf), FE_INVALID);
@@ -33,7 +33,7 @@ TEST(LlvmLibcLog2Test, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::log2(1.0));
 }
 
-TEST(LlvmLibcLog2Test, TrickyInputs) {
+TEST_F(LlvmLibcLog2Test, TrickyInputs) {
   constexpr int N = 30;
   constexpr uint64_t INPUTS[N] = {
       0x3ff0000000000000, // x = 1.0
@@ -70,7 +70,7 @@ TEST(LlvmLibcLog2Test, TrickyInputs) {
   }
 }
 
-TEST(LlvmLibcLog2Test, AllExponents) {
+TEST_F(LlvmLibcLog2Test, AllExponents) {
   double x = 0x1.0p-1074;
   for (int i = -1074; i < 1024; ++i, x *= 2.0) {
     ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log2, x,
@@ -78,7 +78,7 @@ TEST(LlvmLibcLog2Test, AllExponents) {
   }
 }
 
-TEST(LlvmLibcLog2Test, InDoubleRange) {
+TEST_F(LlvmLibcLog2Test, InDoubleRange) {
   constexpr uint64_t COUNT = 1'001;
   constexpr uint64_t START = 0x3FD0'0000'0000'0000ULL; // 0.25
   constexpr uint64_t STOP = 0x4010'0000'0000'0000ULL;  // 4.0
diff --git a/libc/test/src/math/log2f_test.cpp b/libc/test/src/math/log2f_test.cpp
index d4f4b937c955437..aaa6320fb26b131 100644
--- a/libc/test/src/math/log2f_test.cpp
+++ b/libc/test/src/math/log2f_test.cpp
@@ -16,11 +16,11 @@
 
 #include <stdint.h>
 
-namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+using LlvmLibcLog2fTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-TEST(LlvmLibcLog2fTest, SpecialNumbers) {
+TEST_F(LlvmLibcLog2fTest, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::log2f(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::log2f(inf));
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log2f(neg_inf), FE_INVALID);
@@ -32,7 +32,7 @@ TEST(LlvmLibcLog2fTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::log2f(1.0f));
 }
 
-TEST(LlvmLibcLog2fTest, TrickyInputs) {
+TEST_F(LlvmLibcLog2fTest, TrickyInputs) {
   constexpr int N = 10;
   constexpr uint32_t INPUTS[N] = {
       0x3f7d57f5U, 0x3f7e3274U, 0x3f7ed848U, 0x3f7fd6ccU, 0x3f7fffffU,
@@ -45,7 +45,7 @@ TEST(LlvmLibcLog2fTest, TrickyInputs) {
   }
 }
 
-TEST(LlvmLibcLog2fTest, InFloatRange) {
+TEST_F(LlvmLibcLog2fTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'000;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
diff --git a/libc/test/src/math/log_test.cpp b/libc/test/src/math/log_test.cpp
index caa274828fd66ac..e3f41e1e264a31e 100644
--- a/libc/test/src/math/log_test.cpp
+++ b/libc/test/src/math/log_test.cpp
@@ -17,12 +17,12 @@
 #include <errno.h>
 #include <stdint.h>
 
+using LlvmLibcLogTest = LIBC_NAMESPACE::testing::FPTest<double>;
+
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 using LIBC_NAMESPACE::testing::tlog;
 
-DECLARE_SPECIAL_CONSTANTS(double)
-
-TEST(LlvmLibcLogTest, SpecialNumbers) {
+TEST_F(LlvmLibcLogTest, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::log(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::log(inf));
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log(neg_inf), FE_INVALID);
@@ -32,7 +32,7 @@ TEST(LlvmLibcLogTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::log(1.0));
 }
 
-TEST(LlvmLibcLogTest, TrickyInputs) {
+TEST_F(LlvmLibcLogTest, TrickyInputs) {
   constexpr int N = 30;
   constexpr uint64_t INPUTS[N] = {
       0x3ff0000000000000, // x = 1.0
@@ -69,7 +69,7 @@ TEST(LlvmLibcLogTest, TrickyInputs) {
   }
 }
 
-TEST(LlvmLibcLogTest, AllExponents) {
+TEST_F(LlvmLibcLogTest, AllExponents) {
   double x = 0x1.0p-1074;
   for (int i = -1074; i < 1024; ++i, x *= 2.0) {
     ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log, x,
@@ -77,7 +77,7 @@ TEST(LlvmLibcLogTest, AllExponents) {
   }
 }
 
-TEST(LlvmLibcLogTest, InDoubleRange) {
+TEST_F(LlvmLibcLogTest, InDoubleRange) {
   constexpr uint64_t COUNT = 234561;
   constexpr uint64_t START = 0x3FD0'0000'0000'0000ULL; // 0.25
   constexpr uint64_t STOP = 0x4010'0000'0000'0000ULL;  // 4.0
diff --git a/libc/test/src/math/logf_test.cpp b/libc/test/src/math/logf_test.cpp
index 832bdd21edd417a..c02e95cb6f80041 100644
--- a/libc/test/src/math/logf_test.cpp
+++ b/libc/test/src/math/logf_test.cpp
@@ -15,11 +15,11 @@
 
 #include <stdint.h>
 
-namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+using LlvmLibcLogfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-TEST(LlvmLibcLogfTest, SpecialNumbers) {
+TEST_F(LlvmLibcLogfTest, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::logf(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::logf(inf));
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::logf(neg_inf), FE_INVALID);
@@ -31,7 +31,7 @@ TEST(LlvmLibcLogfTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::logf(1.0f));
 }
 
-TEST(LlvmLibcLogfTest, TrickyInputs) {
+TEST_F(LlvmLibcLogfTest, TrickyInputs) {
   constexpr int N = 35;
   constexpr uint32_t INPUTS[N] = {
       0x1b7679ffU, /*0x1.ecf3fep-73f*/
@@ -77,7 +77,7 @@ TEST(LlvmLibcLogfTest, TrickyInputs) {
   }
 }
 
-TEST(LlvmLibcLogfTest, InFloatRange) {
+TEST_F(LlvmLibcLogfTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'000;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
diff --git a/libc/test/src/math/sin_test.cpp b/libc/test/src/math/sin_test.cpp
index ee8451f5008106a..b18219577a79257 100644
--- a/libc/test/src/math/sin_test.cpp
+++ b/libc/test/src/math/sin_test.cpp
@@ -14,11 +14,11 @@
 
 #include <math.h>
 
-namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+using LlvmLibcSinTest = LIBC_NAMESPACE::testing::FPTest<double>;
 
-DECLARE_SPECIAL_CONSTANTS(double)
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-TEST(LlvmLibcSinTest, Range) {
+TEST_F(LlvmLibcSinTest, Range) {
   static constexpr double _2pi = 6.283185307179586;
   constexpr UIntType COUNT = 100'000;
   constexpr UIntType STEP = UIntType(-1) / COUNT;
diff --git a/libc/test/src/math/sincosf_test.cpp b/libc/test/src/math/sincosf_test.cpp
index f67fba1d31dfcdb..fde707f892a9e28 100644
--- a/libc/test/src/math/sincosf_test.cpp
+++ b/libc/test/src/math/sincosf_test.cpp
@@ -18,14 +18,13 @@
 #include <errno.h>
 #include <stdint.h>
 
+using LlvmLibcSinCosfTest = LIBC_NAMESPACE::testing::FPTest<float>;
+
 using LIBC_NAMESPACE::testing::SDCOMP26094_VALUES;
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
 
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcSinCosfTest, SpecialNumbers) {
+TEST_F(LlvmLibcSinCosfTest, SpecialNumbers) {
   libc_errno = 0;
   float sin, cos;
 
@@ -97,7 +96,7 @@ TEST(LlvmLibcSinCosfTest, SpecialNumbers) {
     }                                                                          \
   }
 
-TEST(LlvmLibcSinCosfTest, InFloatRange) {
+TEST_F(LlvmLibcSinCosfTest, InFloatRange) {
   constexpr uint32_t COUNT = 1'001;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
@@ -111,7 +110,7 @@ TEST(LlvmLibcSinCosfTest, InFloatRange) {
 }
 
 // For hard to round inputs.
-TEST(LlvmLibcSinCosfTest, SpecialValues) {
+TEST_F(LlvmLibcSinCosfTest, SpecialValues) {
   constexpr int N = 43;
   constexpr uint32_t INPUTS[N] = {
       0x3b56'37f5U, // x = 0x1.ac6feap-9f
@@ -168,7 +167,7 @@ TEST(LlvmLibcSinCosfTest, SpecialValues) {
 
 // SDCOMP-26094: check sinf in the cases for which the range reducer
 // returns values furthest beyond its nominal upper bound of pi/4.
-TEST(LlvmLibcSinCosfTest, SDCOMP_26094) {
+TEST_F(LlvmLibcSinCosfTest, SDCOMP_26094) {
   for (uint32_t v : SDCOMP26094_VALUES) {
     float x = float(FPBits((v)));
     EXPECT_SINCOS_MATCH_ALL_ROUNDING(x);
diff --git a/libc/test/src/math/sinf_test.cpp b/libc/test/src/math/sinf_test.cpp
index 24251104421269c..107b015512990bd 100644
--- a/libc/test/src/math/sinf_test.cpp
+++ b/libc/test/src/math/sinf_test.cpp
@@ -18,14 +18,13 @@
 #include <errno.h>
 #include <stdint.h>
 
+using LlvmLibcSinfTest = LIBC_NAMESPACE::testing::FPTest<float>;
+
 using LIBC_NAMESPACE::testing::SDCOMP26094_VALUES;
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
 
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcSinfTest, SpecialNumbers) {
+TEST_F(LlvmLibcSinfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinf(aNaN));
@@ -44,7 +43,7 @@ TEST(LlvmLibcSinfTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(EDOM);
 }
 
-TEST(LlvmLibcSinfTest, InFloatRange) {
+TEST_F(LlvmLibcSinfTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'000;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
@@ -56,7 +55,7 @@ TEST(LlvmLibcSinfTest, InFloatRange) {
   }
 }
 
-TEST(LlvmLibcSinfTest, SpecificBitPatterns) {
+TEST_F(LlvmLibcSinfTest, SpecificBitPatterns) {
   constexpr int N = 36;
   constexpr uint32_t INPUTS[N] = {
       0x3f06'0a92U, // x = pi/6
@@ -107,7 +106,7 @@ TEST(LlvmLibcSinfTest, SpecificBitPatterns) {
 }
 
 // For small values, sin(x) is x.
-TEST(LlvmLibcSinfTest, SmallValues) {
+TEST_F(LlvmLibcSinfTest, SmallValues) {
   float x = float(FPBits(0x1780'0000U));
   EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x,
                                  LIBC_NAMESPACE::sinf(x), 0.5);
@@ -119,7 +118,7 @@ TEST(LlvmLibcSinfTest, SmallValues) {
 
 // SDCOMP-26094: check sinf in the cases for which the range reducer
 // returns values furthest beyond its nominal upper bound of pi/4.
-TEST(LlvmLibcSinfTest, SDCOMP_26094) {
+TEST_F(LlvmLibcSinfTest, SDCOMP_26094) {
   for (uint32_t v : SDCOMP26094_VALUES) {
     float x = float(FPBits((v)));
     EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x,
diff --git a/libc/test/src/math/sinhf_test.cpp b/libc/test/src/math/sinhf_test.cpp
index 92284ca7d0988df..dc80a1b7aedf983 100644
--- a/libc/test/src/math/sinhf_test.cpp
+++ b/libc/test/src/math/sinhf_test.cpp
@@ -18,13 +18,11 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcSinhfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcSinhfTest, SpecialNumbers) {
+TEST_F(LlvmLibcSinhfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinhf(aNaN));
@@ -43,7 +41,7 @@ TEST(LlvmLibcSinhfTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(0);
 }
 
-TEST(LlvmLibcSinhfTest, InFloatRange) {
+TEST_F(LlvmLibcSinhfTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'000;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
@@ -55,7 +53,7 @@ TEST(LlvmLibcSinhfTest, InFloatRange) {
 }
 
 // For small values, sinh(x) is x.
-TEST(LlvmLibcSinhfTest, SmallValues) {
+TEST_F(LlvmLibcSinhfTest, SmallValues) {
   float x = float(FPBits(uint32_t(0x17800000)));
   float result = LIBC_NAMESPACE::sinhf(x);
   EXPECT_MPFR_MATCH(mpfr::Operation::Sinh, x, result, 0.5);
@@ -67,7 +65,7 @@ TEST(LlvmLibcSinhfTest, SmallValues) {
   EXPECT_FP_EQ(x, result);
 }
 
-TEST(LlvmLibcSinhfTest, Overflow) {
+TEST_F(LlvmLibcSinhfTest, Overflow) {
   libc_errno = 0;
   EXPECT_FP_EQ_WITH_EXCEPTION(
       inf, LIBC_NAMESPACE::sinhf(float(FPBits(0x7f7fffffU))), FE_OVERFLOW);
@@ -82,7 +80,7 @@ TEST(LlvmLibcSinhfTest, Overflow) {
   EXPECT_MATH_ERRNO(ERANGE);
 }
 
-TEST(LlvmLibcSinhfTest, ExceptionalValues) {
+TEST_F(LlvmLibcSinhfTest, ExceptionalValues) {
   float x = float(FPBits(uint32_t(0x3a12'85ffU)));
   EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sinh, x,
                                  LIBC_NAMESPACE::sinhf(x), 0.5);
diff --git a/libc/test/src/math/smoke/acosf_test.cpp b/libc/test/src/math/smoke/acosf_test.cpp
index b8ba3d5f85cee7f..2d8fba96718c5c7 100644
--- a/libc/test/src/math/smoke/acosf_test.cpp
+++ b/libc/test/src/math/smoke/acosf_test.cpp
@@ -16,11 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcAcosfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcAcosfTest, SpecialNumbers) {
+TEST_F(LlvmLibcAcosfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::acosf(aNaN));
diff --git a/libc/test/src/math/smoke/acoshf_test.cpp b/libc/test/src/math/smoke/acoshf_test.cpp
index b8d7453bfdb212a..85c24e08ab79abd 100644
--- a/libc/test/src/math/smoke/acoshf_test.cpp
+++ b/libc/test/src/math/smoke/acoshf_test.cpp
@@ -16,11 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits_t = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcAcoshfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcAcoshfTest, SpecialNumbers) {
+TEST_F(LlvmLibcAcoshfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::acoshf(aNaN));
diff --git a/libc/test/src/math/smoke/asinf_test.cpp b/libc/test/src/math/smoke/asinf_test.cpp
index e2eca352e1d8824..57bb7dcf6e84110 100644
--- a/libc/test/src/math/smoke/asinf_test.cpp
+++ b/libc/test/src/math/smoke/asinf_test.cpp
@@ -16,11 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcAsinfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcAsinfTest, SpecialNumbers) {
+TEST_F(LlvmLibcAsinfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::asinf(aNaN));
diff --git a/libc/test/src/math/smoke/asinhf_test.cpp b/libc/test/src/math/smoke/asinhf_test.cpp
index f10cf934c533af6..0c6ac537ef1c817 100644
--- a/libc/test/src/math/smoke/asinhf_test.cpp
+++ b/libc/test/src/math/smoke/asinhf_test.cpp
@@ -16,11 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits_t = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcAsinhfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcAsinhfTest, SpecialNumbers) {
+TEST_F(LlvmLibcAsinhfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::asinhf(aNaN));
diff --git a/libc/test/src/math/smoke/atanf_test.cpp b/libc/test/src/math/smoke/atanf_test.cpp
index 6e6f854002cc500..156ce0744b0e963 100644
--- a/libc/test/src/math/smoke/atanf_test.cpp
+++ b/libc/test/src/math/smoke/atanf_test.cpp
@@ -16,11 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcAtanfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcAtanfTest, SpecialNumbers) {
+TEST_F(LlvmLibcAtanfTest, SpecialNumbers) {
   libc_errno = 0;
 
   LIBC_NAMESPACE::fputil::clear_except(FE_ALL_EXCEPT);
diff --git a/libc/test/src/math/smoke/atanhf_test.cpp b/libc/test/src/math/smoke/atanhf_test.cpp
index 841cb884c37fe5e..0f7dc0d30634904 100644
--- a/libc/test/src/math/smoke/atanhf_test.cpp
+++ b/libc/test/src/math/smoke/atanhf_test.cpp
@@ -16,11 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcAtanhfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcAtanhfTest, SpecialNumbers) {
+TEST_F(LlvmLibcAtanhfTest, SpecialNumbers) {
   libc_errno = 0;
 
   LIBC_NAMESPACE::fputil::clear_except(FE_ALL_EXCEPT);
diff --git a/libc/test/src/math/smoke/cosf_test.cpp b/libc/test/src/math/smoke/cosf_test.cpp
index 6a2c6a5b2529955..c9efc791885f014 100644
--- a/libc/test/src/math/smoke/cosf_test.cpp
+++ b/libc/test/src/math/smoke/cosf_test.cpp
@@ -16,11 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcCosfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcCosfTest, SpecialNumbers) {
+TEST_F(LlvmLibcCosfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::cosf(aNaN));
diff --git a/libc/test/src/math/smoke/coshf_test.cpp b/libc/test/src/math/smoke/coshf_test.cpp
index dd08f1924abc38c..93fdd126896cf8b 100644
--- a/libc/test/src/math/smoke/coshf_test.cpp
+++ b/libc/test/src/math/smoke/coshf_test.cpp
@@ -17,11 +17,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcCoshfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcCoshfTest, SpecialNumbers) {
+TEST_F(LlvmLibcCoshfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::coshf(aNaN));
@@ -40,7 +38,7 @@ TEST(LlvmLibcCoshfTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(0);
 }
 
-TEST(LlvmLibcCoshfTest, Overflow) {
+TEST_F(LlvmLibcCoshfTest, Overflow) {
   libc_errno = 0;
   EXPECT_FP_EQ_WITH_EXCEPTION(
       inf, LIBC_NAMESPACE::coshf(float(FPBits(0x7f7fffffU))), FE_OVERFLOW);
diff --git a/libc/test/src/math/smoke/erff_test.cpp b/libc/test/src/math/smoke/erff_test.cpp
index 9c9105167b8c711..843cc7f4c8c04de 100644
--- a/libc/test/src/math/smoke/erff_test.cpp
+++ b/libc/test/src/math/smoke/erff_test.cpp
@@ -15,11 +15,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using LIBC_NAMESPACE::testing::tlog;
+using LlvmLibcErffTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcErffTest, SpecialNumbers) {
+TEST_F(LlvmLibcErffTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::erff(aNaN));
   EXPECT_FP_EQ_ALL_ROUNDING(1.0f, LIBC_NAMESPACE::erff(inf));
   EXPECT_FP_EQ_ALL_ROUNDING(-1.0f, LIBC_NAMESPACE::erff(neg_inf));
diff --git a/libc/test/src/math/smoke/exp10_test.cpp b/libc/test/src/math/smoke/exp10_test.cpp
index cf699bab6ebeeef..daa7b5b48842bce 100644
--- a/libc/test/src/math/smoke/exp10_test.cpp
+++ b/libc/test/src/math/smoke/exp10_test.cpp
@@ -16,11 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using LIBC_NAMESPACE::testing::tlog;
+using LlvmLibcExp10Test = LIBC_NAMESPACE::testing::FPTest<double>;
 
-DECLARE_SPECIAL_CONSTANTS(double)
-
-TEST(LlvmLibcExp10Test, SpecialNumbers) {
+TEST_F(LlvmLibcExp10Test, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::exp10(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::exp10(inf));
   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::exp10(neg_inf));
diff --git a/libc/test/src/math/smoke/exp10f_test.cpp b/libc/test/src/math/smoke/exp10f_test.cpp
index c4d3cb6002df97b..8b758dd3f62f995 100644
--- a/libc/test/src/math/smoke/exp10f_test.cpp
+++ b/libc/test/src/math/smoke/exp10f_test.cpp
@@ -15,9 +15,9 @@
 
 #include <stdint.h>
 
-DECLARE_SPECIAL_CONSTANTS(float)
+using LlvmLibcExp10fTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-TEST(LlvmLibcExp10fTest, SpecialNumbers) {
+TEST_F(LlvmLibcExp10fTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::exp10f(aNaN));
@@ -40,7 +40,7 @@ TEST(LlvmLibcExp10fTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(1000.0f, LIBC_NAMESPACE::exp10f(3.0f));
 }
 
-TEST(LlvmLibcExp10fTest, Overflow) {
+TEST_F(LlvmLibcExp10fTest, Overflow) {
   libc_errno = 0;
   EXPECT_FP_EQ_WITH_EXCEPTION(
       inf, LIBC_NAMESPACE::exp10f(float(FPBits(0x7f7fffffU))), FE_OVERFLOW);
diff --git a/libc/test/src/math/smoke/exp2_test.cpp b/libc/test/src/math/smoke/exp2_test.cpp
index ec076c15fed019a..7bcc2a3cb6c7424 100644
--- a/libc/test/src/math/smoke/exp2_test.cpp
+++ b/libc/test/src/math/smoke/exp2_test.cpp
@@ -16,11 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using LIBC_NAMESPACE::testing::tlog;
+using LlvmLibcExp2Test = LIBC_NAMESPACE::testing::FPTest<double>;
 
-DECLARE_SPECIAL_CONSTANTS(double)
-
-TEST(LlvmLibcExp2Test, SpecialNumbers) {
+TEST_F(LlvmLibcExp2Test, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::exp2(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::exp2(inf));
   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::exp2(neg_inf));
diff --git a/libc/test/src/math/smoke/exp2f_test.cpp b/libc/test/src/math/smoke/exp2f_test.cpp
index f26f155cc870a5d..6623d449aeebdf7 100644
--- a/libc/test/src/math/smoke/exp2f_test.cpp
+++ b/libc/test/src/math/smoke/exp2f_test.cpp
@@ -16,9 +16,9 @@
 
 #include <stdint.h>
 
-DECLARE_SPECIAL_CONSTANTS(float)
+using LlvmLibcExp2fTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-TEST(LlvmLibcExp2fTest, SpecialNumbers) {
+TEST_F(LlvmLibcExp2fTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::exp2f(aNaN));
@@ -42,7 +42,7 @@ TEST(LlvmLibcExp2fTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(0.25f, LIBC_NAMESPACE::exp2f(-2.0f));
 }
 
-TEST(LlvmLibcExp2fTest, Overflow) {
+TEST_F(LlvmLibcExp2fTest, Overflow) {
   libc_errno = 0;
   EXPECT_FP_EQ_WITH_EXCEPTION(
       inf, LIBC_NAMESPACE::exp2f(float(FPBits(0x7f7fffffU))), FE_OVERFLOW);
diff --git a/libc/test/src/math/smoke/exp_test.cpp b/libc/test/src/math/smoke/exp_test.cpp
index 7973c37257c53d8..455cb2ea1353289 100644
--- a/libc/test/src/math/smoke/exp_test.cpp
+++ b/libc/test/src/math/smoke/exp_test.cpp
@@ -16,11 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using LIBC_NAMESPACE::testing::tlog;
+using LlvmLibcExpTest = LIBC_NAMESPACE::testing::FPTest<double>;
 
-DECLARE_SPECIAL_CONSTANTS(double)
-
-TEST(LlvmLibcExpTest, SpecialNumbers) {
+TEST_F(LlvmLibcExpTest, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::exp(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::exp(inf));
   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::exp(neg_inf));
diff --git a/libc/test/src/math/smoke/expf_test.cpp b/libc/test/src/math/smoke/expf_test.cpp
index e5c5afbd182238d..ff33e95f90427f1 100644
--- a/libc/test/src/math/smoke/expf_test.cpp
+++ b/libc/test/src/math/smoke/expf_test.cpp
@@ -15,9 +15,9 @@
 
 #include <stdint.h>
 
-DECLARE_SPECIAL_CONSTANTS(float)
+using LlvmLibcExpfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-TEST(LlvmLibcExpfTest, SpecialNumbers) {
+TEST_F(LlvmLibcExpfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::expf(aNaN));
@@ -36,7 +36,7 @@ TEST(LlvmLibcExpfTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(0);
 }
 
-TEST(LlvmLibcExpfTest, Overflow) {
+TEST_F(LlvmLibcExpfTest, Overflow) {
   libc_errno = 0;
   EXPECT_FP_EQ_WITH_EXCEPTION(
       inf, LIBC_NAMESPACE::expf(float(FPBits(0x7f7fffffU))), FE_OVERFLOW);
diff --git a/libc/test/src/math/smoke/expm1_test.cpp b/libc/test/src/math/smoke/expm1_test.cpp
index 8bb4a11ed67fd36..4581060075e32c9 100644
--- a/libc/test/src/math/smoke/expm1_test.cpp
+++ b/libc/test/src/math/smoke/expm1_test.cpp
@@ -11,18 +11,14 @@
 #include "src/math/expm1.h"
 #include "test/UnitTest/FPMatcher.h"
 #include "test/UnitTest/Test.h"
-#include "utils/MPFRWrapper/MPFRUtils.h"
 #include <math.h>
 
 #include <errno.h>
 #include <stdint.h>
 
-namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
-using LIBC_NAMESPACE::testing::tlog;
+using LlvmLibcExpm1Test = LIBC_NAMESPACE::testing::FPTest<double>;
 
-DECLARE_SPECIAL_CONSTANTS(double)
-
-TEST(LlvmLibcExpm1Test, SpecialNumbers) {
+TEST_F(LlvmLibcExpm1Test, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::expm1(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::expm1(inf));
   EXPECT_FP_EQ_ALL_ROUNDING(-1.0, LIBC_NAMESPACE::expm1(neg_inf));
diff --git a/libc/test/src/math/smoke/expm1f_test.cpp b/libc/test/src/math/smoke/expm1f_test.cpp
index 03fe1413e95e12f..6999a987f9d6a68 100644
--- a/libc/test/src/math/smoke/expm1f_test.cpp
+++ b/libc/test/src/math/smoke/expm1f_test.cpp
@@ -15,9 +15,9 @@
 
 #include <stdint.h>
 
-DECLARE_SPECIAL_CONSTANTS(float)
+using LlvmLibcExpm1fTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-TEST(LlvmLibcExpm1fTest, SpecialNumbers) {
+TEST_F(LlvmLibcExpm1fTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::expm1f(aNaN));
@@ -36,7 +36,7 @@ TEST(LlvmLibcExpm1fTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(0);
 }
 
-TEST(LlvmLibcExpm1fTest, Overflow) {
+TEST_F(LlvmLibcExpm1fTest, Overflow) {
   libc_errno = 0;
   EXPECT_FP_EQ_WITH_EXCEPTION(
       inf, LIBC_NAMESPACE::expm1f(float(FPBits(0x7f7fffffU))), FE_OVERFLOW);
diff --git a/libc/test/src/math/smoke/log10_test.cpp b/libc/test/src/math/smoke/log10_test.cpp
index be4900a6a4dd6be..c1658a3d5c9653c 100644
--- a/libc/test/src/math/smoke/log10_test.cpp
+++ b/libc/test/src/math/smoke/log10_test.cpp
@@ -16,11 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using LIBC_NAMESPACE::testing::tlog;
+using LlvmLibcLog10Test = LIBC_NAMESPACE::testing::FPTest<double>;
 
-DECLARE_SPECIAL_CONSTANTS(double)
-
-TEST(LlvmLibcLog10Test, SpecialNumbers) {
+TEST_F(LlvmLibcLog10Test, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::log10(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::log10(inf));
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log10(neg_inf), FE_INVALID);
diff --git a/libc/test/src/math/smoke/log10f_test.cpp b/libc/test/src/math/smoke/log10f_test.cpp
index 2369ab0458847a2..53950233cabf5ba 100644
--- a/libc/test/src/math/smoke/log10f_test.cpp
+++ b/libc/test/src/math/smoke/log10f_test.cpp
@@ -15,9 +15,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-DECLARE_SPECIAL_CONSTANTS(float)
+using LlvmLibcLog10fTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-TEST(LlvmLibcLog10fTest, SpecialNumbers) {
+TEST_F(LlvmLibcLog10fTest, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::log10f(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::log10f(inf));
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log10f(neg_inf), FE_INVALID);
diff --git a/libc/test/src/math/smoke/log1p_test.cpp b/libc/test/src/math/smoke/log1p_test.cpp
index 9cb0cf17ff797de..e1966c28397c086 100644
--- a/libc/test/src/math/smoke/log1p_test.cpp
+++ b/libc/test/src/math/smoke/log1p_test.cpp
@@ -16,11 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using LIBC_NAMESPACE::testing::tlog;
+using LlvmLibcLog1pTest = LIBC_NAMESPACE::testing::FPTest<double>;
 
-DECLARE_SPECIAL_CONSTANTS(double)
-
-TEST(LlvmLibcLog1pTest, SpecialNumbers) {
+TEST_F(LlvmLibcLog1pTest, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::log1p(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::log1p(inf));
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log1p(neg_inf), FE_INVALID);
diff --git a/libc/test/src/math/smoke/log1pf_test.cpp b/libc/test/src/math/smoke/log1pf_test.cpp
index 983601024371508..377a46adef9c792 100644
--- a/libc/test/src/math/smoke/log1pf_test.cpp
+++ b/libc/test/src/math/smoke/log1pf_test.cpp
@@ -16,9 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-DECLARE_SPECIAL_CONSTANTS(float)
+using LlvmLibcLog1pfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-TEST(LlvmLibclog1pfTest, SpecialNumbers) {
+TEST_F(LlvmLibcLog1pfTest, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::log1pf(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::log1pf(inf));
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log1pf(neg_inf), FE_INVALID);
diff --git a/libc/test/src/math/smoke/log2_test.cpp b/libc/test/src/math/smoke/log2_test.cpp
index dab8929115d7260..826c51eb8c1b091 100644
--- a/libc/test/src/math/smoke/log2_test.cpp
+++ b/libc/test/src/math/smoke/log2_test.cpp
@@ -16,11 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using LIBC_NAMESPACE::testing::tlog;
+using LlvmLibcLog2Test = LIBC_NAMESPACE::testing::FPTest<double>;
 
-DECLARE_SPECIAL_CONSTANTS(double)
-
-TEST(LlvmLibcLog2Test, SpecialNumbers) {
+TEST_F(LlvmLibcLog2Test, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::log2(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::log2(inf));
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log2(neg_inf), FE_INVALID);
diff --git a/libc/test/src/math/smoke/log2f_test.cpp b/libc/test/src/math/smoke/log2f_test.cpp
index d388230a801591e..2387ff59d70d9e6 100644
--- a/libc/test/src/math/smoke/log2f_test.cpp
+++ b/libc/test/src/math/smoke/log2f_test.cpp
@@ -15,9 +15,9 @@
 
 #include <stdint.h>
 
-DECLARE_SPECIAL_CONSTANTS(float)
+using LlvmLibcLog2fTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-TEST(LlvmLibcLog2fTest, SpecialNumbers) {
+TEST_F(LlvmLibcLog2fTest, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::log2f(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::log2f(inf));
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log2f(neg_inf), FE_INVALID);
diff --git a/libc/test/src/math/smoke/log_test.cpp b/libc/test/src/math/smoke/log_test.cpp
index 34d608a81a14e0b..423b9d8bb818a79 100644
--- a/libc/test/src/math/smoke/log_test.cpp
+++ b/libc/test/src/math/smoke/log_test.cpp
@@ -16,11 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using LIBC_NAMESPACE::testing::tlog;
+using LlvmLibcLogTest = LIBC_NAMESPACE::testing::FPTest<double>;
 
-DECLARE_SPECIAL_CONSTANTS(double)
-
-TEST(LlvmLibcLogTest, SpecialNumbers) {
+TEST_F(LlvmLibcLogTest, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::log(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::log(inf));
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log(neg_inf), FE_INVALID);
diff --git a/libc/test/src/math/smoke/logf_test.cpp b/libc/test/src/math/smoke/logf_test.cpp
index 0edaa8da556e282..21b39ee6896dd25 100644
--- a/libc/test/src/math/smoke/logf_test.cpp
+++ b/libc/test/src/math/smoke/logf_test.cpp
@@ -14,9 +14,9 @@
 
 #include <stdint.h>
 
-DECLARE_SPECIAL_CONSTANTS(float)
+using LlvmLibcLogfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-TEST(LlvmLibcLogfTest, SpecialNumbers) {
+TEST_F(LlvmLibcLogfTest, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::logf(aNaN));
   EXPECT_FP_EQ(inf, LIBC_NAMESPACE::logf(inf));
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::logf(neg_inf), FE_INVALID);
diff --git a/libc/test/src/math/smoke/sincosf_test.cpp b/libc/test/src/math/smoke/sincosf_test.cpp
index 4e2b1c69e41877e..0de47f3307fe6f9 100644
--- a/libc/test/src/math/smoke/sincosf_test.cpp
+++ b/libc/test/src/math/smoke/sincosf_test.cpp
@@ -16,11 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcSinCosfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcSinCosfTest, SpecialNumbers) {
+TEST_F(LlvmLibcSinCosfTest, SpecialNumbers) {
   libc_errno = 0;
   float sin, cos;
 
diff --git a/libc/test/src/math/smoke/sinf_test.cpp b/libc/test/src/math/smoke/sinf_test.cpp
index b93011bbdc2baf7..bbd7634e0028ab9 100644
--- a/libc/test/src/math/smoke/sinf_test.cpp
+++ b/libc/test/src/math/smoke/sinf_test.cpp
@@ -16,11 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcSinfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcSinfTest, SpecialNumbers) {
+TEST_F(LlvmLibcSinfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinf(aNaN));
diff --git a/libc/test/src/math/smoke/sinhf_test.cpp b/libc/test/src/math/smoke/sinhf_test.cpp
index b2b6bddf91c2a58..aa11ed9cbe10502 100644
--- a/libc/test/src/math/smoke/sinhf_test.cpp
+++ b/libc/test/src/math/smoke/sinhf_test.cpp
@@ -17,11 +17,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcSinhfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcSinhfTest, SpecialNumbers) {
+TEST_F(LlvmLibcSinhfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinhf(aNaN));
@@ -41,7 +39,7 @@ TEST(LlvmLibcSinhfTest, SpecialNumbers) {
 }
 
 // For small values, sinh(x) is x.
-TEST(LlvmLibcSinhfTest, SmallValues) {
+TEST_F(LlvmLibcSinhfTest, SmallValues) {
   float x = float(FPBits(uint32_t(0x17800000)));
   float result = LIBC_NAMESPACE::sinhf(x);
   EXPECT_FP_EQ(x, result);
@@ -51,7 +49,7 @@ TEST(LlvmLibcSinhfTest, SmallValues) {
   EXPECT_FP_EQ(x, result);
 }
 
-TEST(LlvmLibcSinhfTest, Overflow) {
+TEST_F(LlvmLibcSinhfTest, Overflow) {
   libc_errno = 0;
   EXPECT_FP_EQ_WITH_EXCEPTION(
       inf, LIBC_NAMESPACE::sinhf(float(FPBits(0x7f7fffffU))), FE_OVERFLOW);
diff --git a/libc/test/src/math/smoke/tanf_test.cpp b/libc/test/src/math/smoke/tanf_test.cpp
index 60444b3ad15cc0e..fa93da29829a4c8 100644
--- a/libc/test/src/math/smoke/tanf_test.cpp
+++ b/libc/test/src/math/smoke/tanf_test.cpp
@@ -16,11 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcTanfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcTanfTest, SpecialNumbers) {
+TEST_F(LlvmLibcTanfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::tanf(aNaN));
diff --git a/libc/test/src/math/smoke/tanhf_test.cpp b/libc/test/src/math/smoke/tanhf_test.cpp
index db20be724b53f4a..f32e8cc9591f98c 100644
--- a/libc/test/src/math/smoke/tanhf_test.cpp
+++ b/libc/test/src/math/smoke/tanhf_test.cpp
@@ -16,11 +16,9 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcTanhfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcTanhfTest, SpecialNumbers) {
+TEST_F(LlvmLibcTanhfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::tanhf(aNaN));
diff --git a/libc/test/src/math/tan_test.cpp b/libc/test/src/math/tan_test.cpp
index a6f7dcb4329303e..b35658cc50f8a84 100644
--- a/libc/test/src/math/tan_test.cpp
+++ b/libc/test/src/math/tan_test.cpp
@@ -13,11 +13,11 @@
 
 #include <math.h>
 
-namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+using LlvmLibcTanTest = LIBC_NAMESPACE::testing::FPTest<double>;
 
-DECLARE_SPECIAL_CONSTANTS(double)
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-TEST(LlvmLibctanTest, Range) {
+TEST_F(LlvmLibcTanTest, Range) {
   static constexpr double _2pi = 6.283185307179586;
   constexpr UIntType COUNT = 100'000;
   constexpr UIntType STEP = UIntType(-1) / COUNT;
diff --git a/libc/test/src/math/tanf_test.cpp b/libc/test/src/math/tanf_test.cpp
index 3c3694a5eb68475..6005202e754b1bc 100644
--- a/libc/test/src/math/tanf_test.cpp
+++ b/libc/test/src/math/tanf_test.cpp
@@ -18,14 +18,13 @@
 #include <errno.h>
 #include <stdint.h>
 
+using LlvmLibcTanfTest = LIBC_NAMESPACE::testing::FPTest<float>;
+
 using LIBC_NAMESPACE::testing::SDCOMP26094_VALUES;
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
 
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcTanfTest, SpecialNumbers) {
+TEST_F(LlvmLibcTanfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::tanf(aNaN));
@@ -44,7 +43,7 @@ TEST(LlvmLibcTanfTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(EDOM);
 }
 
-TEST(LlvmLibcTanfTest, InFloatRange) {
+TEST_F(LlvmLibcTanfTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'000;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
@@ -56,7 +55,7 @@ TEST(LlvmLibcTanfTest, InFloatRange) {
   }
 }
 
-TEST(LlvmLibcTanfTest, SpecificBitPatterns) {
+TEST_F(LlvmLibcTanfTest, SpecificBitPatterns) {
   constexpr int N = 54;
   constexpr uint32_t INPUTS[N] = {
       0x3a7a'8d2fU, // x = 0x1.f51a5ep-11f
@@ -126,7 +125,7 @@ TEST(LlvmLibcTanfTest, SpecificBitPatterns) {
 
 // SDCOMP-26094: check tanf in the cases for which the range reducer
 // returns values furthest beyond its nominal upper bound of pi/4.
-TEST(LlvmLibcTanfTest, SDCOMP_26094) {
+TEST_F(LlvmLibcTanfTest, SDCOMP_26094) {
   for (uint32_t v : SDCOMP26094_VALUES) {
     float x = float(FPBits(v));
     ASSERT_MPFR_MATCH(mpfr::Operation::Tan, x, LIBC_NAMESPACE::tanf(x), 0.5);
diff --git a/libc/test/src/math/tanhf_test.cpp b/libc/test/src/math/tanhf_test.cpp
index 06ad02c3370940c..1dc736d369303ca 100644
--- a/libc/test/src/math/tanhf_test.cpp
+++ b/libc/test/src/math/tanhf_test.cpp
@@ -17,13 +17,11 @@
 #include <errno.h>
 #include <stdint.h>
 
-using FPBits = LIBC_NAMESPACE::fputil::FPBits<float>;
+using LlvmLibcTanhfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
-DECLARE_SPECIAL_CONSTANTS(float)
-
-TEST(LlvmLibcTanhfTest, SpecialNumbers) {
+TEST_F(LlvmLibcTanhfTest, SpecialNumbers) {
   libc_errno = 0;
 
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::tanhf(aNaN));
@@ -42,7 +40,7 @@ TEST(LlvmLibcTanhfTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(0);
 }
 
-TEST(LlvmLibcTanhfTest, InFloatRange) {
+TEST_F(LlvmLibcTanhfTest, InFloatRange) {
   constexpr uint32_t COUNT = 100'001;
   constexpr uint32_t STEP = UINT32_MAX / COUNT;
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
@@ -54,7 +52,7 @@ TEST(LlvmLibcTanhfTest, InFloatRange) {
   }
 }
 
-TEST(LlvmLibcTanhfTest, ExceptionalValues) {
+TEST_F(LlvmLibcTanhfTest, ExceptionalValues) {
   constexpr int N = 4;
   constexpr uint32_t INPUTS[N] = {
       0x0040'0000,

>From 180eae1f1e5a08595ed2278d93f01fb321284649 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Thu, 19 Oct 2023 16:31:15 +0200
Subject: [PATCH 05/76] [MemoryBuiltins] Simplify getAllocFnKind()
 implementation (NFC)

---
 llvm/lib/Analysis/MemoryBuiltins.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp
index 6ee1984f908b8a1..9e6811f3bf88158 100644
--- a/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -282,10 +282,7 @@ static AllocFnKind getAllocFnKind(const Value *V) {
 }
 
 static AllocFnKind getAllocFnKind(const Function *F) {
-  Attribute Attr = F->getFnAttribute(Attribute::AllocKind);
-  if (Attr.isValid())
-    return AllocFnKind(Attr.getValueAsInt());
-  return AllocFnKind::Unknown;
+  return F->getAttributes().getAllocKind();
 }
 
 static bool checkFnAllocKind(const Value *V, AllocFnKind Wanted) {

>From b781c7ab574f54f54e1b32421398c723f3690f05 Mon Sep 17 00:00:00 2001
From: JackAKirk <jack.kirk at codeplay.com>
Date: Tue, 17 Oct 2023 16:59:31 +0100
Subject: [PATCH 06/76] Fixed some wmma store builtins that had non-const src
 param

Now all wmma store builtins have src param marked const.

Reviewers: Tra
---
 clang/include/clang/Basic/BuiltinsNVPTX.def | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def
index f645ad25cbd86da..d74a7d1e55dd281 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -920,22 +920,22 @@ TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60))
 TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60))
 TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60))
 TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60))
-TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX60))
-TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX60))
+TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60))
+TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60))
 
 TARGET_BUILTIN(__hmma_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
 TARGET_BUILTIN(__hmma_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
 TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
 TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
-TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61))
-TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
 
 TARGET_BUILTIN(__hmma_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
 TARGET_BUILTIN(__hmma_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
 TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
 TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
-TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61))
-TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
+TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
 
 TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
 TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))

>From e6e90840708099425b7b69dd053634ff25d4907f Mon Sep 17 00:00:00 2001
From: Pete Steinfeld <47540744+psteinfeld at users.noreply.github.com>
Date: Thu, 19 Oct 2023 07:43:10 -0700
Subject: [PATCH 07/76] [flang] Put ISO_Fortran_binding.h where it can be
 easily used (#69121)

The update stems from the discussion in

https://discourse.llvm.org/t/adding-flang-specific-header-files-to-clang/72442

This is my second attempt at this. My first attempt was in pull request
#68756.

I decided to put ISO_Fortran_binding.h in a place where it would be
accessible with the include: "#include<ISO_Fortran_binding.h>" rather
than "#include<fortran/ISO_Fortran_binding.h>" because this is what
gfortran implements.

Note that the file is also installed into ".../include/flang", so if a
user wanted to access the file from a compiler other than clang, it
would be available.

I added a test in ".../flang/test/Examples". To make the test work, I
also needed to put ISO_Fortran_binding.h into the build area.

Although the flang project depends on clang, clang may not always be
available in a flang build. For example, when building just the
"check-flang" target, the "clang" executable may not be available at the
time the new test gets run. To account for this, I made the test's
script check for the existence of the "clang" executable. If "clang" is
not available, it simply prints "PASS". If it is available, it fully
builds and executes the test. On success, this will also print "PASS"
---
 flang/CMakeLists.txt               | 20 +++++++--
 flang/test/Examples/ctofortran.f90 | 70 ++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+), 3 deletions(-)
 create mode 100644 flang/test/Examples/ctofortran.f90

diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index ac30da89995ed31..74245374166ba9c 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -273,10 +273,10 @@ if (NOT(FLANG_DEFAULT_RTLIB STREQUAL ""))
       "Default runtime library to use (empty for platform default)" FORCE)
 endif()
 
-
-
 set(PACKAGE_VERSION "${LLVM_PACKAGE_VERSION}")
-
+if (NOT PACKAGE_VERSION)
+  set(PACKAGE_VERSION ${LLVM_VERSION_MAJOR})
+endif()
 
 if (NOT DEFINED FLANG_VERSION_MAJOR)
   set(FLANG_VERSION_MAJOR ${LLVM_VERSION_MAJOR})
@@ -490,3 +490,17 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
     PATTERN "*.inc"
     )
 endif()
+
+# Put ISO_Fortran_binding.h into the include files of the build area now
+# so that we can run tests before installing
+include(GetClangResourceDir)
+get_clang_resource_dir(HEADER_BINARY_DIR PREFIX ${LLVM_LIBRARY_OUTPUT_INTDIR}/.. SUBDIR include)
+configure_file(
+  ${FLANG_SOURCE_DIR}/include/flang/ISO_Fortran_binding.h
+  ${HEADER_BINARY_DIR}/ISO_Fortran_binding.h)
+
+# And also install it into the install area
+get_clang_resource_dir(HEADER_INSTALL_DIR PREFIX ${CMAKE_INSTALL_PREFIX} SUBDIR include)
+install(
+  FILES ${CMAKE_INSTALL_PREFIX}/include/flang/ISO_Fortran_binding.h 
+  DESTINATION ${HEADER_INSTALL_DIR})
diff --git a/flang/test/Examples/ctofortran.f90 b/flang/test/Examples/ctofortran.f90
new file mode 100644
index 000000000000000..e47fa25f48482fd
--- /dev/null
+++ b/flang/test/Examples/ctofortran.f90
@@ -0,0 +1,70 @@
+! UNSUPPORTED: system-windows
+! RUN: split-file %s %t
+! RUN: chmod +x %t/runtest.sh
+! RUN: %t/runtest.sh %t %flang $t/ffile.f90 $t/cfile.c | FileCheck %s
+
+!--- ffile.f90
+subroutine foo(a) bind(c)
+  integer :: a(:)
+  if (lbound(a, 1) .ne. 1) then
+     print *, 'FAIL expected 1 for lbound but got ',lbound(a, 1)
+     stop 1
+  endif
+
+  if (ubound(a, 1) .ne. 10) then
+     print *, 'FAIL expected 10 for ubound but got ',ubound(a, 1)
+     stop 1
+  endif
+
+  do i = lbound(a,1),ubound(a,1)
+     !print *, a(i)
+     if (a(i) .ne. i) then
+        print *, 'FAIL expected', i, ' for index ',i, ' but got ',a(i)
+        stop 1
+     endif
+  enddo
+  print *, 'PASS'
+end subroutine foo
+
+! CHECK: PASS
+!--- cfile.c
+#include <stdio.h>
+#include <stdlib.h>
+#include <ISO_Fortran_binding.h>
+
+void foo(CFI_cdesc_t*);
+
+int a[10];
+
+int main() {
+  int i, res;
+  static CFI_CDESC_T(1) r1;
+  CFI_cdesc_t *desc = (CFI_cdesc_t*)&r1;
+  CFI_index_t extent[1] = {10};
+
+  for(i=0; i<10; ++i) {
+    a[i] = i+1;
+  }
+
+  res = CFI_establish(desc, (void*)a, CFI_attribute_other, CFI_type_int32_t,
+                      sizeof(int), 1, extent);
+  if (res != 0) {
+    printf("FAIL CFI_establish returned %d instead of 0.\n",res);
+    exit(1);
+  }
+
+  foo(desc);
+  return 0;
+}
+!--- runtest.sh
+#!/bin/bash
+export CCOMP=`dirname $2`/clang
+if [ -x $CCOMP ]
+then
+  $CCOMP -c $1/$4 -o $1/cfile.o
+  $2 $1/$3 $1/cfile.o -o $1/ctofortran
+  $1/ctofortran # should print "PASS"
+else
+  # No clang compiler, just pass by default
+  echo "PASS"
+fi

>From e4b75d836d24ee935d0c9f21a4d6853c6f065be9 Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Thu, 19 Oct 2023 13:24:15 +0000
Subject: [PATCH 08/76] [Clang][SVE2.1] Add builtins for Multi-vector load and
 store

 Patch by : David Sherwood <david.sherwood at arm.com>

As described in: https://github.com/ARM-software/acle/pull/257

Reviewed By: kmclaughlin

Differential Revision: https://reviews.llvm.org/D151433
---
 clang/include/clang/Basic/arm_sve.td          |   72 +
 clang/lib/CodeGen/CGBuiltin.cpp               |   13 +
 .../acle_sve2p1_ld1.c                         | 1250 +++++++++++++++++
 .../acle_sve2p1_ldnt1.c                       | 1250 +++++++++++++++++
 .../acle_sve2p1_st1.c                         |  998 +++++++++++++
 .../acle_sve2p1_stnt1.c                       | 1042 ++++++++++++++
 6 files changed, 4625 insertions(+)
 create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c
 create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ldnt1.c
 create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1.c
 create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 8f9bdd18829ff6b..8684b4ff1b6053e 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1880,6 +1880,78 @@ def SVWHILELO_COUNT  : SInst<"svwhilelo_{d}",  "}nni", "QcQsQiQl", MergeNone, "a
 def SVWHILELS_COUNT  : SInst<"svwhilels_{d}",  "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILEHI_COUNT  : SInst<"svwhilehi_{d}",  "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 def SVWHILEHS_COUNT  : SInst<"svwhilehs_{d}",  "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
+
+def SVLD1B_X2 : MInst<"svld1[_{2}]_x2", "2}c", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">;
+def SVLD1H_X2 : MInst<"svld1[_{2}]_x2", "2}c", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">;
+def SVLD1W_X2 : MInst<"svld1[_{2}]_x2", "2}c", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">;
+def SVLD1D_X2 : MInst<"svld1[_{2}]_x2", "2}c", "lUld", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">;
+def SVLD1B_X4 : MInst<"svld1[_{2}]_x4", "4}c", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x4">;
+def SVLD1H_X4 : MInst<"svld1[_{2}]_x4", "4}c", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x4">;
+def SVLD1W_X4 : MInst<"svld1[_{2}]_x4", "4}c", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x4">;
+def SVLD1D_X4 : MInst<"svld1[_{2}]_x4", "4}c", "lUld", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x4">;
+
+def SVLDNT1B_X2 : MInst<"svldnt1[_{2}]_x2", "2}c", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x2">;
+def SVLDNT1H_X2 : MInst<"svldnt1[_{2}]_x2", "2}c", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x2">;
+def SVLDNT1W_X2 : MInst<"svldnt1[_{2}]_x2", "2}c", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x2">;
+def SVLDNT1D_X2 : MInst<"svldnt1[_{2}]_x2", "2}c", "lUld", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x2">;
+def SVLDNT1B_X4 : MInst<"svldnt1[_{2}]_x4", "4}c", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x4">;
+def SVLDNT1H_X4 : MInst<"svldnt1[_{2}]_x4", "4}c", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x4">;
+def SVLDNT1W_X4 : MInst<"svldnt1[_{2}]_x4", "4}c", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x4">;
+def SVLDNT1D_X4 : MInst<"svldnt1[_{2}]_x4", "4}c", "lUld", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x4">;
+
+def SVLD1B_VNUM_X2 : MInst<"svld1_vnum[_{2}]_x2", "2}cl", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">;
+def SVLD1H_VNUM_X2 : MInst<"svld1_vnum[_{2}]_x2", "2}cl", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">;
+def SVLD1W_VNUM_X2 : MInst<"svld1_vnum[_{2}]_x2", "2}cl", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">;
+def SVLD1D_VNUM_X2 : MInst<"svld1_vnum[_{2}]_x2", "2}cl", "lUld", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">;
+def SVLD1B_VNUM_X4 : MInst<"svld1_vnum[_{2}]_x4", "4}cl", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x4">;
+def SVLD1H_VNUM_X4 : MInst<"svld1_vnum[_{2}]_x4", "4}cl", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x4">;
+def SVLD1W_VNUM_X4 : MInst<"svld1_vnum[_{2}]_x4", "4}cl", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x4">;
+def SVLD1D_VNUM_X4 : MInst<"svld1_vnum[_{2}]_x4", "4}cl", "lUld", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x4">;
+
+def SVLDNT1B_VNUM_X2 : MInst<"svldnt1_vnum[_{2}]_x2", "2}cl", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x2">;
+def SVLDNT1H_VNUM_X2 : MInst<"svldnt1_vnum[_{2}]_x2", "2}cl", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x2">;
+def SVLDNT1W_VNUM_X2 : MInst<"svldnt1_vnum[_{2}]_x2", "2}cl", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x2">;
+def SVLDNT1D_VNUM_X2 : MInst<"svldnt1_vnum[_{2}]_x2", "2}cl", "lUld", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x2">;
+def SVLDNT1B_VNUM_X4 : MInst<"svldnt1_vnum[_{2}]_x4", "4}cl", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x4">;
+def SVLDNT1H_VNUM_X4 : MInst<"svldnt1_vnum[_{2}]_x4", "4}cl", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x4">;
+def SVLDNT1W_VNUM_X4 : MInst<"svldnt1_vnum[_{2}]_x4", "4}cl", "iUif", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x4">;
+def SVLDNT1D_VNUM_X4 : MInst<"svldnt1_vnum[_{2}]_x4", "4}cl", "lUld", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ldnt1_pn_x4">;
+
+def SVST1B_X2 : MInst<"svst1[_{2}_x2]", "v}p2", "cUc", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x2">;
+def SVST1H_X2 : MInst<"svst1[_{2}_x2]", "v}p2", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x2">;
+def SVST1W_X2 : MInst<"svst1[_{2}_x2]", "v}p2", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x2">;
+def SVST1D_X2 : MInst<"svst1[_{2}_x2]", "v}p2", "lUld", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x2">;
+def SVST1B_X4 : MInst<"svst1[_{2}_x4]", "v}p4", "cUc", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x4">;
+def SVST1H_X4 : MInst<"svst1[_{2}_x4]", "v}p4", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x4">;
+def SVST1W_X4 : MInst<"svst1[_{2}_x4]", "v}p4", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x4">;
+def SVST1D_X4 : MInst<"svst1[_{2}_x4]", "v}p4", "lUld", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x4">;
+
+def SVST1B_VNUM_X2 : MInst<"svst1_vnum[_{2}_x2]", "v}pl2", "cUc", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x2">;
+def SVST1H_VNUM_X2 : MInst<"svst1_vnum[_{2}_x2]", "v}pl2", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x2">;
+def SVST1W_VNUM_X2 : MInst<"svst1_vnum[_{2}_x2]", "v}pl2", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x2">;
+def SVST1D_VNUM_X2 : MInst<"svst1_vnum[_{2}_x2]", "v}pl2", "lUld", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x2">;
+def SVST1B_VNUM_X4 : MInst<"svst1_vnum[_{2}_x4]", "v}pl4", "cUc", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x4">;
+def SVST1H_VNUM_X4 : MInst<"svst1_vnum[_{2}_x4]", "v}pl4", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x4">;
+def SVST1W_VNUM_X4 : MInst<"svst1_vnum[_{2}_x4]", "v}pl4", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x4">;
+def SVST1D_VNUM_X4 : MInst<"svst1_vnum[_{2}_x4]", "v}pl4", "lUld", [IsStructStore], MemEltTyDefault, "aarch64_sve_st1_pn_x4">;
+
+def SVSTNT1B_X2 : MInst<"svstnt1[_{2}_x2]", "v}p2", "cUc", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x2">;
+def SVSTNT1H_X2 : MInst<"svstnt1[_{2}_x2]", "v}p2", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x2">;
+def SVSTNT1W_X2 : MInst<"svstnt1[_{2}_x2]", "v}p2", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x2">;
+def SVSTNT1D_X2 : MInst<"svstnt1[_{2}_x2]", "v}p2", "lUld", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x2">;
+def SVSTNT1B_X4 : MInst<"svstnt1[_{2}_x4]", "v}p4", "cUc", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">;
+def SVSTNT1H_X4 : MInst<"svstnt1[_{2}_x4]", "v}p4", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">;
+def SVSTNT1W_X4 : MInst<"svstnt1[_{2}_x4]", "v}p4", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">;
+def SVSTNT1D_X4 : MInst<"svstnt1[_{2}_x4]", "v}p4", "lUld", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">;
+
+def SVSTNT1B_VNUM_X2 : MInst<"svstnt1_vnum[_{2}_x2]", "v}pl2", "cUc", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x2">;
+def SVSTNT1H_VNUM_X2 : MInst<"svstnt1_vnum[_{2}_x2]", "v}pl2", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x2">;
+def SVSTNT1W_VNUM_X2 : MInst<"svstnt1_vnum[_{2}_x2]", "v}pl2", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x2">;
+def SVSTNT1D_VNUM_X2 : MInst<"svstnt1_vnum[_{2}_x2]", "v}pl2", "lUld", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x2">;
+def SVSTNT1B_VNUM_X4 : MInst<"svstnt1_vnum[_{2}_x4]", "v}pl4", "cUc", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">;
+def SVSTNT1H_VNUM_X4 : MInst<"svstnt1_vnum[_{2}_x4]", "v}pl4", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">;
+def SVSTNT1W_VNUM_X4 : MInst<"svstnt1_vnum[_{2}_x4]", "v}pl4", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">;
+def SVSTNT1D_VNUM_X4 : MInst<"svstnt1_vnum[_{2}_x4]", "v}pl4", "lUld", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">;
 }
 
 let TargetGuard = "sve2p1" in {
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2b341b8090fad7d..e1211bb8949b665 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9340,6 +9340,11 @@ static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {
 // the elements of the specified datatype.
 Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,
                                              llvm::ScalableVectorType *VTy) {
+
+  if (isa<TargetExtType>(Pred->getType()) &&
+      cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")
+    return Pred;
+
   auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);
   if (Pred->getType() == RTy)
     return Pred;
@@ -9514,12 +9519,16 @@ Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,
   unsigned N;
   switch (IntID) {
   case Intrinsic::aarch64_sve_ld2_sret:
+  case Intrinsic::aarch64_sve_ld1_pn_x2:
+  case Intrinsic::aarch64_sve_ldnt1_pn_x2:
     N = 2;
     break;
   case Intrinsic::aarch64_sve_ld3_sret:
     N = 3;
     break;
   case Intrinsic::aarch64_sve_ld4_sret:
+  case Intrinsic::aarch64_sve_ld1_pn_x4:
+  case Intrinsic::aarch64_sve_ldnt1_pn_x4:
     N = 4;
     break;
   default:
@@ -9555,12 +9564,16 @@ Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,
   unsigned N;
   switch (IntID) {
   case Intrinsic::aarch64_sve_st2:
+  case Intrinsic::aarch64_sve_st1_pn_x2:
+  case Intrinsic::aarch64_sve_stnt1_pn_x2:
     N = 2;
     break;
   case Intrinsic::aarch64_sve_st3:
     N = 3;
     break;
   case Intrinsic::aarch64_sve_st4:
+  case Intrinsic::aarch64_sve_st1_pn_x4:
+  case Intrinsic::aarch64_sve_stnt1_pn_x4:
     N = 4;
     break;
   default:
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c
new file mode 100644
index 000000000000000..7a25d31de0130e0
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c
@@ -0,0 +1,1250 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: @test_svld1_u8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svld1_u8_x2u11__SVCount_tPKh(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+//
+svuint8x2_t test_svld1_u8_x2(svcount_t pn, const uint8_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_u8,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_u16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svld1_u16_x2u11__SVCount_tPKt(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP4]]
+//
+svuint16x2_t test_svld1_u16_x2(svcount_t pn, const uint16_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_u16,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_u32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svld1_u32_x2u11__SVCount_tPKj(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP4]]
+//
+svuint32x2_t test_svld1_u32_x2(svcount_t pn, const uint32_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_u32,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_u64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svld1_u64_x2u11__SVCount_tPKm(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP4]]
+//
+svuint64x2_t test_svld1_u64_x2(svcount_t pn, const uint64_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_u64,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_u8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
+// CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svld1_u8_x4u11__SVCount_tPKh(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
+// CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP8]]
+//
+svuint8x4_t test_svld1_u8_x4(svcount_t pn, const uint8_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_u8,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_u16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svld1_u16_x4u11__SVCount_tPKt(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP8]]
+//
+svuint16x4_t test_svld1_u16_x4(svcount_t pn, const uint16_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_u16,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_u32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svld1_u32_x4u11__SVCount_tPKj(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP8]]
+//
+svuint32x4_t test_svld1_u32_x4(svcount_t pn, const uint32_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_u32,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_u64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svld1_u64_x4u11__SVCount_tPKm(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP8]]
+//
+svuint64x4_t test_svld1_u64_x4(svcount_t pn, const uint64_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_u64,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_s8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svld1_s8_x2u11__SVCount_tPKa(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+//
+svint8x2_t test_svld1_s8_x2(svcount_t pn, const int8_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_s8,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_s16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svld1_s16_x2u11__SVCount_tPKs(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP4]]
+//
+svint16x2_t test_svld1_s16_x2(svcount_t pn, const int16_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_s16,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_s32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svld1_s32_x2u11__SVCount_tPKi(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP4]]
+//
+svint32x2_t test_svld1_s32_x2(svcount_t pn, const int32_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_s32,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_s64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svld1_s64_x2u11__SVCount_tPKl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP4]]
+//
+svint64x2_t test_svld1_s64_x2(svcount_t pn, const int64_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_s64,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_s8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
+// CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svld1_s8_x4u11__SVCount_tPKa(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
+// CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP8]]
+//
+svint8x4_t test_svld1_s8_x4(svcount_t pn, const int8_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_s8,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_s16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svld1_s16_x4u11__SVCount_tPKs(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP8]]
+//
+svint16x4_t test_svld1_s16_x4(svcount_t pn, const int16_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_s16,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_s32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svld1_s32_x4u11__SVCount_tPKi(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP8]]
+//
+svint32x4_t test_svld1_s32_x4(svcount_t pn, const int32_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_s32,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_s64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svld1_s64_x4u11__SVCount_tPKl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP8]]
+//
+svint64x4_t test_svld1_s64_x4(svcount_t pn, const int64_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_s64,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_f16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x half> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svld1_f16_x2u11__SVCount_tPKDh(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x half> [[TMP4]]
+//
+svfloat16x2_t test_svld1_f16_x2(svcount_t pn, const float16_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_f16,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_f32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CHECK-NEXT:    ret <vscale x 8 x float> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svld1_f32_x2u11__SVCount_tPKf(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x float> [[TMP4]]
+//
+svfloat32x2_t test_svld1_f32_x2(svcount_t pn, const float32_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_f32,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_f64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
+// CHECK-NEXT:    ret <vscale x 4 x double> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svld1_f64_x2u11__SVCount_tPKd(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x double> [[TMP4]]
+//
+svfloat64x2_t test_svld1_f64_x2(svcount_t pn, const float64_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_f64,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_f16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 16)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x half> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svld1_f16_x4u11__SVCount_tPKDh(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x half> [[TMP8]]
+//
+svfloat16x4_t test_svld1_f16_x4(svcount_t pn, const float16_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_f16,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_f32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x float> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svld1_f32_x4u11__SVCount_tPKf(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x float> [[TMP8]]
+//
+svfloat32x4_t test_svld1_f32_x4(svcount_t pn, const float32_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_f32,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svld1_f64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 4)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x double> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svld1_f64_x4u11__SVCount_tPKd(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x double> [[TMP8]]
+//
+svfloat64x4_t test_svld1_f64_x4(svcount_t pn, const float64_t *base)
+{
+  return SVE_ACLE_FUNC(svld1,_f64,_x4,)(pn, base);
+}
+
+
+// == VNUM variants ==
+
+
+// CHECK-LABEL: @test_svld1_vnum_u8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svld1_vnum_u8_x2u11__SVCount_tPKhl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP5]]
+//
+svuint8x2_t test_svld1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_u8,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_u16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u16_x2u11__SVCount_tPKtl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP5]]
+//
+svuint16x2_t test_svld1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_u16,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_u32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
+// CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u32_x2u11__SVCount_tPKjl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP5]]
+//
+svuint32x2_t test_svld1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_u32,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_u64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
+// CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u64_x2u11__SVCount_tPKml(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP5]]
+//
+svuint64x2_t test_svld1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_u64,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_u8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 32)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 48)
+// CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svld1_vnum_u8_x4u11__SVCount_tPKhl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 48)
+// CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP9]]
+//
+svuint8x4_t test_svld1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_u8,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_u16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u16_x4u11__SVCount_tPKtl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP9]]
+//
+svuint16x4_t test_svld1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_u16,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_u32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u32_x4u11__SVCount_tPKjl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP9]]
+//
+svuint32x4_t test_svld1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_u32,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_u64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u64_x4u11__SVCount_tPKml(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP9]]
+//
+svuint64x4_t test_svld1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_u64,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_s8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svld1_vnum_s8_x2u11__SVCount_tPKal(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP5]]
+//
+svint8x2_t test_svld1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_s8,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_s16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s16_x2u11__SVCount_tPKsl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP5]]
+//
+svint16x2_t test_svld1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_s16,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_s32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
+// CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s32_x2u11__SVCount_tPKil(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP5]]
+//
+svint32x2_t test_svld1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_s32,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_s64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
+// CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s64_x2u11__SVCount_tPKll(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP5]]
+//
+svint64x2_t test_svld1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_s64,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_s8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 32)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 48)
+// CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svld1_vnum_s8_x4u11__SVCount_tPKal(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 48)
+// CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP9]]
+//
+svint8x4_t test_svld1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_s8,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_s16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s16_x4u11__SVCount_tPKsl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ld1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP9]]
+//
+svint16x4_t test_svld1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_s16,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_s32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s32_x4u11__SVCount_tPKil(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ld1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP9]]
+//
+svint32x4_t test_svld1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_s32,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_s64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s64_x4u11__SVCount_tPKll(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ld1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP9]]
+//
+svint64x4_t test_svld1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_s64,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_f16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x half> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f16_x2u11__SVCount_tPKDhl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x half> [[TMP5]]
+//
+svfloat16x2_t test_svld1_vnum_f16_x2(svcount_t pn, const float16_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_f16,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_f32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], i64 4)
+// CHECK-NEXT:    ret <vscale x 8 x float> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f32_x2u11__SVCount_tPKfl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x float> [[TMP5]]
+//
+svfloat32x2_t test_svld1_vnum_f32_x2(svcount_t pn, const float32_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_f32,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_f64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], i64 2)
+// CHECK-NEXT:    ret <vscale x 4 x double> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f64_x2u11__SVCount_tPKdl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], i64 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x double> [[TMP5]]
+//
+svfloat64x2_t test_svld1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_f64,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_f16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], i64 8)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], i64 16)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP7]], <vscale x 8 x half> [[TMP8]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x half> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f16_x4u11__SVCount_tPKDhl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP7]], <vscale x 8 x half> [[TMP8]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x half> [[TMP9]]
+//
+svfloat16x4_t test_svld1_vnum_f16_x4(svcount_t pn, const float16_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_f16,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_f32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], i64 4)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], i64 8)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP7]], <vscale x 4 x float> [[TMP8]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x float> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f32_x4u11__SVCount_tPKfl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ld1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP7]], <vscale x 4 x float> [[TMP8]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x float> [[TMP9]]
+//
+svfloat32x4_t test_svld1_vnum_f32_x4(svcount_t pn, const float32_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_f32,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svld1_vnum_f64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], i64 2)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], i64 4)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP7]], <vscale x 2 x double> [[TMP8]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x double> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f64_x4u11__SVCount_tPKdl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP7]], <vscale x 2 x double> [[TMP8]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x double> [[TMP9]]
+//
+svfloat64x4_t test_svld1_vnum_f64_x4(svcount_t pn, const float64_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svld1_vnum,_f64,_x4,)(pn, base, vnum);
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ldnt1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ldnt1.c
new file mode 100644
index 000000000000000..7a0fcde819dce0f
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ldnt1.c
@@ -0,0 +1,1250 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: @test_svldnt1_u8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svldnt1_u8_x2u11__SVCount_tPKh(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+//
+svuint8x2_t test_svldnt1_u8_x2(svcount_t pn, const uint8_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_u8,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_u16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svldnt1_u16_x2u11__SVCount_tPKt(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP4]]
+//
+svuint16x2_t test_svldnt1_u16_x2(svcount_t pn, const uint16_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_u16,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_u32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svldnt1_u32_x2u11__SVCount_tPKj(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP4]]
+//
+svuint32x2_t test_svldnt1_u32_x2(svcount_t pn, const uint32_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_u32,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_u64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svldnt1_u64_x2u11__SVCount_tPKm(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP4]]
+//
+svuint64x2_t test_svldnt1_u64_x2(svcount_t pn, const uint64_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_u64,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_u8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
+// CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svldnt1_u8_x4u11__SVCount_tPKh(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
+// CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP8]]
+//
+svuint8x4_t test_svldnt1_u8_x4(svcount_t pn, const uint8_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_u8,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_u16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svldnt1_u16_x4u11__SVCount_tPKt(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP8]]
+//
+svuint16x4_t test_svldnt1_u16_x4(svcount_t pn, const uint16_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_u16,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_u32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svldnt1_u32_x4u11__SVCount_tPKj(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP8]]
+//
+svuint32x4_t test_svldnt1_u32_x4(svcount_t pn, const uint32_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_u32,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_u64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svldnt1_u64_x4u11__SVCount_tPKm(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP8]]
+//
+svuint64x4_t test_svldnt1_u64_x4(svcount_t pn, const uint64_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_u64,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_s8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svldnt1_s8_x2u11__SVCount_tPKa(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
+//
+svint8x2_t test_svldnt1_s8_x2(svcount_t pn, const int8_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_s8,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_s16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svldnt1_s16_x2u11__SVCount_tPKs(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP4]]
+//
+svint16x2_t test_svldnt1_s16_x2(svcount_t pn, const int16_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_s16,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_s32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svldnt1_s32_x2u11__SVCount_tPKi(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP4]]
+//
+svint32x2_t test_svldnt1_s32_x2(svcount_t pn, const int32_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_s32,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_s64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svldnt1_s64_x2u11__SVCount_tPKl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP4]]
+//
+svint64x2_t test_svldnt1_s64_x2(svcount_t pn, const int64_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_s64,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_s8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
+// CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svldnt1_s8_x4u11__SVCount_tPKa(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
+// CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP8]]
+//
+// Checks codegen for svldnt1_s8_x4: per the CHECK lines above, it lowers to a
+// single @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8 call whose four results are
+// packed into one wide <vscale x 64 x i8> return value.
+svint8x4_t test_svldnt1_s8_x4(svcount_t pn, const int8_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_s8,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_s16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svldnt1_s16_x4u11__SVCount_tPKs(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP8]]
+//
+// Checks codegen for svldnt1_s16_x4: per the CHECK lines above, it lowers to a
+// single @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16 call whose four results are
+// packed into one wide <vscale x 32 x i16> return value.
+svint16x4_t test_svldnt1_s16_x4(svcount_t pn, const int16_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_s16,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_s32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svldnt1_s32_x4u11__SVCount_tPKi(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP8]]
+//
+// Checks codegen for svldnt1_s32_x4: per the CHECK lines above, it lowers to a
+// single @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32 call whose four results are
+// packed into one wide <vscale x 16 x i32> return value.
+svint32x4_t test_svldnt1_s32_x4(svcount_t pn, const int32_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_s32,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_s64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svldnt1_s64_x4u11__SVCount_tPKl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP8]]
+//
+// Checks codegen for svldnt1_s64_x4: per the CHECK lines above, it lowers to a
+// single @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64 call whose four results are
+// packed into one wide <vscale x 8 x i64> return value.
+svint64x4_t test_svldnt1_s64_x4(svcount_t pn, const int64_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_s64,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_f16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x half> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svldnt1_f16_x2u11__SVCount_tPKDh(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x half> [[TMP4]]
+//
+// Checks codegen for svldnt1_f16_x2: per the CHECK lines above, it lowers to a
+// single @llvm.aarch64.sve.ldnt1.pn.x2.nxv8f16 call whose two results are
+// packed into one wide <vscale x 16 x half> return value.
+svfloat16x2_t test_svldnt1_f16_x2(svcount_t pn, const float16_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_f16,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_f32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CHECK-NEXT:    ret <vscale x 8 x float> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svldnt1_f32_x2u11__SVCount_tPKf(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x float> [[TMP4]]
+//
+// Checks codegen for svldnt1_f32_x2: per the CHECK lines above, it lowers to a
+// single @llvm.aarch64.sve.ldnt1.pn.x2.nxv4f32 call whose two results are
+// packed into one wide <vscale x 8 x float> return value.
+svfloat32x2_t test_svldnt1_f32_x2(svcount_t pn, const float32_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_f32,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_f64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
+// CHECK-NEXT:    ret <vscale x 4 x double> [[TMP4]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svldnt1_f64_x2u11__SVCount_tPKd(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x double> [[TMP4]]
+//
+// Checks codegen for svldnt1_f64_x2: per the CHECK lines above, it lowers to a
+// single @llvm.aarch64.sve.ldnt1.pn.x2.nxv2f64 call whose two results are
+// packed into one wide <vscale x 4 x double> return value.
+svfloat64x2_t test_svldnt1_f64_x2(svcount_t pn, const float64_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_f64,_x2,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_f16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 16)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x half> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svldnt1_f16_x4u11__SVCount_tPKDh(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x half> [[TMP8]]
+//
+// Checks codegen for svldnt1_f16_x4: per the CHECK lines above, it lowers to a
+// single @llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16 call whose four results are
+// packed into one wide <vscale x 32 x half> return value.
+svfloat16x4_t test_svldnt1_f16_x4(svcount_t pn, const float16_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_f16,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_f32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x float> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svldnt1_f32_x4u11__SVCount_tPKf(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x float> [[TMP8]]
+//
+// Checks codegen for svldnt1_f32_x4: per the CHECK lines above, it lowers to a
+// single @llvm.aarch64.sve.ldnt1.pn.x4.nxv4f32 call whose four results are
+// packed into one wide <vscale x 16 x float> return value.
+svfloat32x4_t test_svldnt1_f32_x4(svcount_t pn, const float32_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_f32,_x4,)(pn, base);
+}
+
+// CHECK-LABEL: @test_svldnt1_f64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 2
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 4)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 3
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x double> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svldnt1_f64_x4u11__SVCount_tPKd(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 0
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP1]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 1
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 2
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP0]], 3
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x double> [[TMP8]]
+//
+// Checks codegen for svldnt1_f64_x4: per the CHECK lines above, it lowers to a
+// single @llvm.aarch64.sve.ldnt1.pn.x4.nxv2f64 call whose four results are
+// packed into one wide <vscale x 8 x double> return value.
+svfloat64x4_t test_svldnt1_f64_x4(svcount_t pn, const float64_t *base)
+{
+  return SVE_ACLE_FUNC(svldnt1,_f64,_x4,)(pn, base);
+}
+
+
+// == VNUM variants ==
+
+
+// CHECK-LABEL: @test_svldnt1_vnum_u8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svldnt1_vnum_u8_x2u11__SVCount_tPKhl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP5]]
+//
+// Checks codegen for svldnt1_vnum_u8_x2: per the CHECK lines above, vnum is
+// applied as a getelementptr of <vscale x 16 x i8> units (whole-vector offset)
+// before the @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8 call.
+svuint8x2_t test_svldnt1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_u8,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_u16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u16_x2u11__SVCount_tPKtl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP5]]
+//
+// Checks codegen for svldnt1_vnum_u16_x2: per the CHECK lines above, vnum is
+// applied as a getelementptr of <vscale x 8 x i16> units (whole-vector offset)
+// before the @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16 call.
+svuint16x2_t test_svldnt1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_u16,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_u32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
+// CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u32_x2u11__SVCount_tPKjl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP5]]
+//
+svuint32x2_t test_svldnt1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_u32,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_u64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
+// CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u64_x2u11__SVCount_tPKml(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP5]]
+//
+svuint64x2_t test_svldnt1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_u64,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_u8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 32)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 48)
+// CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svldnt1_vnum_u8_x4u11__SVCount_tPKhl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 48)
+// CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP9]]
+//
+svuint8x4_t test_svldnt1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_u8,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_u16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u16_x4u11__SVCount_tPKtl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP9]]
+//
+svuint16x4_t test_svldnt1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_u16,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_u32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u32_x4u11__SVCount_tPKjl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP9]]
+//
+svuint32x4_t test_svldnt1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_u32,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_u64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u64_x4u11__SVCount_tPKml(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP9]]
+//
+svuint64x4_t test_svldnt1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_u64,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_s8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svldnt1_vnum_s8_x2u11__SVCount_tPKal(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP5]]
+//
+svint8x2_t test_svldnt1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_s8,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_s16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s16_x2u11__SVCount_tPKsl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP5]]
+//
+svint16x2_t test_svldnt1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_s16,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_s32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
+// CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s32_x2u11__SVCount_tPKil(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP5]]
+//
+svint32x2_t test_svldnt1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_s32,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_s64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
+// CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s64_x2u11__SVCount_tPKll(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP5]]
+//
+svint64x2_t test_svldnt1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_s64,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_s8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 32)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 48)
+// CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z23test_svldnt1_vnum_s8_x4u11__SVCount_tPKal(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 48)
+// CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP9]]
+//
+svint8x4_t test_svldnt1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_s8,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_s16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s16_x4u11__SVCount_tPKsl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8i16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> poison, <vscale x 8 x i16> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP9]]
+//
+svint16x4_t test_svldnt1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_s16,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_s32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s32_x4u11__SVCount_tPKil(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4i32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP9]]
+//
+svint32x4_t test_svldnt1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_s32,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_s64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s64_x4u11__SVCount_tPKll(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2i64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> poison, <vscale x 2 x i64> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP9]]
+//
+svint64x4_t test_svldnt1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_s64,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_f16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x half> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f16_x2u11__SVCount_tPKDhl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> poison, <vscale x 8 x half> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x half> [[TMP5]]
+//
+svfloat16x2_t test_svldnt1_vnum_f16_x2(svcount_t pn, const float16_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_f16,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_f32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], i64 4)
+// CHECK-NEXT:    ret <vscale x 8 x float> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f32_x2u11__SVCount_tPKfl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], i64 4)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x float> [[TMP5]]
+//
+svfloat32x2_t test_svldnt1_vnum_f32_x2(svcount_t pn, const float32_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_f32,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_f64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], i64 2)
+// CHECK-NEXT:    ret <vscale x 4 x double> [[TMP5]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f64_x2u11__SVCount_tPKdl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], i64 2)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x double> [[TMP5]]
+//
+svfloat64x2_t test_svldnt1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_f64,_x2,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_f16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], i64 8)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], i64 16)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP7]], <vscale x 8 x half> [[TMP8]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x half> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f16_x4u11__SVCount_tPKDhl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> poison, <vscale x 8 x half> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP7]], <vscale x 8 x half> [[TMP8]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x half> [[TMP9]]
+//
+svfloat16x4_t test_svldnt1_vnum_f16_x4(svcount_t pn, const float16_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_f16,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_f32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], i64 4)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], i64 8)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP7]], <vscale x 4 x float> [[TMP8]], i64 12)
+// CHECK-NEXT:    ret <vscale x 16 x float> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f32_x4u11__SVCount_tPKfl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv4f32(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP7]], <vscale x 4 x float> [[TMP8]], i64 12)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x float> [[TMP9]]
+//
+svfloat32x4_t test_svldnt1_vnum_f32_x4(svcount_t pn, const float32_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_f32,_x4,)(pn, base, vnum);
+}
+
+// CHECK-LABEL: @test_svldnt1_vnum_f64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 1
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], i64 2)
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 2
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], i64 4)
+// CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 3
+// CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP7]], <vscale x 2 x double> [[TMP8]], i64 6)
+// CHECK-NEXT:    ret <vscale x 8 x double> [[TMP9]]
+//
+// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f64_x4u11__SVCount_tPKdl(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv2f64(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 0
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 1
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 2
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } [[TMP1]], 3
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP7]], <vscale x 2 x double> [[TMP8]], i64 6)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x double> [[TMP9]]
+//
+svfloat64x4_t test_svldnt1_vnum_f64_x4(svcount_t pn, const float64_t *base, int64_t vnum)
+{
+  return SVE_ACLE_FUNC(svldnt1_vnum,_f64,_x4,)(pn, base, vnum);
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1.c
new file mode 100644
index 000000000000000..3adf5f2a1a0526a
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1.c
@@ -0,0 +1,998 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// CHECK-LABEL: @test_svst1_u8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V]], i64 16)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z16test_svst1_u8_x2u11__SVCount_tPh11svuint8x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V]], i64 16)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_u8_x2(svcount_t pn, uint8_t *base, svuint8x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_u8_x2,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_u16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z17test_svst1_u16_x2u11__SVCount_tPt12svuint16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_u16_x2(svcount_t pn, uint16_t *base, svuint16x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_u16_x2,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_u32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z17test_svst1_u32_x2u11__SVCount_tPj12svuint32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_u32_x2(svcount_t pn, uint32_t *base, svuint32x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_u32_x2,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_u64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V]], i64 2)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z17test_svst1_u64_x2u11__SVCount_tPm12svuint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V]], i64 2)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_u64_x2(svcount_t pn, uint64_t *base, svuint64x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_u64_x2,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_u8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 32)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 48)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z16test_svst1_u8_x4u11__SVCount_tPh11svuint8x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 48)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_u8_x4(svcount_t pn, uint8_t *base, svuint8x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_u8_x4,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_u16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 24)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z17test_svst1_u16_x4u11__SVCount_tPt12svuint16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 24)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_u16_x4(svcount_t pn, uint16_t *base, svuint16x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_u16_x4,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_u32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 12)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z17test_svst1_u32_x4u11__SVCount_tPj12svuint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 12)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_u32_x4(svcount_t pn, uint32_t *base, svuint32x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_u32_x4,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_u64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 2)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 6)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z17test_svst1_u64_x4u11__SVCount_tPm12svuint64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 6)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_u64_x4(svcount_t pn, uint64_t *base, svuint64x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_u64_x4,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_s8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V]], i64 16)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z16test_svst1_s8_x2u11__SVCount_tPa10svint8x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V]], i64 16)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_s8_x2(svcount_t pn, int8_t *base, svint8x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_s8_x2,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_s16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z17test_svst1_s16_x2u11__SVCount_tPs11svint16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_s16_x2(svcount_t pn, int16_t *base, svint16x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_s16_x2,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_s32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z17test_svst1_s32_x2u11__SVCount_tPi11svint32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_s32_x2(svcount_t pn, int32_t *base, svint32x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_s32_x2,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_s64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V]], i64 2)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z17test_svst1_s64_x2u11__SVCount_tPl11svint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V]], i64 2)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_s64_x2(svcount_t pn, int64_t *base, svint64x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_s64_x2,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_s8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 32)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 48)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z16test_svst1_s8_x4u11__SVCount_tPa10svint8x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 48)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_s8_x4(svcount_t pn, int8_t *base, svint8x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_s8_x4,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_s16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 24)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z17test_svst1_s16_x4u11__SVCount_tPs11svint16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 24)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_s16_x4(svcount_t pn, int16_t *base, svint16x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_s16_x4,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_s32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 12)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z17test_svst1_s32_x4u11__SVCount_tPi11svint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 12)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_s32_x4(svcount_t pn, int32_t *base, svint32x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_s32_x4,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_s64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 2)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 6)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z17test_svst1_s64_x4u11__SVCount_tPl11svint64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 6)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_s64_x4(svcount_t pn, int64_t *base, svint64x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_s64_x4,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_f16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[V]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z17test_svst1_f16_x2u11__SVCount_tPDh13svfloat16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[V]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_f16_x2(svcount_t pn, float16_t *base, svfloat16x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_f16_x2,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_f32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[V]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z17test_svst1_f32_x2u11__SVCount_tPf13svfloat32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[V]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_f32_x2(svcount_t pn, float32_t *base, svfloat32x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_f32_x2,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_f64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[V]], i64 2)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z17test_svst1_f64_x2u11__SVCount_tPd13svfloat64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[V]], i64 2)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_f64_x2,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_f16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 24)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z17test_svst1_f16_x4u11__SVCount_tPDh13svfloat16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 24)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+// Checks codegen for svst1_f16_x4: the four nxv8f16 quarters of the tuple are
+// extracted and stored together via llvm.aarch64.sve.st1.pn.x4.nxv8f16.
+void test_svst1_f16_x4(svcount_t pn, float16_t *base, svfloat16x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_f16_x4,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_f32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 12)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z17test_svst1_f32_x4u11__SVCount_tPf13svfloat32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 12)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+// Checks codegen for svst1_f32_x4: the four nxv4f32 quarters of the tuple are
+// extracted and stored together via llvm.aarch64.sve.st1.pn.x4.nxv4f32.
+void test_svst1_f32_x4(svcount_t pn, float32_t *base, svfloat32x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_f32_x4,,)(pn, base, v);
+}
+
+// CHECK-LABEL: @test_svst1_f64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 2)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 6)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z17test_svst1_f64_x4u11__SVCount_tPd13svfloat64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 6)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+// Checks codegen for svst1_f64_x4: the four nxv2f64 quarters of the tuple are
+// extracted and stored together via llvm.aarch64.sve.st1.pn.x4.nxv2f64.
+void test_svst1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1,_f64_x4,,)(pn, base, v);
+}
+
+
+// == VNUM variants: the base pointer is offset by vnum whole vectors before the store ==
+
+
+// CHECK-LABEL: @test_svst1_vnum_u8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V]], i64 16)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z21test_svst1_vnum_u8_x2u11__SVCount_tPhl11svuint8x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V]], i64 16)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+// Checks codegen for svst1_vnum_u8_x2: base is offset by vnum nxv16i8 vectors
+// (getelementptr), then both tuple halves are stored via st1.pn.x2.nxv16i8.
+void test_svst1_vnum_u8_x2(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_u8_x2,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_u16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u16_x2u11__SVCount_tPtl12svuint16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+// Checks codegen for svst1_vnum_u16_x2: base is offset by vnum nxv8i16 vectors
+// (getelementptr), then both tuple halves are stored via st1.pn.x2.nxv8i16.
+void test_svst1_vnum_u16_x2(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_u16_x2,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_u32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u32_x2u11__SVCount_tPjl12svuint32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+// Checks codegen for svst1_vnum_u32_x2: base is offset by vnum nxv4i32 vectors
+// (getelementptr), then both tuple halves are stored via st1.pn.x2.nxv4i32.
+void test_svst1_vnum_u32_x2(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_u32_x2,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_u64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V]], i64 2)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u64_x2u11__SVCount_tPml12svuint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V]], i64 2)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+// Checks codegen for svst1_vnum_u64_x2: base is offset by vnum nxv2i64 vectors
+// (getelementptr), then both tuple halves are stored via st1.pn.x2.nxv2i64.
+void test_svst1_vnum_u64_x2(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_u64_x2,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_u8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 32)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 48)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z21test_svst1_vnum_u8_x4u11__SVCount_tPhl11svuint8x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 48)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+// Checks codegen for svst1_vnum_u8_x4: base is offset by vnum nxv16i8 vectors
+// (getelementptr), then all four tuple parts are stored via st1.pn.x4.nxv16i8.
+void test_svst1_vnum_u8_x4(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_u8_x4,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_u16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 24)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u16_x4u11__SVCount_tPtl12svuint16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 24)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+// Checks codegen for svst1_vnum_u16_x4: base is offset by vnum nxv8i16 vectors
+// (getelementptr), then all four tuple parts are stored via st1.pn.x4.nxv8i16.
+void test_svst1_vnum_u16_x4(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_u16_x4,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_u32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 12)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u32_x4u11__SVCount_tPjl12svuint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 12)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+// Checks codegen for svst1_vnum_u32_x4: base is offset by vnum nxv4i32 vectors
+// (getelementptr), then all four tuple parts are stored via st1.pn.x4.nxv4i32.
+void test_svst1_vnum_u32_x4(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_u32_x4,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_u64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 2)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 6)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u64_x4u11__SVCount_tPml12svuint64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 6)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+// Checks codegen for svst1_vnum_u64_x4: base is offset by vnum nxv2i64 vectors
+// (getelementptr), then all four tuple parts are stored via st1.pn.x4.nxv2i64.
+void test_svst1_vnum_u64_x4(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_u64_x4,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_s8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V]], i64 16)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z21test_svst1_vnum_s8_x2u11__SVCount_tPal10svint8x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V]], i64 16)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+// Checks codegen for svst1_vnum_s8_x2: identical lowering to the u8_x2 case
+// (element type only affects the C signature), using st1.pn.x2.nxv16i8.
+void test_svst1_vnum_s8_x2(svcount_t pn, int8_t *base, int64_t vnum, svint8x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_s8_x2,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_s16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s16_x2u11__SVCount_tPsl11svint16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+// Checks codegen for svst1_vnum_s16_x2: identical lowering to the u16_x2 case
+// (element type only affects the C signature), using st1.pn.x2.nxv8i16.
+void test_svst1_vnum_s16_x2(svcount_t pn, int16_t *base, int64_t vnum, svint16x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_s16_x2,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_s32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s32_x2u11__SVCount_tPil11svint32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+// Checks codegen for svst1_vnum_s32_x2: identical lowering to the u32_x2 case
+// (element type only affects the C signature), using st1.pn.x2.nxv4i32.
+void test_svst1_vnum_s32_x2(svcount_t pn, int32_t *base, int64_t vnum, svint32x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_s32_x2,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_s64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V]], i64 2)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s64_x2u11__SVCount_tPll11svint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V]], i64 2)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+// Checks codegen for svst1_vnum_s64_x2: identical lowering to the u64_x2 case
+// (element type only affects the C signature), using st1.pn.x2.nxv2i64.
+void test_svst1_vnum_s64_x2(svcount_t pn, int64_t *base, int64_t vnum, svint64x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_s64_x2,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_s8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 32)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 48)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z21test_svst1_vnum_s8_x4u11__SVCount_tPal10svint8x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 48)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+// Checks codegen for svst1_vnum_s8_x4: identical lowering to the u8_x4 case
+// (element type only affects the C signature), using st1.pn.x4.nxv16i8.
+void test_svst1_vnum_s8_x4(svcount_t pn, int8_t *base, int64_t vnum, svint8x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_s8_x4,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_s16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 24)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s16_x4u11__SVCount_tPsl11svint16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 24)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+// Checks codegen for svst1_vnum_s16_x4: identical lowering to the u16_x4 case
+// (element type only affects the C signature), using st1.pn.x4.nxv8i16.
+void test_svst1_vnum_s16_x4(svcount_t pn, int16_t *base, int64_t vnum, svint16x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_s16_x4,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_s32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 12)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s32_x4u11__SVCount_tPil11svint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 12)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+// Checks codegen for svst1_vnum_s32_x4: identical lowering to the u32_x4 case
+// (element type only affects the C signature), using st1.pn.x4.nxv4i32.
+void test_svst1_vnum_s32_x4(svcount_t pn, int32_t *base, int64_t vnum, svint32x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_s32_x4,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_s64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 2)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 6)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s64_x4u11__SVCount_tPll11svint64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 6)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_vnum_s64_x4(svcount_t pn, int64_t *base, int64_t vnum, svint64x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_s64_x4,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_f16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[V]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_f16_x2u11__SVCount_tPDhd13svfloat16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[V]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_vnum_f16_x2(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_f16_x2,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_f32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[V]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_f32_x2u11__SVCount_tPfd13svfloat32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[V]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_vnum_f32_x2(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_f32_x2,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_f64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[V]], i64 2)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_f64_x2u11__SVCount_tPdd13svfloat64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[V]], i64 2)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x2.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x2_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_f64_x2,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_f16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 24)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_f16_x4u11__SVCount_tPDhd13svfloat16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 24)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_vnum_f16_x4(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_f16_x4,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_f32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 12)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_f32_x4u11__SVCount_tPfd13svfloat32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 12)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_vnum_f32_x4(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_f32_x4,,)(pn, base, vnum, v);
+}
+
+// CHECK-LABEL: @test_svst1_vnum_f64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 2)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 6)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_f64_x4u11__SVCount_tPdd13svfloat64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 6)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.pn.x4.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svst1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x4_t v)
+{
+  return SVE_ACLE_FUNC(svst1_vnum,_f64_x4,,)(pn, base, vnum, v);
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c
new file mode 100644
index 000000000000000..536211fdc3f7809
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c
@@ -0,0 +1,1042 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+
+// CHECK-LABEL: @test_svstnt1_u8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V]], i64 16)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z18test_svstnt1_u8_x2u11__SVCount_tPh11svuint8x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V]], i64 16)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_u8_x2(svcount_t pn, uint8_t *base, svuint8x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_u8_x2,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_u16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svstnt1_u16_x2u11__SVCount_tPt12svuint16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_u16_x2(svcount_t pn, uint16_t *base, svuint16x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_u16_x2,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_u32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svstnt1_u32_x2u11__SVCount_tPj12svuint32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_u32_x2(svcount_t pn, uint32_t *base, svuint32x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_u32_x2,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_u64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V]], i64 2)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svstnt1_u64_x2u11__SVCount_tPm12svuint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V]], i64 2)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_u64_x2(svcount_t pn, uint64_t *base, svuint64x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_u64_x2,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_u8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 32)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 48)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z18test_svstnt1_u8_x4u11__SVCount_tPh11svuint8x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 48)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_u8_x4(svcount_t pn, uint8_t *base, svuint8x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_u8_x4,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_u16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 24)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svstnt1_u16_x4u11__SVCount_tPt12svuint16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 24)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_u16_x4(svcount_t pn, uint16_t *base, svuint16x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_u16_x4,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_u32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 12)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svstnt1_u32_x4u11__SVCount_tPj12svuint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 12)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_u32_x4(svcount_t pn, uint32_t *base, svuint32x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_u32_x4,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_u64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 2)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 6)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svstnt1_u64_x4u11__SVCount_tPm12svuint64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 6)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_u64_x4(svcount_t pn, uint64_t *base, svuint64x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_u64_x4,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_s8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V]], i64 16)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z18test_svstnt1_s8_x2u11__SVCount_tPa10svint8x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V]], i64 16)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_s8_x2(svcount_t pn, int8_t *base, svint8x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_s8_x2,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_s16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svstnt1_s16_x2u11__SVCount_tPs11svint16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_s16_x2(svcount_t pn, int16_t *base, svint16x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_s16_x2,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_s32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svstnt1_s32_x2u11__SVCount_tPi11svint32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_s32_x2(svcount_t pn, int32_t *base, svint32x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_s32_x2,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_s64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V]], i64 2)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svstnt1_s64_x2u11__SVCount_tPl11svint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V]], i64 2)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_s64_x2(svcount_t pn, int64_t *base, svint64x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_s64_x2,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_s8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 32)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 48)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z18test_svstnt1_s8_x4u11__SVCount_tPa10svint8x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 48)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_s8_x4(svcount_t pn, int8_t *base, svint8x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_s8_x4,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_s16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 24)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svstnt1_s16_x4u11__SVCount_tPs11svint16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 24)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_s16_x4(svcount_t pn, int16_t *base, svint16x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_s16_x4,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_s32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 12)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svstnt1_s32_x4u11__SVCount_tPi11svint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 12)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_s32_x4(svcount_t pn, int32_t *base, svint32x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_s32_x4,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_s64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 2)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 6)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svstnt1_s64_x4u11__SVCount_tPl11svint64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 6)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_s64_x4(svcount_t pn, int64_t *base, svint64x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_s64_x4,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_f16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[V]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svstnt1_f16_x2u11__SVCount_tPDh13svfloat16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[V]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_f16_x2(svcount_t pn, float16_t *base, svfloat16x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_f16_x2,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_f32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[V]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svstnt1_f32_x2u11__SVCount_tPf13svfloat32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[V]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_f32_x2(svcount_t pn, float32_t *base, svfloat32x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_f32_x2,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_f64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[V]], i64 2)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svstnt1_f64_x2u11__SVCount_tPd13svfloat64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[V]], i64 2)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_f64_x2,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_f16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 24)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svstnt1_f16_x4u11__SVCount_tPDh13svfloat16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 24)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_f16_x4(svcount_t pn, float16_t *base, svfloat16x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_f16_x4,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_f32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 12)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svstnt1_f32_x4u11__SVCount_tPf13svfloat32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 12)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_f32_x4(svcount_t pn, float32_t *base, svfloat32x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_f32_x4,,)(pn, base, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_f64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 2)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 6)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z19test_svstnt1_f64_x4u11__SVCount_tPd13svfloat64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 6)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1,_f64_x4,,)(pn, base, v);
+}
+
+
+// == VNUM variants ==
+
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_u8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V]], i64 16)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z23test_svstnt1_vnum_u8_x2u11__SVCount_tPhl11svuint8x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V]], i64 16)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_u8_x2(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_u8_x2,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_u16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u16_x2u11__SVCount_tPtl12svuint16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_u16_x2(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_u16_x2,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_u32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u32_x2u11__SVCount_tPjl12svuint32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_u32_x2(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_u32_x2,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_u64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V]], i64 2)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u64_x2u11__SVCount_tPml12svuint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V]], i64 2)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_u64_x2(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_u64_x2,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_u8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 32)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 48)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z23test_svstnt1_vnum_u8_x4u11__SVCount_tPhl11svuint8x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 48)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_u8_x4(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_u8_x4,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_u16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 24)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u16_x4u11__SVCount_tPtl12svuint16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 24)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_u16_x4(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_u16_x4,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_u32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 12)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u32_x4u11__SVCount_tPjl12svuint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 12)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_u32_x4(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_u32_x4,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_u64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 2)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 6)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u64_x4u11__SVCount_tPml12svuint64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 6)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_u64_x4(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_u64_x4,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_s8_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V]], i64 16)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z23test_svstnt1_vnum_s8_x2u11__SVCount_tPal10svint8x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V]], i64 16)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_s8_x2(svcount_t pn, int8_t *base, int64_t vnum, svint8x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_s8_x2,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_s16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s16_x2u11__SVCount_tPsl11svint16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv16i16(<vscale x 16 x i16> [[V]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_s16_x2(svcount_t pn, int16_t *base, int64_t vnum, svint16x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_s16_x2,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_s32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s32_x2u11__SVCount_tPil11svint32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[V]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_s32_x2(svcount_t pn, int32_t *base, int64_t vnum, svint32x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_s32_x2,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_s64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V]], i64 2)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s64_x2u11__SVCount_tPll11svint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv4i64(<vscale x 4 x i64> [[V]], i64 2)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_s64_x2(svcount_t pn, int64_t *base, int64_t vnum, svint64x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_s64_x2,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_s8_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 32)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 48)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z23test_svstnt1_vnum_s8_x4u11__SVCount_tPal10svint8x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 32)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> [[V]], i64 48)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_s8_x4(svcount_t pn, int8_t *base, int64_t vnum, svint8x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_s8_x4,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_s16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 24)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s16_x4u11__SVCount_tPsl11svint16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x i16>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.vector.extract.nxv8i16.nxv32i16(<vscale x 32 x i16> [[V]], i64 24)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_s16_x4(svcount_t pn, int16_t *base, int64_t vnum, svint16x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_s16_x4,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_s32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 12)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s32_x4u11__SVCount_tPil11svint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[V]], i64 12)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_s32_x4(svcount_t pn, int32_t *base, int64_t vnum, svint32x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_s32_x4,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_s64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 2)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 6)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s64_x4u11__SVCount_tPll11svint64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x i64>, ptr [[BASE:%.*]], i64 [[VNUM:%.*]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.vector.extract.nxv2i64.nxv8i64(<vscale x 8 x i64> [[V]], i64 6)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_s64_x4(svcount_t pn, int64_t *base, int64_t vnum, svint64x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_s64_x4,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_f16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[V]], i64 8)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_f16_x2u11__SVCount_tPDhd13svfloat16x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[V]], i64 8)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_f16_x2(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_f16_x2,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_f32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[V]], i64 4)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_f32_x2u11__SVCount_tPfd13svfloat32x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[V]], i64 4)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_f32_x2(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_f32_x2,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_f64_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[V]], i64 2)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_f64_x2u11__SVCount_tPdd13svfloat64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv4f64(<vscale x 4 x double> [[V]], i64 2)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x2.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x2_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_f64_x2,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_f16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 16)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 24)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_f16_x4u11__SVCount_tPDhd13svfloat16x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 8 x half>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[V]], i64 24)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv8f16(<vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_f16_x4(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_f16_x4,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_f32_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 8)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 12)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_f32_x4u11__SVCount_tPfd13svfloat32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 4 x float>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[V]], i64 12)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv4f32(<vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_f32_x4(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_f32_x4,,)(pn, base, vnum, v);
+}
+
+
+// CHECK-LABEL: @test_svstnt1_vnum_f64_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 2)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 4)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 6)
+// CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_f64_x4u11__SVCount_tPdd13svfloat64x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = getelementptr <vscale x 2 x double>, ptr [[BASE:%.*]], i64 [[CONV]]
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 2)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 4)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.vector.extract.nxv2f64.nxv8f64(<vscale x 8 x double> [[V]], i64 6)
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.pn.x4.nxv2f64(<vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], target("aarch64.svcount") [[PN:%.*]], ptr [[TMP0]])
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svstnt1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x4_t v)
+{
+  return SVE_ACLE_FUNC(svstnt1_vnum,_f64_x4,,)(pn, base, vnum, v);
+}

>From 21e1b13f3384b875bd2205a736570320cb020f3e Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 19 Oct 2023 16:26:30 +0100
Subject: [PATCH 09/76] [TwoAddressInstruction] Handle physical registers with
 LiveIntervals (#66784)

Teach the LiveIntervals path in isPlainlyKilled to handle physical
registers, to get equivalent functionality with the LiveVariables path.

Test this by adding -early-live-intervals RUN lines to a handful of
tests that would fail without this.
---
 .../lib/CodeGen/TwoAddressInstructionPass.cpp | 43 ++++++++++++-------
 .../test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll |  8 ++++
 .../test/CodeGen/RISCV/rvv/fminimum-sdnode.ll |  8 ++++
 llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll  |  4 ++
 llvm/test/CodeGen/X86/combine-or.ll           |  5 +--
 llvm/test/CodeGen/X86/machine-cse.ll          |  1 +
 llvm/test/CodeGen/X86/ternlog.ll              |  1 +
 7 files changed, 51 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 4ae396723ef0977..d3f58ae310d78ae 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -125,6 +125,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
   bool isCopyToReg(MachineInstr &MI, Register &SrcReg, Register &DstReg,
                    bool &IsSrcPhys, bool &IsDstPhys) const;
 
+  bool isPlainlyKilled(const MachineInstr *MI, LiveRange &LR) const;
   bool isPlainlyKilled(const MachineInstr *MI, Register Reg) const;
   bool isPlainlyKilled(const MachineOperand &MO) const;
 
@@ -305,27 +306,37 @@ bool TwoAddressInstructionPass::isCopyToReg(MachineInstr &MI, Register &SrcReg,
   return true;
 }
 
+bool TwoAddressInstructionPass::isPlainlyKilled(const MachineInstr *MI,
+                                                LiveRange &LR) const {
+  // This is to match the kill flag version where undefs don't have kill flags.
+  if (!LR.hasAtLeastOneValue())
+    return false;
+
+  SlotIndex useIdx = LIS->getInstructionIndex(*MI);
+  LiveInterval::const_iterator I = LR.find(useIdx);
+  assert(I != LR.end() && "Reg must be live-in to use.");
+  return !I->end.isBlock() && SlotIndex::isSameInstr(I->end, useIdx);
+}
+
 /// Test if the given register value, which is used by the
 /// given instruction, is killed by the given instruction.
 bool TwoAddressInstructionPass::isPlainlyKilled(const MachineInstr *MI,
                                                 Register Reg) const {
-  if (LIS && Reg.isVirtual() && !LIS->isNotInMIMap(*MI)) {
-    // FIXME: Sometimes tryInstructionTransform() will add instructions and
-    // test whether they can be folded before keeping them. In this case it
-    // sets a kill before recursively calling tryInstructionTransform() again.
-    // If there is no interval available, we assume that this instruction is
-    // one of those. A kill flag is manually inserted on the operand so the
-    // check below will handle it.
-    LiveInterval &LI = LIS->getInterval(Reg);
-    // This is to match the kill flag version where undefs don't have kill
-    // flags.
-    if (!LI.hasAtLeastOneValue())
+  // FIXME: Sometimes tryInstructionTransform() will add instructions and
+  // test whether they can be folded before keeping them. In this case it
+  // sets a kill before recursively calling tryInstructionTransform() again.
+  // If there is no interval available, we assume that this instruction is
+  // one of those. A kill flag is manually inserted on the operand so the
+  // check below will handle it.
+  if (LIS && !LIS->isNotInMIMap(*MI)) {
+    if (Reg.isVirtual())
+      return isPlainlyKilled(MI, LIS->getInterval(Reg));
+    // Reserved registers are considered always live.
+    if (MRI->isReserved(Reg))
       return false;
-
-    SlotIndex useIdx = LIS->getInstructionIndex(*MI);
-    LiveInterval::const_iterator I = LI.find(useIdx);
-    assert(I != LI.end() && "Reg must be live-in to use.");
-    return !I->end.isBlock() && SlotIndex::isSameInstr(I->end, useIdx);
+    return all_of(TRI->regunits(Reg), [&](MCRegUnit U) {
+      return isPlainlyKilled(MI, LIS->getRegUnit(U));
+    });
   }
 
   return MI->killsRegister(Reg);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
index 6fd0b280a52c64d..c53fa714157d5c5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
@@ -1,12 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN:   -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN:   -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
+; RUN:   -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
+; RUN:   -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 
 declare <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
index d9d53fb53e44473..b386792cd3688c8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
@@ -1,12 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN:   -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN:   -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
+; RUN:   -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
+; RUN:   -verify-machineinstrs -early-live-intervals < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 
 declare <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll
index 5792171269057bb..433b0d1cbdd8531 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll
@@ -1,8 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs -early-live-intervals < %s | FileCheck %s
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN:     -verify-machineinstrs -early-live-intervals < %s | FileCheck %s
 
 ; This tests a mix of vfmsac and vfmsub by using different operand orders to
 ; trigger commuting in TwoAddressInstructionPass.
diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll
index 460251ffa6c5dfd..3073cf0124a9d68 100644
--- a/llvm/test/CodeGen/X86/combine-or.ll
+++ b/llvm/test/CodeGen/X86/combine-or.ll
@@ -249,9 +249,8 @@ define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LIS-LABEL: test18:
 ; CHECK-LIS:       # %bb.0:
 ; CHECK-LIS-NEXT:    pxor %xmm2, %xmm2
-; CHECK-LIS-NEXT:    pxor %xmm3, %xmm3
-; CHECK-LIS-NEXT:    pblendw {{.*#+}} xmm3 = xmm0[0,1],xmm3[2,3,4,5,6,7]
-; CHECK-LIS-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,0,1,1]
+; CHECK-LIS-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
+; CHECK-LIS-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
 ; CHECK-LIS-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3,4,5,6,7]
 ; CHECK-LIS-NEXT:    por %xmm0, %xmm2
 ; CHECK-LIS-NEXT:    movdqa %xmm2, %xmm0
diff --git a/llvm/test/CodeGen/X86/machine-cse.ll b/llvm/test/CodeGen/X86/machine-cse.ll
index d436f10fdc1f41f..431031136e4a429 100644
--- a/llvm/test/CodeGen/X86/machine-cse.ll
+++ b/llvm/test/CodeGen/X86/machine-cse.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -early-live-intervals < %s | FileCheck %s
 ; rdar://7610418
 
 %ptr = type { ptr }
diff --git a/llvm/test/CodeGen/X86/ternlog.ll b/llvm/test/CodeGen/X86/ternlog.ll
index da3b7d5baffd557..cef044acbc5a941 100644
--- a/llvm/test/CodeGen/X86/ternlog.ll
+++ b/llvm/test/CodeGen/X86/ternlog.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s
+; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl -early-live-intervals | FileCheck %s
 
 ;; This is just a simple test to make sure there are no regressions
 ;; cause by splitting/recombining ternlog intrinsics.

>From d2e7a15dfb509393d8eb74e1bb4348e72a92dfcd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?=
 =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?=
 =?UTF-8?q?=E3=83=B3=29?= <clementval at gmail.com>
Date: Thu, 19 Oct 2023 08:49:01 -0700
Subject: [PATCH 10/76] [flang][openacc] Warn about misplaced end loop
 directive and ignore it (#69512)

Instead of raising an error for a misplaced `end loop directive`, just
warn about it and ignore it. This directive is an extension and is
optional.
---
 flang/include/flang/Parser/dump-parse-tree.h |  1 +
 flang/include/flang/Parser/parse-tree.h      |  7 ++++++-
 flang/lib/Lower/OpenACC.cpp                  |  3 +++
 flang/lib/Parser/openacc-parsers.cpp         |  6 +++++-
 flang/lib/Semantics/check-acc-structure.cpp  |  4 ++++
 flang/lib/Semantics/check-acc-structure.h    |  1 +
 flang/test/Semantics/OpenACC/acc-error.f90   | 14 ++++++++++++++
 7 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h
index e7d74dda71a20cf..494e54faa64c841 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -568,6 +568,7 @@ class ParseTreeDumper {
   NODE(parser, OpenACCCombinedConstruct)
   NODE(parser, OpenACCConstruct)
   NODE(parser, OpenACCDeclarativeConstruct)
+  NODE(parser, OpenACCEndConstruct)
   NODE(parser, OpenACCLoopConstruct)
   NODE(parser, OpenACCRoutineConstruct)
   NODE(parser, OpenACCStandaloneDeclarativeConstruct)
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index 408a474cfa8a5d5..a51921f2562b8a9 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4257,6 +4257,11 @@ struct OpenACCLoopConstruct {
       t;
 };
 
+struct OpenACCEndConstruct {
+  WRAPPER_CLASS_BOILERPLATE(OpenACCEndConstruct, llvm::acc::Directive);
+  CharBlock source;
+};
+
 struct OpenACCStandaloneConstruct {
   TUPLE_CLASS_BOILERPLATE(OpenACCStandaloneConstruct);
   CharBlock source;
@@ -4267,7 +4272,7 @@ struct OpenACCConstruct {
   UNION_CLASS_BOILERPLATE(OpenACCConstruct);
   std::variant<OpenACCBlockConstruct, OpenACCCombinedConstruct,
       OpenACCLoopConstruct, OpenACCStandaloneConstruct, OpenACCCacheConstruct,
-      OpenACCWaitConstruct, OpenACCAtomicConstruct>
+      OpenACCWaitConstruct, OpenACCAtomicConstruct, OpenACCEndConstruct>
       u;
 };
 
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index c8dcc91064415fa..4fafcebc30d116a 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -3358,6 +3358,9 @@ void Fortran::lower::genOpenACCConstruct(
           [&](const Fortran::parser::OpenACCAtomicConstruct &atomicConstruct) {
             genACC(converter, eval, atomicConstruct);
           },
+          [&](const Fortran::parser::OpenACCEndConstruct &) {
+            // No op
+          },
       },
       accConstruct.u);
 }
diff --git a/flang/lib/Parser/openacc-parsers.cpp b/flang/lib/Parser/openacc-parsers.cpp
index 75aeffd29f92f10..bd5dcd8405e8f9f 100644
--- a/flang/lib/Parser/openacc-parsers.cpp
+++ b/flang/lib/Parser/openacc-parsers.cpp
@@ -235,6 +235,9 @@ TYPE_PARSER(startAccLine >>
             sourced(construct<OpenACCDeclarativeConstruct>(
                 Parser<OpenACCRoutineConstruct>{})))))
 
+TYPE_PARSER(sourced(construct<OpenACCEndConstruct>(
+    "END"_tok >> "LOOP"_tok >> pure(llvm::acc::Directive::ACCD_loop))))
+
 // OpenACC constructs
 TYPE_CONTEXT_PARSER("OpenACC construct"_en_US,
     startAccLine >>
@@ -246,7 +249,8 @@ TYPE_CONTEXT_PARSER("OpenACC construct"_en_US,
                     Parser<OpenACCStandaloneConstruct>{}),
                 construct<OpenACCConstruct>(Parser<OpenACCCacheConstruct>{}),
                 construct<OpenACCConstruct>(Parser<OpenACCWaitConstruct>{}),
-                construct<OpenACCConstruct>(Parser<OpenACCAtomicConstruct>{}))))
+                construct<OpenACCConstruct>(Parser<OpenACCAtomicConstruct>{}),
+                construct<OpenACCConstruct>(Parser<OpenACCEndConstruct>{}))))
 
 TYPE_PARSER(startAccLine >>
     sourced(construct<AccEndCombinedDirective>(sourced("END"_tok >>
diff --git a/flang/lib/Semantics/check-acc-structure.cpp b/flang/lib/Semantics/check-acc-structure.cpp
index ce3525e3c335b97..ef253586cfa0ed1 100644
--- a/flang/lib/Semantics/check-acc-structure.cpp
+++ b/flang/lib/Semantics/check-acc-structure.cpp
@@ -673,6 +673,10 @@ void AccStructureChecker::Enter(const parser::AccClause::If &x) {
       GetContext().clauseSource, "Must have LOGICAL or INTEGER type"_err_en_US);
 }
 
+void AccStructureChecker::Enter(const parser::OpenACCEndConstruct &x) {
+  context_.Say(x.source, "Misplaced OpenACC end directive"_warn_en_US);
+}
+
 void AccStructureChecker::Enter(const parser::Module &) {
   declareSymbols.clear();
 }
diff --git a/flang/lib/Semantics/check-acc-structure.h b/flang/lib/Semantics/check-acc-structure.h
index 8b87b8ddc502f3f..2a09b7a39395d16 100644
--- a/flang/lib/Semantics/check-acc-structure.h
+++ b/flang/lib/Semantics/check-acc-structure.h
@@ -63,6 +63,7 @@ class AccStructureChecker
   void Enter(const parser::OpenACCCacheConstruct &);
   void Leave(const parser::OpenACCCacheConstruct &);
   void Enter(const parser::AccAtomicUpdate &);
+  void Enter(const parser::OpenACCEndConstruct &);
 
   // Clauses
   void Leave(const parser::AccClauseList &);
diff --git a/flang/test/Semantics/OpenACC/acc-error.f90 b/flang/test/Semantics/OpenACC/acc-error.f90
index b1c3b7784742992..69ee59f97ec6b30 100644
--- a/flang/test/Semantics/OpenACC/acc-error.f90
+++ b/flang/test/Semantics/OpenACC/acc-error.f90
@@ -13,3 +13,17 @@ subroutine test(a, n)
     !ERROR: expected OpenACC directive
     !$acc p
   end subroutine
+
+subroutine test2(a, n)
+  integer :: a(n)
+  integer :: i
+
+  !$acc parallel
+  !$acc loop
+  DO i = 1, n
+  END DO
+  !$acc end parallel
+  !WARN: Misplaced OpenACC end directive
+  !$acc end loop
+
+end subroutine

>From 3e49ce6ea16ec7e1c580ab785992b773a37f270c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Fri, 20 Oct 2023 00:51:12 +0900
Subject: [PATCH 11/76] InlineSpiller: Delete assert that implicit_def has no
 implicit operands (#69087)

It's not a verifier-enforced property that implicit_def may only have
one operand. Fixes assertions after the coalescer adds implicit-defs to
arbitrary instructions to preserve super-register liveness.

For some reason I'm unable to reproduce this as a MIR test running only
the allocator for the x86 test. Not sure it's worth keeping around.
---
 llvm/lib/CodeGen/InlineSpiller.cpp            |   3 +-
 ...implicit-def-with-impdef-greedy-assert.mir | 181 ++++++++++++++++++
 ...iller-impdef-on-implicit-def-regression.ll | 178 +++++++++++++++++
 3 files changed, 360 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
 create mode 100644 llvm/test/CodeGen/X86/inline-spiller-impdef-on-implicit-def-regression.ll

diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index c62f3db9d321562..46fcc62e09e8a8c 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -1071,8 +1071,7 @@ void InlineSpiller::insertReload(Register NewVReg,
 static bool isRealSpill(const MachineInstr &Def) {
   if (!Def.isImplicitDef())
     return true;
-  assert(Def.getNumOperands() == 1 &&
-         "Implicit def with more than one definition");
+
   // We can say that the VReg defined by Def is undef, only if it is
   // fully defined by Def. Otherwise, some of the lanes may not be
   // undef and the value of the VReg matters.
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
new file mode 100644
index 000000000000000..c79c951fdc15247
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
@@ -0,0 +1,181 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=arm64-apple-ios -run-pass=greedy -o - %s | FileCheck %s
+
+---
+name:            widget
+tracksRegLiveness: true
+jumpTable:
+  kind:            label-difference32
+  entries:
+    - id:              0
+      blocks:          [ '%bb.9', '%bb.5', '%bb.2', '%bb.2', '%bb.2' ]
+body:             |
+  ; CHECK-LABEL: name: widget
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $w0, $w1, $x2, $x3, $x4, $w5, $w6
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:gpr64common = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:gpr64 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:gpr32common = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF4:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF5:%[0-9]+]]:gpr64common = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF6:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   undef [[DEF7:%[0-9]+]].sub_32:gpr64 = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF8:%[0-9]+]]:gpr64common = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[DEF9:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+  ; CHECK-NEXT:   dead [[DEF10:%[0-9]+]]:gpr64 = IMPLICIT_DEF
+  ; CHECK-NEXT:   undef [[DEF11:%[0-9]+]].sub_32:gpr64 = IMPLICIT_DEF implicit-def dead %11
+  ; CHECK-NEXT:   STRXui [[DEF11]], %stack.0, 0 :: (store (s64) into %stack.0)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x0fbefbf0), %bb.4(0x70410410)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   Bcc 8, %bb.3, implicit killed undef $nzcv
+  ; CHECK-NEXT:   B %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.11(0x00000000), %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead $wzr = SUBSWri [[DEF2]], 64, 0, implicit-def $nzcv
+  ; CHECK-NEXT:   Bcc 0, %bb.11, implicit killed undef $nzcv
+  ; CHECK-NEXT:   B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.9(0x01288b01), %bb.5(0x01288b01), %bb.2(0x11f46a91), %bb.6(0x23e8d524), %bb.7(0x47d1aa49)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead early-clobber %12:gpr64, dead early-clobber %13:gpr64sp = JumpTableDest32 [[DEF8]], [[DEF7]], %jump-table.0
+  ; CHECK-NEXT:   BR undef %18:gpr64
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5:
+  ; CHECK-NEXT:   successors: %bb.8(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   B %bb.8
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.6:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   STRWui [[DEF9]], [[DEF5]], 0 :: (store (s32))
+  ; CHECK-NEXT:   B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.7:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   STRWui $wzr, [[DEF]], 0 :: (store (s32))
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   $w0 = COPY [[DEF4]]
+  ; CHECK-NEXT:   $x1 = COPY [[DEF1]]
+  ; CHECK-NEXT:   BL 0, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   $w0 = COPY [[DEF6]]
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.8:
+  ; CHECK-NEXT:   successors: %bb.8(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   B %bb.8
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.9:
+  ; CHECK-NEXT:   successors: %bb.10(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   undef [[DEF12:%[0-9]+]].sub_32:gpr64 = IMPLICIT_DEF
+  ; CHECK-NEXT:   STRXui [[DEF12]], %stack.0, 0 :: (store (s64) into %stack.0)
+  ; CHECK-NEXT:   TBZW [[DEF3]], 0, %bb.1
+  ; CHECK-NEXT:   B %bb.10
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.10:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 32, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   ADJCALLSTACKUP 32, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   B %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.11:
+  ; CHECK-NEXT:   [[LDRXui:%[0-9]+]]:gpr64 = LDRXui %stack.0, 0 :: (load (s64) from %stack.0)
+  ; CHECK-NEXT:   dead undef [[COPY:%[0-9]+]].sub_32:gpr64 = COPY [[LDRXui]].sub_32
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+  bb.0:
+    liveins: $w0, $w1, $x2, $x3, $x4, $w5, $w6
+
+    %0:gpr64common = IMPLICIT_DEF
+    %1:gpr64 = IMPLICIT_DEF
+    %2:gpr32common = IMPLICIT_DEF
+    %3:gpr32 = IMPLICIT_DEF
+    %4:gpr32 = IMPLICIT_DEF
+    %5:gpr64common = IMPLICIT_DEF
+    %6:gpr32 = IMPLICIT_DEF
+    undef %7.sub_32:gpr64 = IMPLICIT_DEF
+    %8:gpr64common = IMPLICIT_DEF
+    %9:gpr32 = IMPLICIT_DEF
+    %10:gpr64 = IMPLICIT_DEF
+    undef %10.sub_32:gpr64 = IMPLICIT_DEF implicit-def %11:gpr64
+
+  bb.1:
+
+  bb.2:
+    successors: %bb.3(0x0fbefbf0), %bb.4(0x70410410)
+
+    Bcc 8, %bb.3, implicit killed undef $nzcv
+    B %bb.4
+
+  bb.3:
+    successors: %bb.11(0x00000000), %bb.2(0x80000000)
+
+    dead $wzr = SUBSWri %2, 64, 0, implicit-def $nzcv
+    Bcc 0, %bb.11, implicit killed undef $nzcv
+    B %bb.2
+
+  bb.4:
+    successors: %bb.9(0x01288b01), %bb.5(0x01288b01), %bb.2(0x11f46a91), %bb.6(0x23e8d524), %bb.7(0x47d1aa49)
+
+    early-clobber %12:gpr64, dead early-clobber %13:gpr64sp = JumpTableDest32 %8, %7, %jump-table.0
+    BR undef %10
+
+  bb.5:
+    B %bb.8
+
+  bb.6:
+    STRWui %9, %5, 0 :: (store (s32))
+    B %bb.2
+
+  bb.7:
+    STRWui $wzr, %0, 0 :: (store (s32))
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    $w0 = COPY %4
+    $x1 = COPY %1
+    BL 0, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0
+    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    $w0 = COPY %6
+    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+    B %bb.2
+
+  bb.8:
+    B %bb.8
+
+  bb.9:
+    successors: %bb.10, %bb.1
+
+    undef %10.sub_32:gpr64 = IMPLICIT_DEF
+    TBZW %3, 0, %bb.1
+    B %bb.10
+
+  bb.10:
+    ADJCALLSTACKDOWN 32, 0, implicit-def dead $sp, implicit $sp
+    ADJCALLSTACKUP 32, 0, implicit-def dead $sp, implicit $sp
+    B %bb.1
+
+  bb.11:
+    undef %14.sub_32:gpr64 = COPY %10.sub_32
+    ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+    ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+
+...
diff --git a/llvm/test/CodeGen/X86/inline-spiller-impdef-on-implicit-def-regression.ll b/llvm/test/CodeGen/X86/inline-spiller-impdef-on-implicit-def-regression.ll
new file mode 100644
index 000000000000000..8b8500ef7248667
--- /dev/null
+++ b/llvm/test/CodeGen/X86/inline-spiller-impdef-on-implicit-def-regression.ll
@@ -0,0 +1,178 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+; Make sure there's no assert on an implicit-def with implicit operands
+; during register allocation.
+
+%struct.BlockContext = type { [32 x i8], [32 x i8], [2 x [32 x i8]], [32 x i8], [32 x i8], [32 x i8], [32 x i8], [32 x i8], [2 x [32 x i8]], [2 x [32 x i8]], [32 x i8], [32 x i8], [32 x i8], [32 x i8], [16 x i8], [32 x i8], [32 x i8] }
+%struct.CdfModeContext = type { [4 x [16 x i16]], [2 x [13 x [16 x i16]]], [9 x [16 x i16]], [5 x [4 x [16 x i16]]], [6 x [16 x i16]], [2 x [16 x i16]], [16 x i16], [2 x [13 x [8 x i16]]], [3 x [13 x [8 x i16]]], [8 x i16], [8 x [8 x i16]], [8 x i16], [8 x [8 x i16]], [3 x [8 x i16]], [2 x [7 x [8 x i16]]], [2 x [7 x [5 x [8 x i16]]]], [2 x [8 x [4 x i16]]], [4 x [3 x [4 x i16]]], [22 x [4 x i16]], [4 x i16], [5 x [4 x i16]], [4 x [4 x i16]], [4 x i16], [2 x i16], [2 x i16], [7 x [2 x i16]], [7 x [2 x i16]], [4 x [2 x i16]], [22 x [2 x i16]], [6 x [2 x i16]], [2 x [2 x i16]], [6 x [2 x i16]], [3 x [2 x i16]], [4 x [2 x i16]], [5 x [2 x i16]], [5 x [2 x i16]], [6 x [2 x i16]], [6 x [2 x i16]], [9 x [2 x i16]], [6 x [3 x [2 x i16]]], [3 x [3 x [2 x i16]]], [2 x [3 x [2 x i16]]], [3 x [3 x [2 x i16]]], [7 x [3 x [2 x i16]]], [3 x [2 x i16]], [3 x [2 x i16]], [3 x [2 x i16]], [22 x [2 x i16]], [7 x [3 x [2 x i16]]], [2 x [2 x i16]], [2 x i16], [8 x i8] }
+
+define i32 @decode_sb(ptr %t, i32 %bl, i32 %_msprop1966, i32 %sub.i, i64 %idxprom, i1 %cmp54) #0 {
+; CHECK-LABEL: decode_sb:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_offset %rbx, -56
+; CHECK-NEXT:    .cfi_offset %r12, -48
+; CHECK-NEXT:    .cfi_offset %r13, -40
+; CHECK-NEXT:    .cfi_offset %r14, -32
+; CHECK-NEXT:    .cfi_offset %r15, -24
+; CHECK-NEXT:    movl %r9d, %ebx
+; CHECK-NEXT:    movabsq $87960930222080, %r15 # imm = 0x500000000000
+; CHECK-NEXT:    movl 0, %r13d
+; CHECK-NEXT:    movl %esi, %r12d
+; CHECK-NEXT:    # implicit-def: $eax
+; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    testb $1, %bl
+; CHECK-NEXT:    jne .LBB0_7
+; CHECK-NEXT:  # %bb.1: # %if.else
+; CHECK-NEXT:    movq %r8, %r14
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movzbl 544(%rax), %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    movl %r15d, %r9d
+; CHECK-NEXT:    andl $1, %r9d
+; CHECK-NEXT:    movl %r14d, %r10d
+; CHECK-NEXT:    andl $1, %r10d
+; CHECK-NEXT:    movl %esi, %r11d
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrl %cl, %r11d
+; CHECK-NEXT:    movabsq $17592186044416, %r8 # imm = 0x100000000000
+; CHECK-NEXT:    orq %r10, %r8
+; CHECK-NEXT:    andl $2, %r11d
+; CHECK-NEXT:    testb $1, %bl
+; CHECK-NEXT:    cmoveq %r9, %r8
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    orq %rax, %rcx
+; CHECK-NEXT:    movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    orq $1, %r13
+; CHECK-NEXT:    orl %esi, %r11d
+; CHECK-NEXT:    movl $1, %edx
+; CHECK-NEXT:    je .LBB0_3
+; CHECK-NEXT:  # %bb.2: # %if.else
+; CHECK-NEXT:    movl (%r8), %edx
+; CHECK-NEXT:  .LBB0_3: # %if.else
+; CHECK-NEXT:    shlq $5, %rcx
+; CHECK-NEXT:    movq %r12, %rsi
+; CHECK-NEXT:    shlq $7, %rsi
+; CHECK-NEXT:    addq %rcx, %rsi
+; CHECK-NEXT:    addq $1248, %rsi # imm = 0x4E0
+; CHECK-NEXT:    movq %r13, 0
+; CHECK-NEXT:    movq %rdi, %r15
+; CHECK-NEXT:    movl %edx, (%rdi)
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    callq *%rax
+; CHECK-NEXT:    xorq $1, %r14
+; CHECK-NEXT:    cmpl $0, (%r14)
+; CHECK-NEXT:    je .LBB0_6
+; CHECK-NEXT:  # %bb.4: # %if.else
+; CHECK-NEXT:    movb $1, %al
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    je .LBB0_5
+; CHECK-NEXT:  .LBB0_6: # %bb19
+; CHECK-NEXT:    testb $1, %bl
+; CHECK-NEXT:    movq %r15, %rdi
+; CHECK-NEXT:    movabsq $87960930222080, %r15 # imm = 0x500000000000
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; CHECK-NEXT:    jne .LBB0_8
+; CHECK-NEXT:  .LBB0_7: # %if.end69
+; CHECK-NEXT:    movl %r13d, 0
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    xorl %esi, %esi
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    xorl %r8d, %r8d
+; CHECK-NEXT:    callq *%rax
+; CHECK-NEXT:    xorq %r15, %r12
+; CHECK-NEXT:    movslq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 4-byte Folded Reload
+; CHECK-NEXT:    movzbl (%r12), %ecx
+; CHECK-NEXT:    movb %cl, 544(%rax)
+; CHECK-NEXT:  .LBB0_8: # %land.lhs.true56
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB0_5: # %bb
+entry:
+  %i = load i32, ptr null, align 8
+  br i1 %cmp54, label %if.end69, label %if.else
+
+if.else:                                          ; preds = %entry
+  %shr18 = and i32 %sub.i, 1
+  %idxprom.i = zext i32 %shr18 to i64
+  %arrayidx.i = getelementptr %struct.BlockContext, ptr null, i64 0, i32 14, i64 %idxprom.i
+  %i1 = load i8, ptr %arrayidx.i, align 1
+  %conv.i = zext i8 %i1 to i32
+  %and.i = and i32 %conv.i, 1
+  %i2 = and i64 87960930222080, 1
+  %i3 = inttoptr i64 %i2 to ptr
+  %i4 = load i32, ptr %i3, align 4
+  %i5 = and i64 %idxprom, 1
+  %i6 = or i64 %i5, 17592186044416
+  %i7 = inttoptr i64 %i6 to ptr
+  %i8 = load i32, ptr %i7, align 4
+  %i9 = lshr i32 %bl, %sub.i
+  %i10 = and i32 %i9, 2
+  %i11 = or i32 %bl, %i10
+  %i12 = select i1 %cmp54, i32 %i8, i32 %i4
+  %add.i = or i32 %_msprop1966, %and.i
+  %idxprom4 = zext i32 %bl to i64
+  %idxprom24 = zext i32 %add.i to i64
+  %i13 = or i32 %i, 1
+  %i14 = zext i32 %i13 to i64
+  %.not2329 = icmp eq i32 %i11, 0
+  %i15 = select i1 %.not2329, i32 1, i32 %i12
+  %arrayidx25 = getelementptr %struct.CdfModeContext, ptr null, i64 0, i32 3, i64 %idxprom4, i64 %idxprom24
+  store i64 %i14, ptr null, align 8
+  store i32 %i15, ptr %t, align 4
+  %call53 = tail call i32 null(ptr null, ptr %arrayidx25, i64 0)
+  %i16 = xor i64 %idxprom, 1
+  %i17 = inttoptr i64 %i16 to ptr
+  %_msld1992 = load i32, ptr %i17, align 8
+  %i18 = icmp ne i32 %_msld1992, 0
+  %_msprop_icmp1993 = and i1 %i18, false
+  br i1 %_msprop_icmp1993, label %bb, label %bb19
+
+bb:                                               ; preds = %if.else
+  unreachable
+
+bb19:                                             ; preds = %if.else
+  br i1 %cmp54, label %land.lhs.true56, label %if.end69
+
+land.lhs.true56:                                  ; preds = %bb19
+  ret i32 0
+
+if.end69:                                         ; preds = %bb19, %entry
+  %bx8.011941201 = phi i32 [ %shr18, %bb19 ], [ undef, %entry ]
+  store i32 %i, ptr null, align 8
+  %call79 = tail call fastcc i32 null(ptr %t, i32 0, i32 0, i32 0, i32 0)
+  %idxprom666 = zext i32 %bl to i64
+  %i20 = xor i64 %idxprom666, 87960930222080
+  %idxprom675 = sext i32 %bx8.011941201 to i64
+  %arrayidx676 = getelementptr %struct.BlockContext, ptr null, i64 0, i32 14, i64 %idxprom675
+  %i21 = inttoptr i64 %i20 to ptr
+  %_msld1414 = load i8, ptr %i21, align 1
+  store i8 %_msld1414, ptr %arrayidx676, align 1
+  ret i32 0
+}
+
+attributes #0 = { "frame-pointer"="all" "target-cpu"="x86-64" }

>From e880e8aedbc17ba04c969c9426d1f2567af72e7b Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Thu, 19 Oct 2023 11:56:56 -0400
Subject: [PATCH 12/76] Let clang-cl support CUDA/HIP (#68921)

clang-cl is a driver mode that accepts options of MSVC cl.exe as a
drop-in replacement for cl.exe. Currently clang-cl accepts mixed clang
style options and cl style options. To let clang-cl accept a clang-style
option, just need to add visibility CLOption to that option.

Currently nvcc can pass cl style options to cl.exe, which allows nvcc to
compile C++ and CUDA programs with mixed nvcc and cl style options. On
the other hand, clang cannot use mixed clang and cl style options to
compile CUDA/HIP programs.

This patch adds visibility CLOption to options needed to compile CUDA/HIP
programs. This allows clang-cl to compile CUDA/HIP programs with mixed
clang and cl style options.
---
 clang/include/clang/Driver/Options.td | 47 +++++++++++++++++----------
 clang/lib/Driver/Driver.cpp           |  5 ++-
 clang/test/Driver/cl-offload.cu       | 27 +++++++++++++++
 3 files changed, 61 insertions(+), 18 deletions(-)
 create mode 100644 clang/test/Driver/cl-offload.cu

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 95849fef787ed67..e63158fb0e5333a 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -148,7 +148,8 @@ def pedantic_Group : OptionGroup<"<pedantic group>">, Group<f_Group>,
                      DocFlatten;
 
 def offload_Group : OptionGroup<"<offload group>">, Group<f_Group>,
-                   DocName<"Common Offloading options">;
+                   DocName<"Common Offloading options">,
+                   Visibility<[ClangOption, CLOption]>;
 
 def opencl_Group : OptionGroup<"<opencl group>">, Group<f_Group>,
                    DocName<"OpenCL options">;
@@ -157,13 +158,16 @@ def sycl_Group : OptionGroup<"<SYCL group>">, Group<f_Group>,
                  DocName<"SYCL options">;
 
 def cuda_Group : OptionGroup<"<CUDA group>">, Group<f_Group>,
-                   DocName<"CUDA options">;
+                   DocName<"CUDA options">,
+                   Visibility<[ClangOption, CLOption]>;
 
 def hip_Group : OptionGroup<"<HIP group>">, Group<f_Group>,
-                   DocName<"HIP options">;
+                   DocName<"HIP options">,
+                   Visibility<[ClangOption, CLOption]>;
 
 def m_Group : OptionGroup<"<m group>">, Group<CompileOnly_Group>,
-              DocName<"Target-dependent compilation options">;
+              DocName<"Target-dependent compilation options">,
+              Visibility<[ClangOption, CLOption]>;
 
 // Feature groups - these take command line options that correspond directly to
 // target specific features and can be translated directly from command line
@@ -5167,14 +5171,16 @@ def prebind__all__twolevel__modules : Flag<["-"], "prebind_all_twolevel_modules"
 def prebind : Flag<["-"], "prebind">;
 def preload : Flag<["-"], "preload">;
 def print_file_name_EQ : Joined<["-", "--"], "print-file-name=">,
-  HelpText<"Print the full library path of <file>">, MetaVarName<"<file>">;
+  HelpText<"Print the full library path of <file>">, MetaVarName<"<file>">,
+  Visibility<[ClangOption, CLOption]>;
 def print_ivar_layout : Flag<["-"], "print-ivar-layout">,
   Visibility<[ClangOption, CC1Option]>,
   HelpText<"Enable Objective-C Ivar layout bitmap print trace">,
   MarshallingInfoFlag<LangOpts<"ObjCGCBitmapPrint">>;
 def print_libgcc_file_name : Flag<["-", "--"], "print-libgcc-file-name">,
   HelpText<"Print the library path for the currently used compiler runtime "
-           "library (\"libgcc.a\" or \"libclang_rt.builtins.*.a\")">;
+           "library (\"libgcc.a\" or \"libclang_rt.builtins.*.a\")">,
+  Visibility<[ClangOption, CLOption]>;
 def print_multi_directory : Flag<["-", "--"], "print-multi-directory">;
 def print_multi_lib : Flag<["-", "--"], "print-multi-lib">;
 def print_multi_flags : Flag<["-", "--"], "print-multi-flags-experimental">,
@@ -5183,27 +5189,34 @@ def print_multi_os_directory : Flag<["-", "--"], "print-multi-os-directory">,
   Flags<[Unsupported]>;
 def print_target_triple : Flag<["-", "--"], "print-target-triple">,
   HelpText<"Print the normalized target triple">,
-  Visibility<[ClangOption, FlangOption]>;
+  Visibility<[ClangOption, FlangOption, CLOption]>;
 def print_effective_triple : Flag<["-", "--"], "print-effective-triple">,
   HelpText<"Print the effective target triple">,
-  Visibility<[ClangOption, FlangOption]>;
+  Visibility<[ClangOption, FlangOption, CLOption]>;
 // GCC --disable-multiarch, GCC --enable-multiarch (upstream and Debian
 // specific) have different behaviors. We choose not to support the option.
 def : Flag<["-", "--"], "print-multiarch">, Flags<[Unsupported]>;
 def print_prog_name_EQ : Joined<["-", "--"], "print-prog-name=">,
-  HelpText<"Print the full program path of <name>">, MetaVarName<"<name>">;
+  HelpText<"Print the full program path of <name>">, MetaVarName<"<name>">,
+  Visibility<[ClangOption, CLOption]>;
 def print_resource_dir : Flag<["-", "--"], "print-resource-dir">,
-  HelpText<"Print the resource directory pathname">;
+  HelpText<"Print the resource directory pathname">,
+  Visibility<[ClangOption, CLOption]>;
 def print_search_dirs : Flag<["-", "--"], "print-search-dirs">,
-  HelpText<"Print the paths used for finding libraries and programs">;
+  HelpText<"Print the paths used for finding libraries and programs">,
+  Visibility<[ClangOption, CLOption]>;
 def print_targets : Flag<["-", "--"], "print-targets">,
-  HelpText<"Print the registered targets">;
+  HelpText<"Print the registered targets">,
+  Visibility<[ClangOption, CLOption]>;
 def print_rocm_search_dirs : Flag<["-", "--"], "print-rocm-search-dirs">,
-  HelpText<"Print the paths used for finding ROCm installation">;
+  HelpText<"Print the paths used for finding ROCm installation">,
+  Visibility<[ClangOption, CLOption]>;
 def print_runtime_dir : Flag<["-", "--"], "print-runtime-dir">,
-  HelpText<"Print the directory pathname containing clangs runtime libraries">;
+  HelpText<"Print the directory pathname containing clangs runtime libraries">,
+  Visibility<[ClangOption, CLOption]>;
 def print_diagnostic_options : Flag<["-", "--"], "print-diagnostic-options">,
-  HelpText<"Print all of Clang's warning options">;
+  HelpText<"Print all of Clang's warning options">,
+  Visibility<[ClangOption, CLOption]>;
 def private__bundle : Flag<["-"], "private_bundle">;
 def pthreads : Flag<["-"], "pthreads">;
 defm pthread : BoolOption<"", "pthread",
@@ -5230,7 +5243,7 @@ def resource_dir_EQ : Joined<["-"], "resource-dir=">, Flags<[NoXarchOption]>,
   Visibility<[ClangOption, CLOption, DXCOption]>,
   Alias<resource_dir>;
 def rpath : Separate<["-"], "rpath">, Flags<[LinkerInput]>, Group<Link_Group>;
-def rtlib_EQ : Joined<["-", "--"], "rtlib=">,
+def rtlib_EQ : Joined<["-", "--"], "rtlib=">, Visibility<[ClangOption, CLOption]>,
   HelpText<"Compiler runtime library to use">;
 def frtlib_add_rpath: Flag<["-"], "frtlib-add-rpath">, Flags<[NoArgumentUnused]>,
   HelpText<"Add -rpath with architecture-specific resource directory to the linker flags. "
@@ -5396,7 +5409,7 @@ def w : Flag<["-"], "w">, HelpText<"Suppress all warnings">,
   MarshallingInfoFlag<DiagnosticOpts<"IgnoreWarnings">>;
 def x : JoinedOrSeparate<["-"], "x">,
 Flags<[NoXarchOption]>,
-  Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
+  Visibility<[ClangOption, CC1Option, FlangOption, FC1Option, CLOption]>,
   HelpText<"Treat subsequent input files as having type <language>">,
   MetaVarName<"<language>">;
 def y : Joined<["-"], "y">;
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 77328e1f99e5021..f5fd900a6447fe3 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -2589,8 +2589,11 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
       Diag(clang::diag::note_drv_t_option_is_global);
   }
 
+  // CUDA/HIP and their preprocessor expansions can be accepted by CL mode.
   // Warn -x after last input file has no effect
-  if (!IsCLMode()) {
+  auto LastXArg = Args.getLastArgValue(options::OPT_x);
+  const llvm::StringSet<> ValidXArgs = {"cuda", "hip", "cui", "hipi"};
+  if (!IsCLMode() || ValidXArgs.find(LastXArg) != ValidXArgs.end()) {
     Arg *LastXArg = Args.getLastArgNoClaim(options::OPT_x);
     Arg *LastInputArg = Args.getLastArgNoClaim(options::OPT_INPUT);
     if (LastXArg && LastInputArg &&
diff --git a/clang/test/Driver/cl-offload.cu b/clang/test/Driver/cl-offload.cu
new file mode 100644
index 000000000000000..aa5c096338110b0
--- /dev/null
+++ b/clang/test/Driver/cl-offload.cu
@@ -0,0 +1,27 @@
+// RUN: %clang_cl -### --offload-arch=sm_35 -fgpu-rdc \
+// RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN:   /Wall -x cuda %s 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=CUDA
+
+// RUN: %clang_cl -### --offload-arch=gfx1010 -fgpu-rdc --hip-link \
+// RUN:   --rocm-path=%S/Inputs/rocm /Wall -x hip %s 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=HIP
+
+// CUDA: "-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" "x86_64-pc-windows-msvc"
+// CUDA-SAME: "-Weverything"
+// CUDA: ptxas
+// CUDA: "-cc1" "-triple" "x86_64-pc-windows-msvc{{.*}}" "-aux-triple" "nvptx64-nvidia-cuda"
+// CUDA-SAME: "-Weverything"
+// CUDA: link
+
+// HIP: "-cc1" "-triple" "x86_64-pc-windows-msvc{{.*}}" "-aux-triple" "amdgcn-amd-amdhsa"
+// HIP-SAME: "-Weverything"
+// HIP: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-pc-windows-msvc"
+// HIP-SAME: "-Weverything"
+// HIP: {{lld.* "-flavor" "gnu" "-m" "elf64_amdgpu"}}
+// HIP: {{link.* "amdhip64.lib"}}
+
+// CMake uses this option when finding packages for HIP, so
+// make sure it does not cause error.
+
+// RUN: %clang_cl --print-libgcc-file-name

>From 03d1c99d99d7918115b993f14dcb6fc39cf09f72 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 19 Oct 2023 17:00:02 +0100
Subject: [PATCH 13/76] [mlir][ODS] Add `OptionalTypesMatchWith` and remove a
 custom assemblyFormat (#68876)

This is just a slight specialization of `TypesMatchWith` that returns
success if an optional parameter is missing.

There may be other places this could help e.g.:

https://github.com/llvm/llvm-project/blob/eb21049b4b904b072679ece60e73c6b0dc0d1ebf/mlir/include/mlir/Dialect/X86Vector/X86Vector.td#L58-L59
...but I'm leaving those to avoid some churn.

This constraint will be handy for us in some later patches, it's a
formalization of a short circuiting trick with the `comparator` of the
`TypesMatchWith` constraint (devised for #69195).

```
TypesMatchWith<
  "padding type matches element type of result (if present)",
  "result", "padding",
  "::llvm::cast<VectorType>($_self).getElementType()",
  // This returns true if no padding is present, or it's present with a type that matches the element type of `result`.
  "!getPadding() || std::equal_to<>()">
```

This is a little non-obvious, so after this patch you can instead do:
```
OptionalTypesMatchWith<
  "padding type matches element type of result (if present)",
  "result", "padding",
  "::llvm::cast<VectorType>($_self).getElementType()">
```
---
 .../mlir/Dialect/Vector/IR/VectorOps.td       |  7 ++--
 mlir/include/mlir/IR/OpBase.td                |  8 ++++
 mlir/include/mlir/IR/Utils.td                 | 24 +++++++++++
 mlir/lib/Dialect/Vector/IR/VectorOps.cpp      | 41 -------------------
 mlir/test/Dialect/Vector/invalid.mlir         |  2 +-
 mlir/test/mlir-tblgen/utils.td                | 23 +++++++++++
 6 files changed, 60 insertions(+), 45 deletions(-)
 create mode 100644 mlir/test/mlir-tblgen/utils.td

diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
index 2df2fe4c5ce8e9c..917b27a40f26f13 100644
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
@@ -215,6 +215,8 @@ def Vector_ReductionOp :
   Vector_Op<"reduction", [Pure,
      PredOpTrait<"source operand and result have same element type",
                  TCresVTEtIsSameAsOpBase<0, 0>>,
+     OptionalTypesMatchWith<"dest and acc have the same type",
+                            "dest", "acc", "::llvm::cast<Type>($_self)">,
      DeclareOpInterfaceMethods<ArithFastMathInterface>,
      DeclareOpInterfaceMethods<MaskableOpInterface>,
      DeclareOpInterfaceMethods<VectorUnrollOpInterface, ["getShapeForUnroll"]>
@@ -263,9 +265,8 @@ def Vector_ReductionOp :
                          "::mlir::arith::FastMathFlags::none">:$fastMathFlags)>
   ];
 
-  // TODO: Migrate to assemblyFormat once `AllTypesMatch` supports optional
-  // operands.
-  let hasCustomAssemblyFormat = 1;
+  let assemblyFormat = "$kind `,` $vector (`,` $acc^)? (`fastmath` `` $fastmath^)?"
+                       " attr-dict `:` type($vector) `into` type($dest)";
   let hasCanonicalizer = 1;
   let hasVerifier = 1;
 }
diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td
index 236dd74839dfb04..7866ac24c1ccbad 100644
--- a/mlir/include/mlir/IR/OpBase.td
+++ b/mlir/include/mlir/IR/OpBase.td
@@ -568,6 +568,14 @@ class TypesMatchWith<string summary, string lhsArg, string rhsArg,
   string transformer = transform;
 }
 
+// The same as TypesMatchWith but if either `lhsArg` or `rhsArg` are optional
+// and not present returns success.
+class OptionalTypesMatchWith<string summary, string lhsArg, string rhsArg,
+                     string transform, string comparator = "std::equal_to<>()">
+  : TypesMatchWith<summary, lhsArg, rhsArg, transform,
+     "!get" # snakeCaseToCamelCase<lhsArg>.ret # "()"
+     # " || !get" # snakeCaseToCamelCase<rhsArg>.ret # "() || " # comparator>;
+
 // Special variant of `TypesMatchWith` that provides a comparator suitable for
 // ranged arguments.
 class RangedTypesMatchWith<string summary, string lhsArg, string rhsArg,
diff --git a/mlir/include/mlir/IR/Utils.td b/mlir/include/mlir/IR/Utils.td
index 651706099d9b1ce..e75b3ae7a50d171 100644
--- a/mlir/include/mlir/IR/Utils.td
+++ b/mlir/include/mlir/IR/Utils.td
@@ -66,4 +66,28 @@ class CArg<string ty, string value = ""> {
   string defaultValue = value;
 }
 
+// Helper which makes the first letter of a string uppercase.
+// e.g. cat -> Cat
+class firstCharToUpper<string str>
+{
+  string ret = !if(!gt(!size(str), 0),
+    !toupper(!substr(str, 0, 1)) # !substr(str, 1),
+    "");
+}
+
+class _snakeCaseHelper<string str> {
+  int idx = !find(str, "_");
+  string ret = !if(!ge(idx, 0),
+    !substr(str, 0, idx) # firstCharToUpper<!substr(str, !add(idx, 1))>.ret,
+    str);
+}
+
+// Converts a snake_case string to CamelCase.
+// TODO: Replace with a !tocamelcase bang operator.
+class snakeCaseToCamelCase<string str>
+{
+  string ret = !foldl(firstCharToUpper<str>.ret,
+    !range(0, !size(str)), acc, idx, _snakeCaseHelper<acc>.ret);
+}
+
 #endif // UTILS_TD
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index 68a5cf209f2fb49..9e7de1d1e11f782 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -524,47 +524,6 @@ LogicalResult ReductionOp::verify() {
   return success();
 }
 
-ParseResult ReductionOp::parse(OpAsmParser &parser, OperationState &result) {
-  SmallVector<OpAsmParser::UnresolvedOperand, 2> operandsInfo;
-  Type redType;
-  Type resType;
-  CombiningKindAttr kindAttr;
-  arith::FastMathFlagsAttr fastMathAttr;
-  if (parser.parseCustomAttributeWithFallback(kindAttr, Type{}, "kind",
-                                              result.attributes) ||
-      parser.parseComma() || parser.parseOperandList(operandsInfo) ||
-      (succeeded(parser.parseOptionalKeyword("fastmath")) &&
-       parser.parseCustomAttributeWithFallback(fastMathAttr, Type{}, "fastmath",
-                                               result.attributes)) ||
-      parser.parseColonType(redType) ||
-      parser.parseKeywordType("into", resType) ||
-      (!operandsInfo.empty() &&
-       parser.resolveOperand(operandsInfo[0], redType, result.operands)) ||
-      (operandsInfo.size() > 1 &&
-       parser.resolveOperand(operandsInfo[1], resType, result.operands)) ||
-      parser.addTypeToList(resType, result.types))
-    return failure();
-  if (operandsInfo.empty() || operandsInfo.size() > 2)
-    return parser.emitError(parser.getNameLoc(),
-                            "unsupported number of operands");
-  return success();
-}
-
-void ReductionOp::print(OpAsmPrinter &p) {
-  p << " ";
-  getKindAttr().print(p);
-  p << ", " << getVector();
-  if (getAcc())
-    p << ", " << getAcc();
-
-  if (getFastmathAttr() &&
-      getFastmathAttr().getValue() != arith::FastMathFlags::none) {
-    p << ' ' << getFastmathAttrName().getValue();
-    p.printStrippedAttrOrType(getFastmathAttr());
-  }
-  p << " : " << getVector().getType() << " into " << getDest().getType();
-}
-
 // MaskableOpInterface methods.
 
 /// Returns the mask type expected by this operation.
diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir
index 5967a8d69bbfcc0..504ac89659fdb73 100644
--- a/mlir/test/Dialect/Vector/invalid.mlir
+++ b/mlir/test/Dialect/Vector/invalid.mlir
@@ -1169,7 +1169,7 @@ func.func @reduce_unsupported_attr(%arg0: vector<16xf32>) -> i32 {
 // -----
 
 func.func @reduce_unsupported_third_argument(%arg0: vector<16xf32>, %arg1: f32) -> f32 {
-  // expected-error at +1 {{'vector.reduction' unsupported number of operands}}
+  // expected-error at +1 {{expected ':'}}
   %0 = vector.reduction <add>, %arg0, %arg1, %arg1 : vector<16xf32> into f32
 }
 
diff --git a/mlir/test/mlir-tblgen/utils.td b/mlir/test/mlir-tblgen/utils.td
new file mode 100644
index 000000000000000..28e0fecb2881bdd
--- /dev/null
+++ b/mlir/test/mlir-tblgen/utils.td
@@ -0,0 +1,23 @@
+// RUN: mlir-tblgen -I %S/../../include %s | FileCheck %s
+
+include "mlir/IR/Utils.td"
+
+// CHECK-DAG: string value = "CamelCaseTest"
+class already_camel_case {
+  string value = snakeCaseToCamelCase<"CamelCaseTest">.ret;
+}
+
+// CHECK-DAG: string value = "Foo"
+class single_word {
+  string value = snakeCaseToCamelCase<"foo">.ret;
+}
+
+// CHECK-DAG: string value = "ThisIsATest"
+class snake_case {
+  string value = snakeCaseToCamelCase<"this_is_a_test">.ret;
+}
+
+// CHECK-DAG: string value = "ThisIsATestAgain"
+class extra_underscores {
+  string value = snakeCaseToCamelCase<"__this__is_a_test__again__">.ret;
+}

>From 9f93a99a096c093b5c205cf9143d88bbbbba1b53 Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Thu, 19 Oct 2023 16:02:28 +0000
Subject: [PATCH 14/76] [Clang][SVE2.1] Add builtins for 2-way svdot (vectors,
 indexed)

As described in: https://github.com/ARM-software/acle/pull/257

Patch by: David Sherwood <david.sherwood at arm.com>

Reviewed By: dtemirbulatov

Differential Revision: https://reviews.llvm.org/D151439
---
 clang/include/clang/Basic/arm_sve.td          |   7 ++
 .../acle_sve2p1_dot.c                         | 107 ++++++++++++++++++
 .../acle_sve2p1_imm.cpp                       |   9 ++
 3 files changed, 123 insertions(+)
 create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dot.c

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 8684b4ff1b6053e..8750e75f2a777a6 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1952,6 +1952,13 @@ def SVSTNT1B_VNUM_X4 : MInst<"svstnt1_vnum[_{2}_x4]", "v}pl4", "cUc", [IsStructS
 def SVSTNT1H_VNUM_X4 : MInst<"svstnt1_vnum[_{2}_x4]", "v}pl4", "sUshb", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">;
 def SVSTNT1W_VNUM_X4 : MInst<"svstnt1_vnum[_{2}_x4]", "v}pl4", "iUif", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">;
 def SVSTNT1D_VNUM_X4 : MInst<"svstnt1_vnum[_{2}_x4]", "v}pl4", "lUld", [IsStructStore], MemEltTyDefault, "aarch64_sve_stnt1_pn_x4">;
+
+def SVDOT_X2_S : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "i",  MergeNone, "aarch64_sve_sdot_x2", [], []>;
+def SVDOT_X2_U : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "Ui", MergeNone, "aarch64_sve_udot_x2", [], []>;
+def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "f",  MergeNone, "aarch64_sve_fdot_x2", [], []>;
+def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i",  MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
+def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
+def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f",  MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 }
 
 let TargetGuard = "sve2p1" in {
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dot.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dot.c
new file mode 100644
index 000000000000000..d50be9ae177d790
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_dot.c
@@ -0,0 +1,107 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+// CHECK-LABEL: @test_svdot_s32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svdot_s32_x2u11__SVInt32_tu11__SVInt16_tu11__SVInt16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svdot_s32_x2(svint32_t op1, svint16_t op2, svint16_t op3)
+{
+  return SVE_ACLE_FUNC(svdot,_s32_s16_s16,)(op1, op2, op3);
+}
+
+// CHECK-LABEL: @test_svdot_u32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udot.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svdot_u32_x2u12__SVUint32_tu12__SVUint16_tu12__SVUint16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udot.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svdot_u32_x2(svuint32_t op1, svuint16_t op2, svuint16_t op3)
+{
+  return SVE_ACLE_FUNC(svdot,_u32_u16_u16,)(op1, op2, op3);
+}
+
+// CHECK-LABEL: @test_svdot_f32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdot.x2.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svdot_f32_x2u13__SVFloat32_tu13__SVFloat16_tu13__SVFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdot.x2.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svdot_f32_x2(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  return SVE_ACLE_FUNC(svdot,_f32_f16_f16,)(op1, op2, op3);
+}
+
+
+
+// CHECK-LABEL: @test_svdot_lane_s32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]], i32 3)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svdot_lane_s32_x2u11__SVInt32_tu11__SVInt16_tu11__SVInt16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]], i32 3)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svdot_lane_s32_x2(svint32_t op1, svint16_t op2, svint16_t op3)
+{
+  return SVE_ACLE_FUNC(svdot_lane,_s32_s16_s16,)(op1, op2, op3, 3);
+}
+
+// CHECK-LABEL: @test_svdot_lane_u32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]], i32 3)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svdot_lane_u32_x2u12__SVUint32_tu12__SVUint16_tu12__SVUint16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.x2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]], i32 3)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svdot_lane_u32_x2(svuint32_t op1, svuint16_t op2, svuint16_t op3)
+{
+  return SVE_ACLE_FUNC(svdot_lane,_u32_u16_u16,)(op1, op2, op3, 3);
+}
+
+// CHECK-LABEL: @test_svdot_lane_f32_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdot.lane.x2.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]], i32 3)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z22test_svdot_lane_f32_x2u13__SVFloat32_tu13__SVFloat16_tu13__SVFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdot.lane.x2.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]], i32 3)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svdot_lane_f32_x2(svfloat32_t op1, svfloat16_t op2, svfloat16_t op3)
+{
+  return SVE_ACLE_FUNC(svdot_lane,_f32_f16_f16,)(op1, op2, op3, 3);
+}
diff --git a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
index c889f34bbd0334d..c4e087c8b7d79ea 100644
--- a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
+++ b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sve2p1_imm.cpp
@@ -107,3 +107,12 @@ void test_cntp(svcount_t c) {
   svcntp_c14(c, 3); // expected-error {{argument should be a multiple of 2}}
 }
 
+void test_svdot_lane_2way(svint32_t s32, svuint32_t u32, svint16_t s16, svuint16_t u16,
+                          svfloat32_t f32, svfloat16_t f16) {
+  svdot_lane_s32_s16_s16(s32, s16, s16, 1); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svdot_lane_u32_u16_u16(u32, u16, u16, 1); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svdot_lane_f32_f16_f16(f32, f16, f16, 1); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svdot_lane_s32_s16_s16(s32, s16, s16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svdot_lane_u32_u16_u16(u32, u16, u16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  svdot_lane_f32_f16_f16(f32, f16, f16, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+}

>From 553616a213b8f213073ba5874186b2667696e07b Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev at outlook.com>
Date: Thu, 19 Oct 2023 09:08:46 -0700
Subject: [PATCH 15/76] [SLP][NFC]Add avx2 test run, NFC.

---
 llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll
index cf3d40df15dadd6..21aac98aa3ece62 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-107 | FileCheck %s
+; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-107 -mattr=+avx2 | FileCheck %s
 
 define void @test(i64 %p0, i64 %p1, i64 %p2, i64 %p3) {
 ; CHECK-LABEL: @test(

>From fdac18c2e23aaf769b7d5a2fa2b6135311fad69f Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston.dang at gmail.com>
Date: Thu, 19 Oct 2023 09:14:40 -0700
Subject: [PATCH 16/76] [hwasan] Fix rare false negative (zero tag) in two more
 test cases (#69491)

stack-uas.c and stack-history-length.c both have
-hwasan-record-stack-history=libcall, which makes the stack base
tag fully randomized. They may therefore sometimes have a zero tag
for a stack allocated variable, resulting in a false negative

(https://github.com/llvm/llvm-project/issues/69221#issuecomment-1767322411).

This patch applies the same workaround as used for deep-recursion.c

(https://github.com/llvm/llvm-project/commit/aa4dfd3736dd1c2e0263eacd09bd613c5784ea73)
and stack-uar.c

(https://github.com/llvm/llvm-project/commit/ddf1de20a3f7db3bca1ef6ba7e6cbb90aac5fd2d):
have two adjacent stack-allocated variables, and use whichever is not
zero-tagged.

These are the last remaining test cases that use
-hwasan-record-stack-history=libcall.

stack-uas flakiness spotted in the wild:
https://lab.llvm.org/buildbot/#/builders/269/builds/549/steps/11/logs/stdio
stack-history-length:
https://lab.llvm.org/buildbot/#/builders/269/builds/537

Co-authored-by: Thurston Dang <thurston at google.com>
---
 .../hwasan/TestCases/stack-history-length.c   | 21 +++++++++++++++++-
 compiler-rt/test/hwasan/TestCases/stack-uas.c | 22 ++++++++++++++++---
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/compiler-rt/test/hwasan/TestCases/stack-history-length.c b/compiler-rt/test/hwasan/TestCases/stack-history-length.c
index d997677b4e90b8c..6b1274892c07485 100644
--- a/compiler-rt/test/hwasan/TestCases/stack-history-length.c
+++ b/compiler-rt/test/hwasan/TestCases/stack-history-length.c
@@ -11,6 +11,8 @@
 // Stack histories are currently not recorded on x86.
 // XFAIL: target=x86_64{{.*}}
 
+#include <assert.h>
+#include <sanitizer/hwasan_interface.h>
 #include <stdlib.h>
 
 void USE(void *x) { // pretend_to_do_something(void *x)
@@ -20,7 +22,24 @@ void USE(void *x) { // pretend_to_do_something(void *x)
 volatile int four = 4;
 __attribute__((noinline)) void FUNC0() { int x[4]; USE(&x[0]); }
 __attribute__((noinline)) void FUNC() { int x[4]; USE(&x[0]); }
-__attribute__((noinline)) void OOB() { int x[4]; x[four] = 0; USE(&x[0]); }
+__attribute__((noinline)) void OOB() {
+  int x[4];
+  int y[4];
+  // With -hwasan-generate-tags-with-calls=false, stack tags can occasionally
+  // be zero, leading to a false negative
+  // (https://github.com/llvm/llvm-project/issues/69221). Work around it by
+  // using the neighboring variable, which is guaranteed by
+  // -hwasan-generate-tags-with-calls=false to have a different (hence
+  // non-zero) tag.
+  if (__hwasan_tag_pointer(x, 0) == x) {
+    assert(__hwasan_tag_pointer(y, 0) != y);
+    y[four] = 0;
+  } else {
+    x[four] = 0;
+  }
+  USE(&x[0]);
+  USE(&y[0]);
+}
 
 int main(int argc, char **argv) {
   int X = argc == 2 ? atoi(argv[1]) : 10;
diff --git a/compiler-rt/test/hwasan/TestCases/stack-uas.c b/compiler-rt/test/hwasan/TestCases/stack-uas.c
index 53d51ee25dca32e..4455e5910074750 100644
--- a/compiler-rt/test/hwasan/TestCases/stack-uas.c
+++ b/compiler-rt/test/hwasan/TestCases/stack-uas.c
@@ -13,6 +13,10 @@
 // Stack histories currently are not recorded on x86.
 // XFAIL: target=x86_64{{.*}}
 
+#include <assert.h>
+#include <sanitizer/hwasan_interface.h>
+#include <stdio.h>
+
 void USE(void *x) { // pretend_to_do_something(void *x)
   __asm__ __volatile__(""
                        :
@@ -36,8 +40,20 @@ __attribute__((noinline)) void Unrelated3() {
 __attribute__((noinline)) char buggy() {
   char *volatile p;
   {
-    char zzz[0x1000] = {};
-    p = zzz;
+    char zzz[0x800] = {};
+    char yyy[0x800] = {};
+    // With -hwasan-generate-tags-with-calls=false, stack tags can occasionally
+    // be zero, leading to a false negative
+    // (https://github.com/llvm/llvm-project/issues/69221). Work around it by
+    // using the neighboring variable, which is guaranteed by
+    // -hwasan-generate-tags-with-calls=false to have a different (hence
+    // non-zero) tag.
+    if (__hwasan_tag_pointer(zzz, 0) == zzz) {
+      assert(__hwasan_tag_pointer(yyy, 0) != yyy);
+      p = yyy;
+    } else {
+      p = zzz;
+    }
   }
   return *p;
 }
@@ -53,7 +69,7 @@ int main() {
   // CHECK: Cause: stack tag-mismatch
   // CHECK: is located in stack of thread
   // CHECK: Potentially referenced stack objects:
-  // CHECK-NEXT: zzz in buggy {{.*}}stack-uas.c:[[@LINE-17]]
+  // CHECK-NEXT: {{zzz|yyy}} in buggy {{.*}}stack-uas.c:
   // CHECK-NEXT: Memory tags around the buggy address
 
   // NOSYM: Previously allocated frames:

>From 7b9fb1c228337de9c866b123ff60f3491eebd3d7 Mon Sep 17 00:00:00 2001
From: Yinying Li <107574043+yinying-lisa-li at users.noreply.github.com>
Date: Thu, 19 Oct 2023 12:34:18 -0400
Subject: [PATCH 17/76] [mlir][sparse] Update verifier for block sparsity and
 singleton (#69389)

Updates:
1. Verification of block sparsity.
2. Verification that a singleton level type can only follow a compressed
or loose_compressed level, and that all level types after a singleton
level must themselves be singleton.
3. Added getBlockSize function.
4. Added an invalid encoding test for an incorrect user-provided
lvlToDim map.
---
 .../Dialect/SparseTensor/IR/SparseTensor.h    | 15 ++++
 .../SparseTensor/IR/Detail/DimLvlMap.cpp      | 11 ++-
 .../SparseTensor/IR/SparseTensorDialect.cpp   | 87 +++++++++++++++----
 .../SparseTensor/invalid_encoding.mlir        | 23 ++++-
 4 files changed, 114 insertions(+), 22 deletions(-)

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
index 6e834426b441764..51aacd8cc2438e4 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
@@ -173,6 +173,21 @@ AffineMap inferLvlToDim(AffineMap dimToLvl, MLIRContext *context);
 /// Asserts on failure (so only use when known to succeed).
 AffineMap inverseBlockSparsity(AffineMap dimToLvl, MLIRContext *context);
 
+/// Given the dimToLvl map, returns the block sizes in a vector.
+/// For instance, a 2x3 block will return [2, 3]. Unblocked dimension i
+/// will return 0, and i floordiv 1, i mod 1 will return 1. Therefore,
+/// the example below will return [0, 1].
+/// map = ( i, j ) ->
+///       ( i : dense,
+///         j floordiv 1 : compressed,
+///         j mod 1      : dense
+///       )
+/// Only valid block sparsity will be accepted.
+SmallVector<unsigned> getBlockSize(AffineMap dimToLvl);
+
+/// Given the dimToLvl map, returns if it's block sparsity.
+bool isBlockSparsity(AffineMap dimToLvl);
+
 //
 // Reordering.
 //
diff --git a/mlir/lib/Dialect/SparseTensor/IR/Detail/DimLvlMap.cpp b/mlir/lib/Dialect/SparseTensor/IR/Detail/DimLvlMap.cpp
index 05fce96043826f1..5f947b67c6d848e 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/Detail/DimLvlMap.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/Detail/DimLvlMap.cpp
@@ -356,10 +356,15 @@ AffineMap DimLvlMap::getDimToLvlMap(MLIRContext *context) const {
 AffineMap DimLvlMap::getLvlToDimMap(MLIRContext *context) const {
   SmallVector<AffineExpr> dimAffines;
   dimAffines.reserve(getDimRank());
-  for (const auto &dimSpec : dimSpecs)
-    dimAffines.push_back(dimSpec.getExpr().getAffineExpr());
+  for (const auto &dimSpec : dimSpecs) {
+    auto expr = dimSpec.getExpr().getAffineExpr();
+    if (expr) {
+      dimAffines.push_back(expr);
+    }
+  }
   auto map = AffineMap::get(getLvlRank(), getSymRank(), dimAffines, context);
-  if (map.isIdentity()) return AffineMap();
+  if (dimAffines.empty() || map.isIdentity())
+    return AffineMap();
   return map;
 }
 
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
index fd87bbfa905ed69..170cd00821ea6ba 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -455,6 +455,7 @@ Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) {
   SmallVector<DimLevelType> lvlTypes;
   SmallVector<SparseTensorDimSliceAttr> dimSlices;
   AffineMap dimToLvl = {};
+  AffineMap lvlToDim = {};
   unsigned posWidth = 0;
   unsigned crdWidth = 0;
   StringRef attrName;
@@ -568,6 +569,7 @@ Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) {
 
       ERROR_IF(dimToLvl, "Cannot mix `dimToLvl` with `map`")
       dimToLvl = dlm.getDimToLvlMap(parser.getContext());
+      lvlToDim = dlm.getLvlToDimMap(parser.getContext());
       break;
     }
     } // switch
@@ -582,8 +584,9 @@ Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) {
 #undef RETURN_ON_FAIL
 
   // Construct struct-like storage for attribute.
-  // TODO: Fetch lvlToDim if user provides one
-  AffineMap lvlToDim = inferLvlToDim(dimToLvl, parser.getContext());
+  if (!lvlToDim || lvlToDim.isEmpty()) {
+    lvlToDim = inferLvlToDim(dimToLvl, parser.getContext());
+  }
   return parser.getChecked<SparseTensorEncodingAttr>(
       parser.getContext(), lvlTypes, dimToLvl, lvlToDim, posWidth, crdWidth,
       dimSlices);
@@ -663,6 +666,17 @@ SparseTensorEncodingAttr::verify(function_ref<InFlightDiagnostic()> emitError,
     return emitError() << "unexpected position bitwidth: " << posWidth;
   if (!acceptBitWidth(crdWidth))
     return emitError() << "unexpected coordinate bitwidth: " << crdWidth;
+  if (auto it = std::find_if(lvlTypes.begin(), lvlTypes.end(), isSingletonDLT);
+      it != std::end(lvlTypes)) {
+    if (it == lvlTypes.begin() ||
+        (!isCompressedDLT(*(it - 1)) && !isLooseCompressedDLT(*(it - 1))))
+      return emitError() << "expected compressed or loose_compressed level "
+                            "before singleton level";
+    if (!std::all_of(it, lvlTypes.end(),
+                     [](DimLevelType i) { return isSingletonDLT(i); }))
+      return emitError() << "expected all singleton lvlTypes "
+                            "following a singleton level";
+  }
   // Before we can check that the level-rank is consistent/coherent
   // across all fields, we need to define it.  The source-of-truth for
   // the `getLvlRank` method is the length of the level-types array,
@@ -678,19 +692,12 @@ SparseTensorEncodingAttr::verify(function_ref<InFlightDiagnostic()> emitError,
       return emitError()
              << "level-rank mismatch between dimToLvl and lvlTypes: "
              << dimToLvl.getNumResults() << " != " << lvlRank;
-    // TODO:  The following is attempting to match the old error-conditions
-    // from prior to merging dimOrdering and higherOrdering into dimToLvl.
-    // That is, we currently require `dimToLvl` to be either a permutation
-    // (as when higherOrdering is the identity) or expansive (as per the
-    // constraints on higherOrdering).  However, those constraints do
-    // not match the intended semantics of `dimToLvl`.  As we improve the
-    // compiler to actually handle non-permutations, we need to update these
-    // checks to match what is actually supported.  In particular, this is
-    // where we'll have to check that when `lvlToDim` is provided then it
-    // is indeed an inverse of `dimToLvl`, and when it isn't provided then
-    // it can be automatically inferred.
-    if (dimRank == lvlRank && !dimToLvl.isPermutation())
-      return emitError() << "expected a permutation affine map for dimToLvl";
+    auto inferRes = inferLvlToDim(dimToLvl, dimToLvl.getContext());
+    // Symbols can't be inferred but are acceptable.
+    if (!inferRes && dimToLvl.getNumSymbols() == 0)
+      return emitError() << "failed to infer lvlToDim from dimToLvl";
+    if (lvlToDim && (inferRes != lvlToDim))
+      return emitError() << "expected lvlToDim to be an inverse of dimToLvl";
     if (dimRank > lvlRank)
       return emitError() << "unexpected dimToLvl mapping from " << dimRank
                          << " to " << lvlRank;
@@ -758,8 +765,7 @@ AffineMap mlir::sparse_tensor::inferLvlToDim(AffineMap dimToLvl,
     lvlToDim = AffineMap();
   } else if (map.isPermutation()) {
     lvlToDim = inversePermutation(map);
-  } else {
-    // TODO: check if it's block sparsity
+  } else if (isBlockSparsity(map)) {
     lvlToDim = inverseBlockSparsity(map, context);
   }
   return lvlToDim;
@@ -818,6 +824,53 @@ AffineMap mlir::sparse_tensor::inverseBlockSparsity(AffineMap dimToLvl,
   return dimToLvl.get(dimToLvl.getNumResults(), 0, lvlExprs, context);
 }
 
+SmallVector<unsigned> mlir::sparse_tensor::getBlockSize(AffineMap dimToLvl) {
+  assert(isBlockSparsity(dimToLvl) &&
+         "expected dimToLvl to be block sparsity for calling getBlockSize");
+  SmallVector<unsigned> blockSize;
+  for (auto result : dimToLvl.getResults()) {
+    if (auto binOp = result.dyn_cast<AffineBinaryOpExpr>()) {
+      if (result.getKind() == AffineExprKind::Mod) {
+        blockSize.push_back(
+            binOp.getRHS().dyn_cast<AffineConstantExpr>().getValue());
+      }
+    } else {
+      blockSize.push_back(0);
+    }
+  }
+  return blockSize;
+}
+
+bool mlir::sparse_tensor::isBlockSparsity(AffineMap dimToLvl) {
+  if (!dimToLvl)
+    return false;
+  std::map<unsigned, int64_t> coeffientMap;
+  for (auto result : dimToLvl.getResults()) {
+    if (auto binOp = result.dyn_cast<AffineBinaryOpExpr>()) {
+      auto pos = binOp.getLHS().dyn_cast<AffineDimExpr>().getPosition();
+      if (result.getKind() == AffineExprKind::FloorDiv) {
+        // Expect only one floordiv for each dimension.
+        if (coeffientMap.find(pos) != coeffientMap.end())
+          return false;
+        coeffientMap[pos] =
+            binOp.getRHS().dyn_cast<AffineConstantExpr>().getValue();
+      } else if (result.getKind() == AffineExprKind::Mod) {
+        // Expect floordiv before mod.
+        if (coeffientMap.find(pos) == coeffientMap.end())
+          return false;
+        // Expect mod to have the same coefficient as floordiv.
+        if (binOp.getRHS().dyn_cast<AffineConstantExpr>().getValue() !=
+            coeffientMap[pos]) {
+          return false;
+        }
+      } else {
+        return false;
+      }
+    }
+  }
+  return !coeffientMap.empty();
+}
+
 bool mlir::sparse_tensor::isCOOType(SparseTensorEncodingAttr enc,
                                     Level startLvl, bool isUnique) {
   if (!enc ||
diff --git a/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir b/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir
index ef1dd3ee41f8576..6514391bae92d9b 100644
--- a/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir
+++ b/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir
@@ -60,7 +60,7 @@ func.func private @tensor_sizes_mismatch(%arg0: tensor<8xi32, #a>) -> ()
 
 // -----
 
-// expected-error at +1 {{unexpected dimToLvl mapping from 2 to 1}}
+// expected-error at +1 {{failed to infer lvlToDim from dimToLvl}}
 #a = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : dense)}>
 func.func private @tensor_sizes_mismatch(%arg0: tensor<8xi32, #a>) -> ()
 
@@ -119,7 +119,7 @@ func.func private @tensor_dimtolvl_mismatch(%arg0: tensor<8xi32, #a>) -> ()
 
 // -----
 
-// expected-error at +1 {{expected a permutation affine map for dimToLvl}}
+// expected-error at +1 {{failed to infer lvlToDim from dimToLvl}}
 #a = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : dense, d0 : compressed)}>
 func.func private @tensor_no_permutation(%arg0: tensor<16x32xf32, #a>) -> ()
 
@@ -251,3 +251,22 @@ func.func private @too_few_lvl_decl(%arg0: tensor<?x?xf64, #TooFewLvlDecl>) {
 func.func private @wrong_order_lvl_decl(%arg0: tensor<?x?xf64, #WrongOrderLvlDecl>) {
   return
 }
+
+// -----
+
+// expected-error at +1 {{expected lvlToDim to be an inverse of dimToLvl}}
+#BSR_explicit = #sparse_tensor.encoding<{
+  map =
+  {il, jl, ii, jj}
+  ( i = il * 3 + ii,
+    j = jl * 2 + jj
+  ) ->
+  ( il = i floordiv 2 : dense,
+    jl = j floordiv 3 : compressed,
+    ii = i mod 2      : dense,
+    jj = j mod 3      : dense
+  )
+}>
+func.func private @BSR_explicit(%arg0: tensor<?x?xf64, #BSR_explicit>) {
+  return
+}

>From 6cfb64276d47b6c24f85311ddcc80fa8d703d76b Mon Sep 17 00:00:00 2001
From: Konstantin Zhuravlyov <kzhuravl_dev at outlook.com>
Date: Thu, 19 Oct 2023 12:40:19 -0400
Subject: [PATCH 18/76] AMDGPU: Minor updates to program resource registers
 (#69525)

- Be explicit about which program resource register is supported by
which target
    - RSRC1
      - FP16_OVFL is GFX9+
      - WGP_MODE is GFX10+
      - MEM_ORDERED is GFX10+
      - FWD_PROGRESS is GFX10+
    - RSRC3
      - INST_PREF_SIZE is GFX11+
      - TRAP_ON_START is GFX11+
      - TRAP_ON_END is GFX11+
      - IMAGE_OP is GFX11+
  - Do not emit GFX11+ fields when disassembling GFX10 code objects
  - Tighten enforcement of reserved bits in disassembler

---------

Co-authored-by: Konstantin Zhuravlyov <kzhuravl at amd.com>
---
 .../llvm/Support/AMDHSAKernelDescriptor.h     |  46 +++-
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |   8 +-
 .../Disassembler/AMDGPUDisassembler.cpp       |  48 ++--
 .../MCTargetDesc/AMDGPUTargetStreamer.cpp     |   8 +-
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    |   4 +-
 .../tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s  |  16 --
 .../tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s  | 220 ++++++++++++++++++
 .../llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s |   4 -
 8 files changed, 299 insertions(+), 55 deletions(-)
 create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s

diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
index 1bd65471d3b7c90..0574f96e6e15c4f 100644
--- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -79,8 +79,21 @@ enum : uint8_t {
 };
 
 // Compute program resource register 1. Must match hardware definition.
+// GFX6+.
 #define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \
   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH)
+// [GFX6-GFX8].
+#define COMPUTE_PGM_RSRC1_GFX6_GFX8(NAME, SHIFT, WIDTH) \
+  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX8_ ## NAME, SHIFT, WIDTH)
+// [GFX6-GFX9].
+#define COMPUTE_PGM_RSRC1_GFX6_GFX9(NAME, SHIFT, WIDTH) \
+  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX9_ ## NAME, SHIFT, WIDTH)
+// GFX9+.
+#define COMPUTE_PGM_RSRC1_GFX9_PLUS(NAME, SHIFT, WIDTH) \
+  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX9_PLUS_ ## NAME, SHIFT, WIDTH)
+// GFX10+.
+#define COMPUTE_PGM_RSRC1_GFX10_PLUS(NAME, SHIFT, WIDTH) \
+  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
 enum : int32_t {
   COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
   COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
@@ -95,11 +108,13 @@ enum : int32_t {
   COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
   COMPUTE_PGM_RSRC1(BULKY, 24, 1),
   COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
-  COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1),    // GFX9+
-  COMPUTE_PGM_RSRC1(RESERVED0, 27, 2),
-  COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1),     // GFX10+
-  COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1),  // GFX10+
-  COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+
+  COMPUTE_PGM_RSRC1_GFX6_GFX8(RESERVED0, 26, 1),
+  COMPUTE_PGM_RSRC1_GFX9_PLUS(FP16_OVFL, 26, 1),
+  COMPUTE_PGM_RSRC1(RESERVED1, 27, 2),
+  COMPUTE_PGM_RSRC1_GFX6_GFX9(RESERVED2, 29, 3),
+  COMPUTE_PGM_RSRC1_GFX10_PLUS(WGP_MODE, 29, 1),
+  COMPUTE_PGM_RSRC1_GFX10_PLUS(MEM_ORDERED, 30, 1),
+  COMPUTE_PGM_RSRC1_GFX10_PLUS(FWD_PROGRESS, 31, 1),
 };
 #undef COMPUTE_PGM_RSRC1
 
@@ -143,15 +158,24 @@ enum : int32_t {
 
 // Compute program resource register 3 for GFX10+. Must match hardware
 // definition.
+// [GFX10].
+#define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \
+  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_ ## NAME, SHIFT, WIDTH)
+// GFX10+.
 #define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
+// GFX11+.
+#define COMPUTE_PGM_RSRC3_GFX11_PLUS(NAME, SHIFT, WIDTH) \
+  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_PLUS_ ## NAME, SHIFT, WIDTH)
 enum : int32_t {
-  COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4), // GFX10+
-  COMPUTE_PGM_RSRC3_GFX10_PLUS(INST_PREF_SIZE, 4, 6),    // GFX11+
-  COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_START, 10, 1),    // GFX11+
-  COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_END, 11, 1),      // GFX11+
-  COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED0, 12, 19),
-  COMPUTE_PGM_RSRC3_GFX10_PLUS(IMAGE_OP, 31, 1),         // GFX11+
+  COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4),
+  COMPUTE_PGM_RSRC3_GFX10(RESERVED0, 4, 8),
+  COMPUTE_PGM_RSRC3_GFX11_PLUS(INST_PREF_SIZE, 4, 6),
+  COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_START, 10, 1),
+  COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_END, 11, 1),
+  COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED1, 12, 19),
+  COMPUTE_PGM_RSRC3_GFX10(RESERVED2, 31, 1),
+  COMPUTE_PGM_RSRC3_GFX11_PLUS(IMAGE_OP, 31, 1),
 };
 #undef COMPUTE_PGM_RSRC3_GFX10_PLUS
 
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index faeaa94f9733576..9e143c77b606c34 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5242,7 +5242,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
     } else if (ID == ".amdhsa_fp16_overflow") {
       if (IVersion.Major < 9)
         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
-      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
                        ValRange);
     } else if (ID == ".amdhsa_tg_split") {
       if (!isGFX90A())
@@ -5252,17 +5252,17 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
     } else if (ID == ".amdhsa_workgroup_processor_mode") {
       if (IVersion.Major < 10)
         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
-      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
                        ValRange);
     } else if (ID == ".amdhsa_memory_ordered") {
       if (IVersion.Major < 10)
         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
-      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
                        ValRange);
     } else if (ID == ".amdhsa_forward_progress") {
       if (IVersion.Major < 10)
         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
-      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
                        ValRange);
     } else if (ID == ".amdhsa_shared_vgpr_count") {
       if (IVersion.Major < 10)
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index d74fd0b3a9ea74e..1b301ee5f49b216 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -1818,16 +1818,23 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
   if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
     return MCDisassembler::Fail;
 
-  PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);
+  if (isGFX9Plus())
+    PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
 
-  if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)
+  if (!isGFX9Plus())
+    if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0)
+      return MCDisassembler::Fail;
+  if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED1)
     return MCDisassembler::Fail;
+  if (!isGFX10Plus())
+    if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2)
+      return MCDisassembler::Fail;
 
   if (isGFX10Plus()) {
     PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
-                    COMPUTE_PGM_RSRC1_WGP_MODE);
-    PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);
-    PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);
+                    COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
+    PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
+    PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
   }
   return MCDisassembler::Success;
 }
@@ -1908,16 +1915,29 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
       PRINT_PSEUDO_DIRECTIVE_COMMENT(
           "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
     }
-    PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
-                                   COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE);
-    PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
-                                   COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
-    PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
-                                   COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_END);
-    if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED0)
+
+    if (isGFX11Plus()) {
+      PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
+                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_INST_PREF_SIZE);
+      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
+                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
+      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
+                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_END);
+    } else {
+      if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED0)
+        return MCDisassembler::Fail;
+    }
+
+    if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED1)
       return MCDisassembler::Fail;
-    PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
-                                   COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
+
+    if (isGFX11Plus()) {
+      PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
+                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
+    } else {
+      if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED2)
+        return MCDisassembler::Fail;
+    }
   } else if (FourByteBuffer) {
     return MCDisassembler::Fail;
   }
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 6b8c03c1620d26b..70350b83849aaae 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -453,7 +453,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
   if (IVersion.Major >= 9)
     PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
                 compute_pgm_rsrc1,
-                amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
+                amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
   if (AMDGPU::isGFX90A(STI))
     PRINT_FIELD(OS, ".amdhsa_tg_split", KD,
                 compute_pgm_rsrc3,
@@ -461,13 +461,13 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
   if (IVersion.Major >= 10) {
     PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
                 compute_pgm_rsrc1,
-                amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
+                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
     PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
                 compute_pgm_rsrc1,
-                amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
+                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
     PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
                 compute_pgm_rsrc1,
-                amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
+                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
     PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
                 amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
   }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index d123b384a27d4cc..954f21b5ec49bda 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1163,10 +1163,10 @@ amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
                     amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                     STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
-                    amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
+                    amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
                     STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
-                    amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
+                    amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1);
   }
   if (AMDGPU::isGFX90A(*STI)) {
     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s
index 52b399e4f0c56e2..58b01031afe383e 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s
@@ -12,10 +12,6 @@
 ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
 ; CHECK-NEXT: .amdhsa_kernarg_size 0
 ; CHECK-NEXT: ; SHARED_VGPR_COUNT 0
-; CHECK-NEXT: ; INST_PREF_SIZE 0
-; CHECK-NEXT: ; TRAP_ON_START 0
-; CHECK-NEXT: ; TRAP_ON_END 0
-; CHECK-NEXT: ; IMAGE_OP 0
 ; CHECK-NEXT: .amdhsa_next_free_vgpr 32
 ; CHECK-NEXT: .amdhsa_reserve_vcc 0
 ; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
@@ -69,10 +65,6 @@
 ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
 ; CHECK-NEXT: .amdhsa_kernarg_size 0
 ; CHECK-NEXT: .amdhsa_shared_vgpr_count 0
-; CHECK-NEXT: ; INST_PREF_SIZE 0
-; CHECK-NEXT: ; TRAP_ON_START 0
-; CHECK-NEXT: ; TRAP_ON_END 0
-; CHECK-NEXT: ; IMAGE_OP 0
 ; CHECK-NEXT: .amdhsa_next_free_vgpr 32
 ; CHECK-NEXT: .amdhsa_reserve_vcc 0
 ; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
@@ -126,10 +118,6 @@
 ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
 ; CHECK-NEXT: .amdhsa_kernarg_size 0
 ; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
-; CHECK-NEXT: ; INST_PREF_SIZE 0
-; CHECK-NEXT: ; TRAP_ON_START 0
-; CHECK-NEXT: ; TRAP_ON_END 0
-; CHECK-NEXT: ; IMAGE_OP 0
 ; CHECK-NEXT: .amdhsa_next_free_vgpr 32
 ; CHECK-NEXT: .amdhsa_reserve_vcc 0
 ; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
@@ -183,10 +171,6 @@
 ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
 ; CHECK-NEXT: .amdhsa_kernarg_size 0
 ; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
-; CHECK-NEXT: ; INST_PREF_SIZE 0
-; CHECK-NEXT: ; TRAP_ON_START 0
-; CHECK-NEXT: ; TRAP_ON_END 0
-; CHECK-NEXT: ; IMAGE_OP 0
 ; CHECK-NEXT: .amdhsa_next_free_vgpr 32
 ; CHECK-NEXT: .amdhsa_reserve_vcc 0
 ; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s
new file mode 100644
index 000000000000000..2133002908d9fc8
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s
@@ -0,0 +1,220 @@
+;; Test disassembly for gfx11 kernel descriptor.
+
+; RUN: rm -rf %t && split-file %s %t && cd %t
+
+;--- 1.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize32,-wavefrontsize64 -filetype=obj -mcpu=gfx1100 < 1.s > 1.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 1.o | tail -n +7 | tee 1-disasm.s | FileCheck 1.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize32,-wavefrontsize64 -filetype=obj -mcpu=gfx1100 < 1-disasm.s > 1-disasm.o
+; RUN: cmp 1.o 1-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: ; SHARED_VGPR_COUNT 0
+; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: ; TRAP_ON_START 0
+; CHECK-NEXT: ; TRAP_ON_END 0
+; CHECK-NEXT: ; IMAGE_OP 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1
+; CHECK-NEXT: .amdhsa_memory_ordered 1
+; CHECK-NEXT: .amdhsa_forward_progress 0
+; CHECK-NEXT: .amdhsa_enable_private_segment 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_wavefront_size32 1
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+  .amdhsa_wavefront_size32 1
+.end_amdhsa_kernel
+
+;--- 2.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1100 < 2.s > 2.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 2.o | tail -n +7 | tee 2-disasm.s | FileCheck 2.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1100 < 2-disasm.s > 2-disasm.o
+; RUN: cmp 2.o 2-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_shared_vgpr_count 0
+; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: ; TRAP_ON_START 0
+; CHECK-NEXT: ; TRAP_ON_END 0
+; CHECK-NEXT: ; IMAGE_OP 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1
+; CHECK-NEXT: .amdhsa_memory_ordered 1
+; CHECK-NEXT: .amdhsa_forward_progress 0
+; CHECK-NEXT: .amdhsa_enable_private_segment 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+  .amdhsa_shared_vgpr_count 0
+.end_amdhsa_kernel
+
+;--- 3.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1100 < 3.s > 3.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 3.o | tail -n +7 | tee 3-disasm.s | FileCheck 3.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1100 < 3-disasm.s > 3-disasm.o
+; RUN: cmp 3.o 3-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
+; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: ; TRAP_ON_START 0
+; CHECK-NEXT: ; TRAP_ON_END 0
+; CHECK-NEXT: ; IMAGE_OP 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1
+; CHECK-NEXT: .amdhsa_memory_ordered 1
+; CHECK-NEXT: .amdhsa_forward_progress 0
+; CHECK-NEXT: .amdhsa_enable_private_segment 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+  .amdhsa_shared_vgpr_count 1
+.end_amdhsa_kernel
+
+;--- 4.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1100 < 4.s > 4.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 4.o | tail -n +7 | tee 4-disasm.s | FileCheck 4.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1100 < 4-disasm.s > 4-disasm.o
+; RUN: cmp 4.o 4-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
+; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: ; TRAP_ON_START 0
+; CHECK-NEXT: ; TRAP_ON_END 0
+; CHECK-NEXT: ; IMAGE_OP 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1
+; CHECK-NEXT: .amdhsa_memory_ordered 1
+; CHECK-NEXT: .amdhsa_forward_progress 0
+; CHECK-NEXT: .amdhsa_enable_private_segment 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+  .amdhsa_shared_vgpr_count 1
+  .amdhsa_wavefront_size32 0
+.end_amdhsa_kernel
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
index 04cf28f89e44826..39cef4da4278df2 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
@@ -26,10 +26,6 @@
 ; OBJDUMP-NEXT:         .amdhsa_private_segment_fixed_size 0
 ; OBJDUMP-NEXT:         .amdhsa_kernarg_size 0
 ; OBJDUMP-NEXT:         .amdhsa_shared_vgpr_count 0
-; OBJDUMP-NEXT:         ; INST_PREF_SIZE 0
-; OBJDUMP-NEXT:         ; TRAP_ON_START 0
-; OBJDUMP-NEXT:         ; TRAP_ON_END 0
-; OBJDUMP-NEXT:         ; IMAGE_OP 0
 ; OBJDUMP-NEXT:         .amdhsa_next_free_vgpr 8
 ; OBJDUMP-NEXT:         .amdhsa_reserve_vcc 0
 ; OBJDUMP-NEXT:         .amdhsa_reserve_flat_scratch 0

>From 200a92520c255c20c916c11e9c240f07b9218380 Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto at arm.com>
Date: Thu, 19 Oct 2023 16:21:23 +0000
Subject: [PATCH 19/76] [Clang][SVE2.1] Add builtins and intrinsics for
 SVBFMLSLB/T

As described in: https://github.com/ARM-software/acle/pull/257

Patch by: Kerry McLaughlin <kerry.mclaughlin at arm.com>

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D151461
---
 clang/include/clang/Basic/arm_sve.td          |  6 ++
 clang/include/clang/Basic/arm_sve_sme_incl.td |  1 +
 .../acle_sve2p1_bfmlsl.c                      | 85 +++++++++++++++++++
 clang/utils/TableGen/SveEmitter.cpp           |  7 ++
 llvm/include/llvm/IR/IntrinsicsAArch64.td     | 16 ++++
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 10 ++-
 .../AArch64/sve2p1-intrinsics-bfmls.ll        | 43 ++++++++++
 7 files changed, 164 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
 create mode 100644 llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls.ll

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 8750e75f2a777a6..a1585443e5fd229 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1959,6 +1959,12 @@ def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "f",  MergeNone, "aarch64_
 def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i",  MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f",  MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
+
+def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>;
+def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone], []>;
+
+def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
 }
 
 let TargetGuard = "sve2p1" in {
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
index c3a6dc4e4d44abe..3a7a5b51b25801e 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -99,6 +99,7 @@
 // O: svfloat16_t
 // M: svfloat32_t
 // N: svfloat64_t
+// $: svbfloat16_t
 
 // J: Prefetch type (sv_prfop)
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
new file mode 100644
index 000000000000000..2955d4554da004e
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
@@ -0,0 +1,85 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// BFMLSLB
+
+
+// CHECK-LABEL: @test_bfmlslb(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z12test_bfmlslbu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_bfmlslb(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
+{
+  return SVE_ACLE_FUNC(svbfmlslb,_f32,,)(zda, zn, zm);
+}
+
+
+// CHECK-LABEL: @test_bfmlslb_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_bfmlslb_laneu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_bfmlslb_lane(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
+{
+  return SVE_ACLE_FUNC(svbfmlslb_lane,_f32,,)(zda, zn, zm, 7);
+}
+
+// BFMLSLT
+
+
+// CHECK-LABEL: @test_bfmlslt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z12test_bfmlsltu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_bfmlslt(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
+{
+  return SVE_ACLE_FUNC(svbfmlslt,_f32,,)(zda, zn, zm);
+}
+
+
+// CHECK-LABEL: @test_bfmlslt_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_bfmlslt_laneu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_bfmlslt_lane(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
+{
+  return SVE_ACLE_FUNC(svbfmlslt_lane,_f32,,)(zda, zn, zm, 7);
+}
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 7e9afc538c2b5ba..ab2b22233987a3c 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -852,6 +852,13 @@ void SVEType::applyModifier(char Mod) {
     NumVectors = 0;
     Signed = false;
     break;
+  case '$':
+    Predicate = false;
+    Svcount = false;
+    Float = false;
+    BFloat = true;
+    ElementBitwidth = 16;
+    break;
   case '}':
     Predicate = false;
     Signed = true;
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 557063c8813268e..a42e2c49cb477ba 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3011,6 +3011,16 @@ let TargetPrefix = "aarch64" in {
                             [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
                             [IntrNoMem]>;
 
+  class SME2_BFMLS_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
+                            [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty],
+                            [IntrNoMem]>;
+
+  class SME2_BFMLS_Lane_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
+                            [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i32_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
+
   class SME2_ZA_ArrayVector_Read_VG2_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                 [llvm_i32_ty],
@@ -3214,6 +3224,12 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_usmla_za32_lane_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
   def int_aarch64_sme_usmla_za32_lane_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
 
+  def int_aarch64_sve_bfmlslb : SME2_BFMLS_Intrinsic;
+  def int_aarch64_sve_bfmlslb_lane : SME2_BFMLS_Lane_Intrinsic;
+
+  def int_aarch64_sve_bfmlslt : SME2_BFMLS_Intrinsic;
+  def int_aarch64_sve_bfmlslt_lane : SME2_BFMLS_Lane_Intrinsic;
+
   // Multi-vector signed saturating doubling multiply high
 
   def int_aarch64_sve_sqdmulh_single_vgx2 : SME2_VG2_Multi_Single_Intrinsic;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 752f58596a2f04f..d599ac4689e5cb3 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3753,12 +3753,14 @@ defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel", int_aarch64_sve_psel>;
 
 let Predicates = [HasSVE2p1_or_HasSME2] in {
 defm FCLAMP_ZZZ : sve2p1_fclamp<"fclamp", int_aarch64_sve_fclamp>;
+
 defm FDOT_ZZZ_S  : sve_float_dot<0b0, 0b0, ZPR32, ZPR16, "fdot", nxv8f16, int_aarch64_sve_fdot_x2>;
 defm FDOT_ZZZI_S : sve_float_dot_indexed<0b0, 0b00, ZPR16, ZPR3b16, "fdot", nxv8f16, int_aarch64_sve_fdot_lane_x2>;
-def BFMLSLB_ZZZ_S : sve2_fp_mla_long<0b110, "bfmlslb">;
-def BFMLSLT_ZZZ_S : sve2_fp_mla_long<0b111, "bfmlslt">;
-def BFMLSLB_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b110, "bfmlslb">;
-def BFMLSLT_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b111, "bfmlslt">;
+
+defm BFMLSLB_ZZZ_S : sve2_fp_mla_long<0b110, "bfmlslb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslb>;
+defm BFMLSLT_ZZZ_S : sve2_fp_mla_long<0b111, "bfmlslt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslt>;
+defm BFMLSLB_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b110, "bfmlslb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslb_lane>;
+defm BFMLSLT_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b111, "bfmlslt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslt_lane>;
 
 defm SDOT_ZZZ_HtoS  : sve2p1_two_way_dot_vv<"sdot", 0b0, int_aarch64_sve_sdot_x2>;
 defm UDOT_ZZZ_HtoS  : sve2p1_two_way_dot_vv<"udot", 0b1, int_aarch64_sve_udot_x2>;
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls.ll
new file mode 100644
index 000000000000000..04ad000f20f070a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p1 -mattr=+b16b16 -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 4 x float> @bfmlslb_f32(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: bfmlslb_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlslb z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @bfmlslt_f32(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: bfmlslt_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlslt z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @bfmlslb_lane_f32(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: bfmlslb_lane_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlslb z0.s, z1.h, z2.h[7]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm, i32 7)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @bfmlslt_lane_f32(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: bfmlslt_lane_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlslt z0.s, z1.h, z2.h[7]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm, i32 7)
+  ret <vscale x 4 x float> %out
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)

>From c35939b22eba9440dff0509b1c2608de39478f14 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 19 Oct 2023 12:45:21 -0400
Subject: [PATCH 20/76] [VectorCombine] Use isSafeToSpeculativelyExecute to
 guard VP scalarization (#69494)

Previously we were just matching against a fixed list of VP intrinsics
that we
knew couldn't be speculated, but we can reuse the logic in
isSafeToSpeculativelyExecuteWithOpcode. This also allows speculation in
more
cases, e.g. when the divisor is known to be non-zero.

Unfortunately we can't reuse the exact same function call for VP
intrinsics
with functional intrinsics instead of opcodes, because
isSafeToSpeculativelyExecute needs an instruction that already exists.
So this
just copies the logic by peeking into the function attributes of the
intrinsic.
---
 .../Transforms/Vectorize/VectorCombine.cpp    |  43 +++++---
 .../RISCV/vpintrin-scalarization.ll           | 100 ++++++++++++------
 2 files changed, 94 insertions(+), 49 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 64a515270fd57f2..5cbcb017f97c1b6 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -825,23 +825,32 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
   ElementCount EC = cast<VectorType>(Op0->getType())->getElementCount();
   Value *EVL = VPI.getArgOperand(3);
   const DataLayout &DL = VPI.getModule()->getDataLayout();
-  bool MustHaveNonZeroVL =
-      IntrID == Intrinsic::vp_sdiv || IntrID == Intrinsic::vp_udiv ||
-      IntrID == Intrinsic::vp_srem || IntrID == Intrinsic::vp_urem;
-
-  if (!MustHaveNonZeroVL || isKnownNonZero(EVL, DL, 0, &AC, &VPI, &DT)) {
-    Value *ScalarOp0 = getSplatValue(Op0);
-    Value *ScalarOp1 = getSplatValue(Op1);
-    Value *ScalarVal =
-        ScalarIntrID
-            ? Builder.CreateIntrinsic(VecTy->getScalarType(), *ScalarIntrID,
-                                      {ScalarOp0, ScalarOp1})
-            : Builder.CreateBinOp((Instruction::BinaryOps)(*FunctionalOpcode),
-                                  ScalarOp0, ScalarOp1);
-    replaceValue(VPI, *Builder.CreateVectorSplat(EC, ScalarVal));
-    return true;
-  }
-  return false;
+
+  // If the VP op might introduce UB or poison, we can scalarize it provided
+  // that we know the EVL > 0: If the EVL is zero, then the original VP op
+  // becomes a no-op and thus won't be UB, so make sure we don't introduce UB by
+  // scalarizing it.
+  bool SafeToSpeculate;
+  if (ScalarIntrID)
+    SafeToSpeculate = Intrinsic::getAttributes(I.getContext(), *ScalarIntrID)
+                          .hasFnAttr(Attribute::AttrKind::Speculatable);
+  else
+    SafeToSpeculate = isSafeToSpeculativelyExecuteWithOpcode(
+        *FunctionalOpcode, &VPI, nullptr, &AC, &DT);
+  if (!SafeToSpeculate && !isKnownNonZero(EVL, DL, 0, &AC, &VPI, &DT))
+    return false;
+
+  Value *ScalarOp0 = getSplatValue(Op0);
+  Value *ScalarOp1 = getSplatValue(Op1);
+  Value *ScalarVal =
+      ScalarIntrID
+          ? Builder.CreateIntrinsic(VecTy->getScalarType(), *ScalarIntrID,
+                                    {ScalarOp0, ScalarOp1})
+          : Builder.CreateBinOp((Instruction::BinaryOps)(*FunctionalOpcode),
+                                ScalarOp0, ScalarOp1);
+
+  replaceValue(VPI, *Builder.CreateVectorSplat(EC, ScalarVal));
+  return true;
 }
 
 /// Match a vector binop or compare instruction with at least one inserted
diff --git a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll
index da183a6b14bc687..e95aea4eb487b87 100644
--- a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll
+++ b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll
@@ -166,14 +166,23 @@ define <vscale x 1 x i64> @mul_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <v
 }
 
 define <vscale x 1 x i64> @sdiv_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
-; ALL-LABEL: @sdiv_nxv1i64_allonesmask(
-; ALL-NEXT:    [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-; ALL-NEXT:    [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-; ALL-NEXT:    [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0
-; ALL-NEXT:    [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; ALL-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
-; ALL-NEXT:    [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
-; ALL-NEXT:    ret <vscale x 1 x i64> [[TMP4]]
+; VEC-COMBINE-LABEL: @sdiv_nxv1i64_allonesmask(
+; VEC-COMBINE-NEXT:    [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+; VEC-COMBINE-NEXT:    [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; VEC-COMBINE-NEXT:    [[TMP1:%.*]] = sdiv i64 [[Y:%.*]], 42
+; VEC-COMBINE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
+; VEC-COMBINE-NEXT:    [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; VEC-COMBINE-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; VEC-COMBINE-NEXT:    ret <vscale x 1 x i64> [[TMP3]]
+;
+; NO-VEC-COMBINE-LABEL: @sdiv_nxv1i64_allonesmask(
+; NO-VEC-COMBINE-NEXT:    [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+; NO-VEC-COMBINE-NEXT:    [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; NO-VEC-COMBINE-NEXT:    [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0
+; NO-VEC-COMBINE-NEXT:    [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; NO-VEC-COMBINE-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; NO-VEC-COMBINE-NEXT:    [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
+; NO-VEC-COMBINE-NEXT:    ret <vscale x 1 x i64> [[TMP4]]
 ;
   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
@@ -221,14 +230,23 @@ define <vscale x 1 x i64> @sdiv_nxv1i64_unspeculatable(i64 %x, i64 %y, i32 zeroe
 }
 
 define <vscale x 1 x i64> @udiv_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
-; ALL-LABEL: @udiv_nxv1i64_allonesmask(
-; ALL-NEXT:    [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-; ALL-NEXT:    [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-; ALL-NEXT:    [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0
-; ALL-NEXT:    [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; ALL-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
-; ALL-NEXT:    [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
-; ALL-NEXT:    ret <vscale x 1 x i64> [[TMP4]]
+; VEC-COMBINE-LABEL: @udiv_nxv1i64_allonesmask(
+; VEC-COMBINE-NEXT:    [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+; VEC-COMBINE-NEXT:    [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; VEC-COMBINE-NEXT:    [[TMP1:%.*]] = udiv i64 [[Y:%.*]], 42
+; VEC-COMBINE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
+; VEC-COMBINE-NEXT:    [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; VEC-COMBINE-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; VEC-COMBINE-NEXT:    ret <vscale x 1 x i64> [[TMP3]]
+;
+; NO-VEC-COMBINE-LABEL: @udiv_nxv1i64_allonesmask(
+; NO-VEC-COMBINE-NEXT:    [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+; NO-VEC-COMBINE-NEXT:    [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; NO-VEC-COMBINE-NEXT:    [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0
+; NO-VEC-COMBINE-NEXT:    [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; NO-VEC-COMBINE-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; NO-VEC-COMBINE-NEXT:    [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
+; NO-VEC-COMBINE-NEXT:    ret <vscale x 1 x i64> [[TMP4]]
 ;
   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
@@ -276,14 +294,23 @@ define <vscale x 1 x i64> @udiv_nxv1i64_unspeculatable(i64 %x, i64 %y, i32 zeroe
 }
 
 define <vscale x 1 x i64> @srem_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
-; ALL-LABEL: @srem_nxv1i64_allonesmask(
-; ALL-NEXT:    [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-; ALL-NEXT:    [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-; ALL-NEXT:    [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0
-; ALL-NEXT:    [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; ALL-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
-; ALL-NEXT:    [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
-; ALL-NEXT:    ret <vscale x 1 x i64> [[TMP4]]
+; VEC-COMBINE-LABEL: @srem_nxv1i64_allonesmask(
+; VEC-COMBINE-NEXT:    [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+; VEC-COMBINE-NEXT:    [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; VEC-COMBINE-NEXT:    [[TMP1:%.*]] = srem i64 [[Y:%.*]], 42
+; VEC-COMBINE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
+; VEC-COMBINE-NEXT:    [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; VEC-COMBINE-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; VEC-COMBINE-NEXT:    ret <vscale x 1 x i64> [[TMP3]]
+;
+; NO-VEC-COMBINE-LABEL: @srem_nxv1i64_allonesmask(
+; NO-VEC-COMBINE-NEXT:    [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+; NO-VEC-COMBINE-NEXT:    [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; NO-VEC-COMBINE-NEXT:    [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0
+; NO-VEC-COMBINE-NEXT:    [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; NO-VEC-COMBINE-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; NO-VEC-COMBINE-NEXT:    [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
+; NO-VEC-COMBINE-NEXT:    ret <vscale x 1 x i64> [[TMP4]]
 ;
   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
@@ -331,14 +358,23 @@ define <vscale x 1 x i64> @srem_nxv1i64_unspeculatable(i64 %x, i64 %y, i32 zeroe
 }
 
 define <vscale x 1 x i64> @urem_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
-; ALL-LABEL: @urem_nxv1i64_allonesmask(
-; ALL-NEXT:    [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-; ALL-NEXT:    [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-; ALL-NEXT:    [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0
-; ALL-NEXT:    [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
-; ALL-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
-; ALL-NEXT:    [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
-; ALL-NEXT:    ret <vscale x 1 x i64> [[TMP4]]
+; VEC-COMBINE-LABEL: @urem_nxv1i64_allonesmask(
+; VEC-COMBINE-NEXT:    [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+; VEC-COMBINE-NEXT:    [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; VEC-COMBINE-NEXT:    [[TMP1:%.*]] = urem i64 [[Y:%.*]], 42
+; VEC-COMBINE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP1]], i64 0
+; VEC-COMBINE-NEXT:    [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; VEC-COMBINE-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; VEC-COMBINE-NEXT:    ret <vscale x 1 x i64> [[TMP3]]
+;
+; NO-VEC-COMBINE-LABEL: @urem_nxv1i64_allonesmask(
+; NO-VEC-COMBINE-NEXT:    [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+; NO-VEC-COMBINE-NEXT:    [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; NO-VEC-COMBINE-NEXT:    [[TMP1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[Y:%.*]], i64 0
+; NO-VEC-COMBINE-NEXT:    [[TMP2:%.*]] = shufflevector <vscale x 1 x i64> [[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; NO-VEC-COMBINE-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; NO-VEC-COMBINE-NEXT:    [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
+; NO-VEC-COMBINE-NEXT:    ret <vscale x 1 x i64> [[TMP4]]
 ;
   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer

>From 0446c589afd66f061a604715d3f6e9f8fef012bf Mon Sep 17 00:00:00 2001
From: Alex Langford <alangford at apple.com>
Date: Thu, 19 Oct 2023 09:48:15 -0700
Subject: [PATCH 21/76] [DebugInfo] Correctly report header parsing errors from
 DWARFContext::fixupIndex (#69505)

In ef762e5e7292, I shifted around where errors were reported when
failing to parse and/or validate DWARFUnitHeaders. When we are doing so
in DWARFContext::fixupIndex, the actual error message isn't prefixed
with `warning:` like it would be elsewhere (because of the way
`logAllUnhandledErrors` is implemented).
---
 llvm/lib/DebugInfo/DWARF/DWARFContext.cpp              | 10 ++++------
 .../tools/llvm-dwp/X86/cu_tu_units_manual_v5_invalid.s |  4 ++--
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 724f816ad094a75..57ca11a077a486a 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -91,10 +91,9 @@ void fixupIndexV4(DWARFContext &C, DWARFUnitIndex &Index) {
       DWARFUnitHeader Header;
       if (Error ExtractionErr = Header.extract(
               C, Data, &Offset, DWARFSectionKind::DW_SECT_INFO)) {
-        logAllUnhandledErrors(
+        C.getWarningHandler()(
             createError("Failed to parse CU header in DWP file: " +
-                        toString(std::move(ExtractionErr))),
-            errs());
+                        toString(std::move(ExtractionErr))));
         Map.clear();
         break;
       }
@@ -154,10 +153,9 @@ void fixupIndexV5(DWARFContext &C, DWARFUnitIndex &Index) {
       DWARFUnitHeader Header;
       if (Error ExtractionErr = Header.extract(
               C, Data, &Offset, DWARFSectionKind::DW_SECT_INFO)) {
-        logAllUnhandledErrors(
+        C.getWarningHandler()(
             createError("Failed to parse CU header in DWP file: " +
-                        toString(std::move(ExtractionErr))),
-            errs());
+                        toString(std::move(ExtractionErr))));
         break;
       }
       bool CU = Header.getUnitType() == DW_UT_split_compile;
diff --git a/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5_invalid.s b/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5_invalid.s
index d1ab9f75b74c8fa..1f63b2121797038 100644
--- a/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5_invalid.s
+++ b/llvm/test/tools/llvm-dwp/X86/cu_tu_units_manual_v5_invalid.s
@@ -13,10 +13,10 @@
 # CHECK-NOT: .debug_info.dwo contents:
 
 # CHECK-DAG: .debug_cu_index contents:
-# CHECK: Failed to parse CU header in DWP file: DWARF unit at offset 0x00000000 has unsupported version 6, supported are 2-5
+# CHECK: warning: Failed to parse CU header in DWP file: DWARF unit at offset 0x00000000 has unsupported version 6, supported are 2-5
 
 # CHECK-DAG: .debug_tu_index contents:
-# CHECK: Failed to parse CU header in DWP file: DWARF unit at offset 0x00000000 has unsupported version 6, supported are 2-5
+# CHECK: warning: Failed to parse CU header in DWP file: DWARF unit at offset 0x00000000 has unsupported version 6, supported are 2-5
 
     .section	.debug_info.dwo,"e", at progbits
     .long	.Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit

>From 01263c6c6fb495a94fe0ccbb1420bb1ec8460748 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Thu, 19 Oct 2023 09:48:54 -0700
Subject: [PATCH 22/76] [lldb] Rename lldb-vscode to lldb-dap (#69264)

Rename lldb-vscode to lldb-dap. This change is largely mechanical. The
following substitutions cover the majority of the changes in this
commit:

  s/VSCODE/DAP/
  s/VSCode/DAP/
  s/vscode/dap/
  s/g_vsc/g_dap/

Discourse RFC:
https://discourse.llvm.org/t/rfc-rename-lldb-vscode-to-lldb-dap/74075/
---
 lldb/packages/Python/lldbsuite/test/dotest.py |  20 +-
 .../Python/lldbsuite/test/lldbtest.py         |   6 +-
 .../Python/lldbsuite/test/test_categories.py  |   2 +-
 .../vscode.py => lldb-dap/dap_server.py}      |   8 +-
 .../lldbdap_testcase.py}                      | 104 +--
 .../{lldb-vscode => lldb-dap}/attach/Makefile |   0
 .../attach/TestDAP_attach.py}                 |  16 +-
 .../{lldb-vscode => lldb-dap}/attach/main.c   |   0
 .../breakpoint-events/Makefile                |   0
 .../TestDAP_breakpointEvents.py}              |  36 +-
 .../breakpoint-events/foo.cpp                 |   2 +-
 .../breakpoint-events/foo.h                   |   0
 .../breakpoint-events/main.cpp                |   0
 .../breakpoint/Makefile                       |   0
 .../breakpoint/TestDAP_logpoints.py}          |  26 +-
 .../breakpoint/TestDAP_setBreakpoints.py}     |  50 +-
 .../TestDAP_setExceptionBreakpoints.py}       |  10 +-
 .../TestDAP_setFunctionBreakpoints.py}        |  28 +-
 .../breakpoint/main.cpp                       |  10 +-
 .../breakpoint/other.c                        |   0
 lldb/test/API/tools/lldb-dap/categories       |   1 +
 .../completions/Makefile                      |   0
 .../completions/TestDAP_completions.py}       |  36 +-
 .../completions/main.cpp                      |   8 +-
 .../console/Makefile                          |   0
 .../console/TestDAP_console.py}               |  20 +-
 .../TestDAP_redirection_to_console.py}        |  14 +-
 .../console/main.cpp                          |   0
 .../coreFile/TestDAP_coreFile.py}             |  10 +-
 .../coreFile/linux-x86_64.core                | Bin
 .../coreFile/linux-x86_64.out                 | Bin
 .../{lldb-vscode => lldb-dap}/coreFile/main.c |   0
 .../correct-thread/Makefile                   |   0
 .../correct-thread/TestDAP_correct_thread.py} |  12 +-
 .../correct-thread/main.c                     |   6 +-
 .../disassemble/Makefile                      |   0
 .../disassemble/TestDAP_disassemble.py}       |   8 +-
 .../API/tools/lldb-dap/disassemble/main.c     |  30 +
 .../disconnect/Makefile                       |   0
 .../disconnect/TestDAP_disconnect.py}         |  16 +-
 .../disconnect/main.cpp                       |   0
 .../evaluate/Makefile                         |   0
 .../evaluate/TestDAP_evaluate.py}             |  12 +-
 .../evaluate/foo.cpp                          |   0
 .../{lldb-vscode => lldb-dap}/evaluate/foo.h  |   0
 .../evaluate/main.cpp                         |   4 +-
 .../exception/Makefile                        |   0
 .../exception/TestDAP_exception.py}           |   8 +-
 .../exception/main.cpp                        |   0
 .../{lldb-vscode => lldb-dap}/launch/Makefile |   0
 .../launch/TestDAP_launch.py}                 |  34 +-
 .../{lldb-vscode => lldb-dap}/launch/main.c   |   6 +-
 .../{lldb-vscode => lldb-dap}/module/Makefile |   0
 .../module/TestDAP_module.py}                 |  20 +-
 lldb/test/API/tools/lldb-dap/module/foo.cpp   |   1 +
 .../{lldb-vscode => lldb-dap}/module/foo.h    |   0
 .../{lldb-vscode => lldb-dap}/module/main.cpp |   0
 .../optimized/Makefile                        |   0
 .../optimized/TestDAP_optimized.py}           |  14 +-
 .../optimized/main.cpp                        |   0
 .../restart/Makefile                          |   0
 .../restart/TestDAP_restart.py}               |  36 +-
 .../restart/TestDAP_restart_runInTerminal.py} |  30 +-
 .../{lldb-vscode => lldb-dap}/restart/main.c  |   2 +-
 .../runInTerminal/Makefile                    |   0
 .../runInTerminal/TestDAP_runInTerminal.py}   |  40 +-
 .../runInTerminal/main.c                      |   0
 .../stackTrace/Makefile                       |   0
 .../stackTrace/TestDAP_stackTrace.py}         |   8 +-
 .../stackTrace/main.c                         |   4 +-
 .../stackTraceMissingFunctionName/Makefile    |   0
 .../TestDAP_stackTraceMissingFunctionName.py} |   8 +-
 .../stackTraceMissingFunctionName/main.cpp    |   0
 .../startDebugging/Makefile                   |   0
 .../startDebugging/TestDAP_startDebugging.py} |  18 +-
 .../startDebugging/main.c                     |   0
 .../{lldb-vscode => lldb-dap}/step/Makefile   |   0
 .../step/TestDAP_step.py}                     |  10 +-
 lldb/test/API/tools/lldb-dap/step/main.cpp    |   8 +
 .../stop-hooks/Makefile                       |   0
 .../stop-hooks/TestDAP_stop_hooks.py}         |  10 +-
 .../stop-hooks/main.c                         |   0
 .../terminated-event/Makefile                 |   0
 .../TestDAP_terminatedEvent.py}               |  12 +-
 .../tools/lldb-dap/terminated-event/foo.cpp   |   1 +
 .../terminated-event/foo.h                    |   0
 .../terminated-event/main.cpp                 |   2 +-
 .../variables/Makefile                        |   0
 .../variables/TestDAP_variables.py}           |  86 +--
 .../variables/main.cpp                        |   4 +-
 lldb/test/API/tools/lldb-vscode/categories    |   1 -
 .../API/tools/lldb-vscode/disassemble/main.c  |  30 -
 .../test/API/tools/lldb-vscode/module/foo.cpp |   3 -
 lldb/test/API/tools/lldb-vscode/step/main.cpp |  10 -
 .../lldb-vscode/terminated-event/foo.cpp      |   3 -
 .../Shell/{VSCode => DAP}/TestOptions.test    |   2 +-
 lldb/test/Shell/helper/toolchain.py           |   2 +-
 lldb/tools/CMakeLists.txt                     |   2 +-
 .../BreakpointBase.cpp                        |  12 +-
 .../BreakpointBase.h                          |   8 +-
 .../{lldb-vscode => lldb-dap}/CMakeLists.txt  |  16 +-
 .../VSCode.cpp => lldb-dap/DAP.cpp}           | 156 +++--
 .../{lldb-vscode/VSCode.h => lldb-dap/DAP.h}  |  26 +-
 .../VSCodeForward.h => lldb-dap/DAPForward.h} |  10 +-
 .../ExceptionBreakpoint.cpp                   |  10 +-
 .../ExceptionBreakpoint.h                     |   8 +-
 .../{lldb-vscode => lldb-dap}/FifoFiles.cpp   |   4 +-
 .../{lldb-vscode => lldb-dap}/FifoFiles.h     |  10 +-
 .../FunctionBreakpoint.cpp                    |   8 +-
 .../FunctionBreakpoint.h                      |   8 +-
 .../{lldb-vscode => lldb-dap}/IOStream.cpp    |   2 +-
 .../{lldb-vscode => lldb-dap}/IOStream.h      |   8 +-
 .../{lldb-vscode => lldb-dap}/JSONUtils.cpp   |  32 +-
 .../{lldb-vscode => lldb-dap}/JSONUtils.h     |  37 +-
 .../{lldb-vscode => lldb-dap}/LLDBUtils.cpp   |  10 +-
 .../{lldb-vscode => lldb-dap}/LLDBUtils.h     |  26 +-
 .../{lldb-vscode => lldb-dap}/Options.td      |   6 +-
 .../OutputRedirector.cpp                      |   4 +-
 .../OutputRedirector.h                        |  10 +-
 .../ProgressEvent.cpp                         |   2 +-
 .../{lldb-vscode => lldb-dap}/ProgressEvent.h |  12 +-
 .../tools/{lldb-vscode => lldb-dap}/README.md |  77 +--
 .../RunInTerminal.cpp                         |   6 +-
 .../{lldb-vscode => lldb-dap}/RunInTerminal.h |  10 +-
 .../SourceBreakpoint.cpp                      |   8 +-
 .../SourceBreakpoint.h                        |   8 +-
 .../lldb-dap-Info.plist.in}                   |   4 +-
 .../lldb-vscode.cpp => lldb-dap/lldb-dap.cpp} | 617 +++++++++---------
 .../{lldb-vscode => lldb-dap}/package.json    |  14 +-
 .../syntaxes/arm.disasm                       |   0
 .../syntaxes/arm64.disasm                     |   0
 .../syntaxes/disassembly.json                 |   0
 .../syntaxes/x86.disasm                       |   0
 lldb/tools/lldb-vscode                        |   1 +
 llvm/docs/ReleaseNotes.rst                    |   3 +
 135 files changed, 1043 insertions(+), 1040 deletions(-)
 rename lldb/packages/Python/lldbsuite/test/tools/{lldb-vscode/vscode.py => lldb-dap/dap_server.py} (99%)
 rename lldb/packages/Python/lldbsuite/test/tools/{lldb-vscode/lldbvscode_testcase.py => lldb-dap/lldbdap_testcase.py} (82%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/attach/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/attach/TestVSCode_attach.py => lldb-dap/attach/TestDAP_attach.py} (96%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/attach/main.c (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/breakpoint-events/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/breakpoint-events/TestVSCode_breakpointEvents.py => lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py} (83%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/breakpoint-events/foo.cpp (91%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/breakpoint-events/foo.h (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/breakpoint-events/main.cpp (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/breakpoint/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/breakpoint/TestVSCode_logpoints.py => lldb-dap/breakpoint/TestDAP_logpoints.py} (94%)
 rename lldb/test/API/tools/{lldb-vscode/breakpoint/TestVSCode_setBreakpoints.py => lldb-dap/breakpoint/TestDAP_setBreakpoints.py} (89%)
 rename lldb/test/API/tools/{lldb-vscode/breakpoint/TestVSCode_setExceptionBreakpoints.py => lldb-dap/breakpoint/TestDAP_setExceptionBreakpoints.py} (86%)
 rename lldb/test/API/tools/{lldb-vscode/breakpoint/TestVSCode_setFunctionBreakpoints.py => lldb-dap/breakpoint/TestDAP_setFunctionBreakpoints.py} (88%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/breakpoint/main.cpp (82%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/breakpoint/other.c (100%)
 create mode 100644 lldb/test/API/tools/lldb-dap/categories
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/completions/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/completions/TestVSCode_completions.py => lldb-dap/completions/TestDAP_completions.py} (81%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/completions/main.cpp (81%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/console/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/console/TestVSCode_console.py => lldb-dap/console/TestDAP_console.py} (83%)
 rename lldb/test/API/tools/{lldb-vscode/console/TestVSCode_redirection_to_console.py => lldb-dap/console/TestDAP_redirection_to_console.py} (66%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/console/main.cpp (100%)
 rename lldb/test/API/tools/{lldb-vscode/coreFile/TestVSCode_coreFile.py => lldb-dap/coreFile/TestDAP_coreFile.py} (92%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/coreFile/linux-x86_64.core (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/coreFile/linux-x86_64.out (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/coreFile/main.c (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/correct-thread/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/correct-thread/TestVSCode_correct_thread.py => lldb-dap/correct-thread/TestDAP_correct_thread.py} (86%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/correct-thread/main.c (84%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/disassemble/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/disassemble/TestVSCode_disassemble.py => lldb-dap/disassemble/TestDAP_disassemble.py} (89%)
 create mode 100644 lldb/test/API/tools/lldb-dap/disassemble/main.c
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/disconnect/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/disconnect/TestVSCode_disconnect.py => lldb-dap/disconnect/TestDAP_disconnect.py} (87%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/disconnect/main.cpp (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/evaluate/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/evaluate/TestVSCode_evaluate.py => lldb-dap/evaluate/TestDAP_evaluate.py} (95%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/evaluate/foo.cpp (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/evaluate/foo.h (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/evaluate/main.cpp (94%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/exception/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/exception/TestVSCode_exception.py => lldb-dap/exception/TestDAP_exception.py} (74%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/exception/main.cpp (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/launch/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/launch/TestVSCode_launch.py => lldb-dap/launch/TestDAP_launch.py} (94%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/launch/main.c (77%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/module/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/module/TestVSCode_module.py => lldb-dap/module/TestDAP_module.py} (88%)
 create mode 100644 lldb/test/API/tools/lldb-dap/module/foo.cpp
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/module/foo.h (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/module/main.cpp (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/optimized/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/optimized/TestVSCode_optimized.py => lldb-dap/optimized/TestDAP_optimized.py} (81%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/optimized/main.cpp (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/restart/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/restart/TestVSCode_restart.py => lldb-dap/restart/TestDAP_restart.py} (79%)
 rename lldb/test/API/tools/{lldb-vscode/restart/TestVSCode_restart_runInTerminal.py => lldb-dap/restart/TestDAP_restart_runInTerminal.py} (80%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/restart/main.c (86%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/runInTerminal/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py => lldb-dap/runInTerminal/TestDAP_runInTerminal.py} (84%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/runInTerminal/main.c (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/stackTrace/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/stackTrace/TestVSCode_stackTrace.py => lldb-dap/stackTrace/TestDAP_stackTrace.py} (97%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/stackTrace/main.c (65%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/stackTraceMissingFunctionName/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/stackTraceMissingFunctionName/TestVSCode_stackTraceMissingFunctionName.py => lldb-dap/stackTraceMissingFunctionName/TestDAP_stackTraceMissingFunctionName.py} (77%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/stackTraceMissingFunctionName/main.cpp (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/startDebugging/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/startDebugging/TestVSCode_startDebugging.py => lldb-dap/startDebugging/TestDAP_startDebugging.py} (64%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/startDebugging/main.c (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/step/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/step/TestVSCode_step.py => lldb-dap/step/TestDAP_step.py} (94%)
 create mode 100644 lldb/test/API/tools/lldb-dap/step/main.cpp
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/stop-hooks/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/stop-hooks/TestVSCode_stop_hooks.py => lldb-dap/stop-hooks/TestDAP_stop_hooks.py} (82%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/stop-hooks/main.c (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/terminated-event/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/terminated-event/TestVSCode_terminatedEvent.py => lldb-dap/terminated-event/TestDAP_terminatedEvent.py} (89%)
 create mode 100644 lldb/test/API/tools/lldb-dap/terminated-event/foo.cpp
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/terminated-event/foo.h (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/terminated-event/main.cpp (100%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/variables/Makefile (100%)
 rename lldb/test/API/tools/{lldb-vscode/variables/TestVSCode_variables.py => lldb-dap/variables/TestDAP_variables.py} (89%)
 rename lldb/test/API/tools/{lldb-vscode => lldb-dap}/variables/main.cpp (90%)
 delete mode 100644 lldb/test/API/tools/lldb-vscode/categories
 delete mode 100644 lldb/test/API/tools/lldb-vscode/disassemble/main.c
 delete mode 100644 lldb/test/API/tools/lldb-vscode/module/foo.cpp
 delete mode 100644 lldb/test/API/tools/lldb-vscode/step/main.cpp
 delete mode 100644 lldb/test/API/tools/lldb-vscode/terminated-event/foo.cpp
 rename lldb/test/Shell/{VSCode => DAP}/TestOptions.test (70%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/BreakpointBase.cpp (97%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/BreakpointBase.h (93%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/CMakeLists.txt (79%)
 rename lldb/tools/{lldb-vscode/VSCode.cpp => lldb-dap/DAP.cpp} (86%)
 rename lldb/tools/{lldb-vscode/VSCode.h => lldb-dap/DAP.h} (96%)
 rename lldb/tools/{lldb-vscode/VSCodeForward.h => lldb-dap/DAPForward.h} (81%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/ExceptionBreakpoint.cpp (84%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/ExceptionBreakpoint.h (83%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/FifoFiles.cpp (98%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/FifoFiles.h (93%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/FunctionBreakpoint.cpp (87%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/FunctionBreakpoint.h (80%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/IOStream.cpp (99%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/IOStream.h (93%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/JSONUtils.cpp (98%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/JSONUtils.h (94%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/LLDBUtils.cpp (92%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/LLDBUtils.h (82%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/Options.td (85%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/OutputRedirector.cpp (96%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/OutputRedirector.h (77%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/ProgressEvent.cpp (99%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/ProgressEvent.h (97%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/README.md (83%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/RunInTerminal.cpp (98%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/RunInTerminal.h (94%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/SourceBreakpoint.cpp (88%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/SourceBreakpoint.h (87%)
 rename lldb/tools/{lldb-vscode/lldb-vscode-Info.plist.in => lldb-dap/lldb-dap-Info.plist.in} (88%)
 rename lldb/tools/{lldb-vscode/lldb-vscode.cpp => lldb-dap/lldb-dap.cpp} (88%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/package.json (97%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/syntaxes/arm.disasm (100%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/syntaxes/arm64.disasm (100%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/syntaxes/disassembly.json (100%)
 rename lldb/tools/{lldb-vscode => lldb-dap}/syntaxes/x86.disasm (100%)
 create mode 120000 lldb/tools/lldb-vscode

diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py
index a27d8cf321f8293..a639714480cf4eb 100644
--- a/lldb/packages/Python/lldbsuite/test/dotest.py
+++ b/lldb/packages/Python/lldbsuite/test/dotest.py
@@ -485,15 +485,15 @@ def setupSysPath():
     os.environ["LLDB_SRC"] = lldbsuite.lldb_root
 
     pluginPath = os.path.join(scriptPath, "plugins")
-    toolsLLDBVSCode = os.path.join(scriptPath, "tools", "lldb-vscode")
+    toolsLLDBDAP = os.path.join(scriptPath, "tools", "lldb-dap")
     toolsLLDBServerPath = os.path.join(scriptPath, "tools", "lldb-server")
     intelpt = os.path.join(scriptPath, "tools", "intelpt")
 
     # Insert script dir, plugin dir and lldb-server dir to the sys.path.
     sys.path.insert(0, pluginPath)
-    # Adding test/tools/lldb-vscode to the path makes it easy to
-    # "import lldb_vscode_testcase" from the VSCode tests
-    sys.path.insert(0, toolsLLDBVSCode)
+    # Adding test/tools/lldb-dap to the path makes it easy to
+    # "import lldb_dap_testcase" from the DAP tests
+    sys.path.insert(0, toolsLLDBDAP)
     # Adding test/tools/lldb-server to the path makes it easy
     # to "import lldbgdbserverutils" from the lldb-server tests
     sys.path.insert(0, toolsLLDBServerPath)
@@ -538,15 +538,15 @@ def setupSysPath():
 
     lldbDir = os.path.dirname(lldbtest_config.lldbExec)
 
-    lldbVSCodeExec = os.path.join(lldbDir, "lldb-vscode")
-    if is_exe(lldbVSCodeExec):
-        os.environ["LLDBVSCODE_EXEC"] = lldbVSCodeExec
+    lldbDAPExec = os.path.join(lldbDir, "lldb-dap")
+    if is_exe(lldbDAPExec):
+        os.environ["LLDBDAP_EXEC"] = lldbDAPExec
     else:
-        if not configuration.shouldSkipBecauseOfCategories(["lldb-vscode"]):
+        if not configuration.shouldSkipBecauseOfCategories(["lldb-dap"]):
             print(
-                "The 'lldb-vscode' executable cannot be located.  The lldb-vscode tests can not be run as a result."
+                "The 'lldb-dap' executable cannot be located.  The lldb-dap tests can not be run as a result."
             )
-            configuration.skip_categories.append("lldb-vscode")
+            configuration.skip_categories.append("lldb-dap")
 
     lldbPythonDir = None  # The directory that contains 'lldb/__init__.py'
 
diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py
index c8670b208ec3f0c..6d736a56ecb89bf 100644
--- a/lldb/packages/Python/lldbsuite/test/lldbtest.py
+++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py
@@ -801,10 +801,10 @@ def setUp(self):
         else:
             self.libcxxPath = None
 
-        if "LLDBVSCODE_EXEC" in os.environ:
-            self.lldbVSCodeExec = os.environ["LLDBVSCODE_EXEC"]
+        if "LLDBDAP_EXEC" in os.environ:
+            self.lldbDAPExec = os.environ["LLDBDAP_EXEC"]
         else:
-            self.lldbVSCodeExec = None
+            self.lldbDAPExec = None
 
         self.lldbOption = " ".join("-o '" + s + "'" for s in self.setUpCommands())
 
diff --git a/lldb/packages/Python/lldbsuite/test/test_categories.py b/lldb/packages/Python/lldbsuite/test/test_categories.py
index 3883c4de5e19957..3f8de175e29df3f 100644
--- a/lldb/packages/Python/lldbsuite/test/test_categories.py
+++ b/lldb/packages/Python/lldbsuite/test/test_categories.py
@@ -31,7 +31,7 @@
     "libc++": "Test for libc++ data formatters",
     "libstdcxx": "Test for libstdcxx data formatters",
     "lldb-server": "Tests related to lldb-server",
-    "lldb-vscode": "Visual Studio Code debug adaptor tests",
+    "lldb-dap": "Tests for the Debug Adaptor Protocol with lldb-dap",
     "llgs": "Tests for the gdb-server functionality of lldb-server",
     "objc": "Tests related to the Objective-C programming language support",
     "pyapi": "Tests related to the Python API",
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
similarity index 99%
rename from lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py
rename to lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
index 5ee0800b27a5699..ca11f34be450c9a 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
@@ -1013,7 +1013,7 @@ def terminate(self):
         # self.recv.close()
 
 
-class DebugAdaptor(DebugCommunication):
+class DebugAdaptorServer(DebugCommunication):
     def __init__(
         self, executable=None, port=None, init_commands=[], log_file=None, env=None
     ):
@@ -1024,7 +1024,7 @@ def __init__(
                 adaptor_env.update(env)
 
             if log_file:
-                adaptor_env["LLDBVSCODE_LOG"] = log_file
+                adaptor_env["LLDBDAP_LOG"] = log_file
             self.process = subprocess.Popen(
                 [executable],
                 stdin=subprocess.PIPE,
@@ -1048,7 +1048,7 @@ def get_pid(self):
         return -1
 
     def terminate(self):
-        super(DebugAdaptor, self).terminate()
+        super(DebugAdaptorServer, self).terminate()
         if self.process is not None:
             self.process.terminate()
             self.process.wait()
@@ -1364,7 +1364,7 @@ def main():
             "using the --port option"
         )
         return
-    dbg = DebugAdaptor(executable=options.vscode_path, port=options.port)
+    dbg = DebugAdaptorServer(executable=options.vscode_path, port=options.port)
     if options.debug:
         raw_input('Waiting for debugger to attach pid "%i"' % (dbg.get_pid()))
     if options.replay:
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py
similarity index 82%
rename from lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py
rename to lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py
index 8cd4e8454c89099..3094b66af4792db 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py
@@ -1,29 +1,29 @@
 import os
 import time
 
-import vscode
+import dap_server
 from lldbsuite.test.lldbtest import *
 
 
-class VSCodeTestCaseBase(TestBase):
+class DAPTestCaseBase(TestBase):
     NO_DEBUG_INFO_TESTCASE = True
 
-    def create_debug_adaptor(self, lldbVSCodeEnv=None):
+    def create_debug_adaptor(self, lldbDAPEnv=None):
         """Create the Visual Studio Code debug adaptor"""
         self.assertTrue(
-            is_exe(self.lldbVSCodeExec), "lldb-vscode must exist and be executable"
+            is_exe(self.lldbDAPExec), "lldb-dap must exist and be executable"
         )
-        log_file_path = self.getBuildArtifact("vscode.txt")
-        self.vscode = vscode.DebugAdaptor(
-            executable=self.lldbVSCodeExec,
+        log_file_path = self.getBuildArtifact("dap.txt")
+        self.dap_server = dap_server.DebugAdaptorServer(
+            executable=self.lldbDAPExec,
             init_commands=self.setUpCommands(),
             log_file=log_file_path,
-            env=lldbVSCodeEnv,
+            env=lldbDAPEnv,
         )
 
-    def build_and_create_debug_adaptor(self, lldbVSCodeEnv=None):
+    def build_and_create_debug_adaptor(self, lldbDAPEnv=None):
         self.build()
-        self.create_debug_adaptor(lldbVSCodeEnv)
+        self.create_debug_adaptor(lldbDAPEnv)
 
     def set_source_breakpoints(self, source_path, lines, data=None):
         """Sets source breakpoints and returns an array of strings containing
@@ -32,7 +32,7 @@ def set_source_breakpoints(self, source_path, lines, data=None):
         Each object in data is 1:1 mapping with the entry in lines.
         It contains optional location/hitCondition/logMessage parameters.
         """
-        response = self.vscode.request_setBreakpoints(source_path, lines, data)
+        response = self.dap_server.request_setBreakpoints(source_path, lines, data)
         if response is None:
             return []
         breakpoints = response["body"]["breakpoints"]
@@ -46,7 +46,7 @@ def set_function_breakpoints(self, functions, condition=None, hitCondition=None)
         and returns an array of strings containing the breakpoint IDs
         ("1", "2") for each breakpoint that was set.
         """
-        response = self.vscode.request_setFunctionBreakpoints(
+        response = self.dap_server.request_setFunctionBreakpoints(
             functions, condition=condition, hitCondition=hitCondition
         )
         if response is None:
@@ -70,7 +70,7 @@ def verify_breakpoint_hit(self, breakpoint_ids):
         "breakpoint_ids" should be a list of breakpoint ID strings
         (["1", "2"]). The return value from self.set_source_breakpoints()
         or self.set_function_breakpoints() can be passed to this function"""
-        stopped_events = self.vscode.wait_for_stopped()
+        stopped_events = self.dap_server.wait_for_stopped()
         for stopped_event in stopped_events:
             if "body" in stopped_event:
                 body = stopped_event["body"]
@@ -83,7 +83,7 @@ def verify_breakpoint_hit(self, breakpoint_ids):
                 # Descriptions for breakpoints will be in the form
                 # "breakpoint 1.1", so look for any description that matches
                 # ("breakpoint 1.") in the description field as verification
-                # that one of the breakpoint locations was hit. VSCode doesn't
+                # that one of the breakpoint locations was hit. DAP doesn't
                 # allow breakpoints to have multiple locations, but LLDB does.
                 # So when looking at the description we just want to make sure
                 # the right breakpoint matches and not worry about the actual
@@ -100,7 +100,7 @@ def verify_stop_exception_info(self, expected_description):
         reason is 'exception' and that the description matches
         'expected_description'
         """
-        stopped_events = self.vscode.wait_for_stopped()
+        stopped_events = self.dap_server.wait_for_stopped()
         for stopped_event in stopped_events:
             if "body" in stopped_event:
                 body = stopped_event["body"]
@@ -150,7 +150,7 @@ def get_dict_value(self, d, key_path):
     def get_stackFrames_and_totalFramesCount(
         self, threadId=None, startFrame=None, levels=None, dump=False
     ):
-        response = self.vscode.request_stackTrace(
+        response = self.dap_server.request_stackTrace(
             threadId=threadId, startFrame=startFrame, levels=levels, dump=dump
         )
         if response:
@@ -185,16 +185,16 @@ def get_source_and_line(self, threadId=None, frameIndex=0):
         return ("", 0)
 
     def get_stdout(self, timeout=0.0):
-        return self.vscode.get_output("stdout", timeout=timeout)
+        return self.dap_server.get_output("stdout", timeout=timeout)
 
     def get_console(self, timeout=0.0):
-        return self.vscode.get_output("console", timeout=timeout)
+        return self.dap_server.get_output("console", timeout=timeout)
 
     def collect_console(self, duration):
-        return self.vscode.collect_output("console", duration=duration)
+        return self.dap_server.collect_output("console", duration=duration)
 
     def get_local_as_int(self, name, threadId=None):
-        value = self.vscode.get_local_variable_value(name, threadId=threadId)
+        value = self.dap_server.get_local_variable_value(name, threadId=threadId)
         if value.startswith("0x"):
             return int(value, 16)
         elif value.startswith("0"):
@@ -204,48 +204,48 @@ def get_local_as_int(self, name, threadId=None):
 
     def set_local(self, name, value, id=None):
         """Set a top level local variable only."""
-        return self.vscode.request_setVariable(1, name, str(value), id=id)
+        return self.dap_server.request_setVariable(1, name, str(value), id=id)
 
     def set_global(self, name, value, id=None):
         """Set a top level global variable only."""
-        return self.vscode.request_setVariable(2, name, str(value), id=id)
+        return self.dap_server.request_setVariable(2, name, str(value), id=id)
 
     def stepIn(self, threadId=None, waitForStop=True):
-        self.vscode.request_stepIn(threadId=threadId)
+        self.dap_server.request_stepIn(threadId=threadId)
         if waitForStop:
-            return self.vscode.wait_for_stopped()
+            return self.dap_server.wait_for_stopped()
         return None
 
     def stepOver(self, threadId=None, waitForStop=True):
-        self.vscode.request_next(threadId=threadId)
+        self.dap_server.request_next(threadId=threadId)
         if waitForStop:
-            return self.vscode.wait_for_stopped()
+            return self.dap_server.wait_for_stopped()
         return None
 
     def stepOut(self, threadId=None, waitForStop=True):
-        self.vscode.request_stepOut(threadId=threadId)
+        self.dap_server.request_stepOut(threadId=threadId)
         if waitForStop:
-            return self.vscode.wait_for_stopped()
+            return self.dap_server.wait_for_stopped()
         return None
 
     def continue_to_next_stop(self):
-        self.vscode.request_continue()
-        return self.vscode.wait_for_stopped()
+        self.dap_server.request_continue()
+        return self.dap_server.wait_for_stopped()
 
     def continue_to_breakpoints(self, breakpoint_ids):
-        self.vscode.request_continue()
+        self.dap_server.request_continue()
         self.verify_breakpoint_hit(breakpoint_ids)
 
     def continue_to_exception_breakpoint(self, filter_label):
-        self.vscode.request_continue()
+        self.dap_server.request_continue()
         self.assertTrue(
             self.verify_stop_exception_info(filter_label),
             'verify we got "%s"' % (filter_label),
         )
 
     def continue_to_exit(self, exitCode=0):
-        self.vscode.request_continue()
-        stopped_events = self.vscode.wait_for_stopped()
+        self.dap_server.request_continue()
+        stopped_events = self.dap_server.wait_for_stopped()
         self.assertEquals(
             len(stopped_events), 1, "stopped_events = {}".format(stopped_events)
         )
@@ -266,10 +266,10 @@ def disassemble(self, threadId=None, frameIndex=None):
         memoryReference = stackFrames[0]["instructionPointerReference"]
         self.assertIsNotNone(memoryReference)
 
-        if memoryReference not in self.vscode.disassembled_instructions:
-            self.vscode.request_disassemble(memoryReference=memoryReference)
+        if memoryReference not in self.dap_server.disassembled_instructions:
+            self.dap_server.request_disassemble(memoryReference=memoryReference)
 
-        return self.vscode.disassembled_instructions[memoryReference]
+        return self.dap_server.disassembled_instructions[memoryReference]
 
     def attach(
         self,
@@ -289,22 +289,22 @@ def attach(
         sourceMap=None,
         sourceInitFile=False,
     ):
-        """Build the default Makefile target, create the VSCode debug adaptor,
+        """Build the default Makefile target, create the DAP debug adaptor,
         and attach to the process.
         """
 
-        # Make sure we disconnect and terminate the VSCode debug adaptor even
+        # Make sure we disconnect and terminate the DAP debug adaptor even
         # if we throw an exception during the test case.
         def cleanup():
             if disconnectAutomatically:
-                self.vscode.request_disconnect(terminateDebuggee=True)
-            self.vscode.terminate()
+                self.dap_server.request_disconnect(terminateDebuggee=True)
+            self.dap_server.terminate()
 
         # Execute the cleanup function during test case tear down.
         self.addTearDownHook(cleanup)
         # Initialize and launch the program
-        self.vscode.request_initialize(sourceInitFile)
-        response = self.vscode.request_attach(
+        self.dap_server.request_initialize(sourceInitFile)
+        response = self.dap_server.request_attach(
             program=program,
             pid=pid,
             waitFor=waitFor,
@@ -352,21 +352,21 @@ def launch(
         enableAutoVariableSummaries=False,
         enableSyntheticChildDebugging=False,
     ):
-        """Sending launch request to vscode"""
+        """Sending launch request to dap"""
 
-        # Make sure we disconnect and terminate the VSCode debug adapter,
+        # Make sure we disconnect and terminate the DAP debug adapter,
         # if we throw an exception during the test case
         def cleanup():
             if disconnectAutomatically:
-                self.vscode.request_disconnect(terminateDebuggee=True)
-            self.vscode.terminate()
+                self.dap_server.request_disconnect(terminateDebuggee=True)
+            self.dap_server.terminate()
 
         # Execute the cleanup function during test case tear down.
         self.addTearDownHook(cleanup)
 
         # Initialize and launch the program
-        self.vscode.request_initialize(sourceInitFile)
-        response = self.vscode.request_launch(
+        self.dap_server.request_initialize(sourceInitFile)
+        response = self.dap_server.request_launch(
             program,
             args=args,
             cwd=cwd,
@@ -422,14 +422,14 @@ def build_and_launch(
         runInTerminal=False,
         disconnectAutomatically=True,
         postRunCommands=None,
-        lldbVSCodeEnv=None,
+        lldbDAPEnv=None,
         enableAutoVariableSummaries=False,
         enableSyntheticChildDebugging=False,
     ):
-        """Build the default Makefile target, create the VSCode debug adaptor,
+        """Build the default Makefile target, create the DAP debug adaptor,
         and launch the process.
         """
-        self.build_and_create_debug_adaptor(lldbVSCodeEnv)
+        self.build_and_create_debug_adaptor(lldbDAPEnv)
         self.assertTrue(os.path.exists(program), "executable must exist")
 
         return self.launch(
diff --git a/lldb/test/API/tools/lldb-vscode/attach/Makefile b/lldb/test/API/tools/lldb-dap/attach/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/attach/Makefile
rename to lldb/test/API/tools/lldb-dap/attach/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/attach/TestVSCode_attach.py b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py
similarity index 96%
rename from lldb/test/API/tools/lldb-vscode/attach/TestVSCode_attach.py
rename to lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py
index a53dac77e39d3bd..04017a0521e9557 100644
--- a/lldb/test/API/tools/lldb-vscode/attach/TestVSCode_attach.py
+++ b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py
@@ -1,13 +1,13 @@
 """
-Test lldb-vscode setBreakpoints request
+Test lldb-dap setBreakpoints request
 """
 
 
-import vscode
+import dap_server
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 import os
 import shutil
 import subprocess
@@ -25,7 +25,7 @@ def spawn_and_wait(program, delay):
     process.wait()
 
 
-class TestVSCode_attach(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_attach(lldbdap_testcase.DAPTestCaseBase):
     def set_and_hit_breakpoint(self, continueToExit=True):
         source = "main.c"
         breakpoint1_line = line_number(source, "// breakpoint 1")
@@ -190,10 +190,10 @@ def test_commands(self):
         # Continue after launch and hit the "pause()" call and stop the target.
         # Get output from the console. This should contain both the
         # "stopCommands" that were run after we stop.
-        self.vscode.request_continue()
+        self.dap_server.request_continue()
         time.sleep(0.5)
-        self.vscode.request_pause()
-        self.vscode.wait_for_stopped()
+        self.dap_server.request_pause()
+        self.dap_server.wait_for_stopped()
         output = self.get_console(timeout=1.0)
         self.verify_commands("stopCommands", output, stopCommands)
 
@@ -236,6 +236,6 @@ def test_terminate_commands(self):
         self.get_console()
         # Once it's disconnected the console should contain the
         # "terminateCommands"
-        self.vscode.request_disconnect(terminateDebuggee=True)
+        self.dap_server.request_disconnect(terminateDebuggee=True)
         output = self.collect_console(duration=1.0)
         self.verify_commands("terminateCommands", output, terminateCommands)
diff --git a/lldb/test/API/tools/lldb-vscode/attach/main.c b/lldb/test/API/tools/lldb-dap/attach/main.c
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/attach/main.c
rename to lldb/test/API/tools/lldb-dap/attach/main.c
diff --git a/lldb/test/API/tools/lldb-vscode/breakpoint-events/Makefile b/lldb/test/API/tools/lldb-dap/breakpoint-events/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/breakpoint-events/Makefile
rename to lldb/test/API/tools/lldb-dap/breakpoint-events/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/breakpoint-events/TestVSCode_breakpointEvents.py b/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py
similarity index 83%
rename from lldb/test/API/tools/lldb-vscode/breakpoint-events/TestVSCode_breakpointEvents.py
rename to lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py
index 51996d03d9b1704..a20384b75f5c099 100644
--- a/lldb/test/API/tools/lldb-vscode/breakpoint-events/TestVSCode_breakpointEvents.py
+++ b/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py
@@ -1,17 +1,17 @@
 """
-Test lldb-vscode setBreakpoints request
+Test lldb-dap setBreakpoints request
 """
 
 
-import vscode
+import dap_server
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 import os
 
 
-class TestVSCode_breakpointEvents(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_breakpointEvents(lldbdap_testcase.DAPTestCaseBase):
     @skipIfWindows
     @skipUnlessDarwin
     @expectedFailureAll(macos_version=[">=", "13.0"])
@@ -48,7 +48,7 @@ def test_breakpoint_events(self):
 
         # Set a breakpoint after creating the target by running a command line
         # command. It will eventually resolve and cause a breakpoint changed
-        # event to be sent to lldb-vscode. We want to make sure we don't send a
+        # event to be sent to lldb-dap. We want to make sure we don't send a
         # breakpoint any breakpoints that were set from the command line.
         # Breakpoints that are set via the VS code DAP packets will be
         # registered and marked with a special keyword to ensure we deliver
@@ -59,24 +59,28 @@ def test_breakpoint_events(self):
         main_bp_id = 0
         foo_bp_id = 0
         # Set breakpoints and verify that they got set correctly
-        vscode_breakpoint_ids = []
-        response = self.vscode.request_setBreakpoints(main_source_path, [main_bp_line])
+        dap_breakpoint_ids = []
+        response = self.dap_server.request_setBreakpoints(
+            main_source_path, [main_bp_line]
+        )
         if response:
             breakpoints = response["body"]["breakpoints"]
             for breakpoint in breakpoints:
                 main_bp_id = breakpoint["id"]
-                vscode_breakpoint_ids.append("%i" % (main_bp_id))
+                dap_breakpoint_ids.append("%i" % (main_bp_id))
                 # line = breakpoint['line']
                 self.assertTrue(
                     breakpoint["verified"], "expect main breakpoint to be verified"
                 )
 
-        response = self.vscode.request_setBreakpoints(foo_source_path, [foo_bp1_line])
+        response = self.dap_server.request_setBreakpoints(
+            foo_source_path, [foo_bp1_line]
+        )
         if response:
             breakpoints = response["body"]["breakpoints"]
             for breakpoint in breakpoints:
                 foo_bp_id = breakpoint["id"]
-                vscode_breakpoint_ids.append("%i" % (foo_bp_id))
+                dap_breakpoint_ids.append("%i" % (foo_bp_id))
                 self.assertFalse(
                     breakpoint["verified"], "expect foo breakpoint to not be verified"
                 )
@@ -88,21 +92,23 @@ def test_breakpoint_events(self):
         # libraries are not loaded yet (at least on macOS they aren't) and any
         # breakpoints set in foo.cpp should not be resolved.
         self.assertEqual(
-            len(self.vscode.breakpoint_events),
+            len(self.dap_server.breakpoint_events),
             0,
             "no breakpoint events when stopped at entry point",
         )
 
         # Continue to the breakpoint
-        self.continue_to_breakpoints(vscode_breakpoint_ids)
+        self.continue_to_breakpoints(dap_breakpoint_ids)
 
         # Make sure we only get an event for the breakpoint we set via a call
-        # to self.vscode.request_setBreakpoints(...), not the breakpoint
+        # to self.dap_server.request_setBreakpoints(...), not the breakpoint
         # we set with with a LLDB command in preRunCommands.
         self.assertEqual(
-            len(self.vscode.breakpoint_events), 1, "make sure we got a breakpoint event"
+            len(self.dap_server.breakpoint_events),
+            1,
+            "make sure we got a breakpoint event",
         )
-        event = self.vscode.breakpoint_events[0]
+        event = self.dap_server.breakpoint_events[0]
         # Verify the details of the breakpoint changed notification.
         body = event["body"]
         self.assertEqual(
diff --git a/lldb/test/API/tools/lldb-vscode/breakpoint-events/foo.cpp b/lldb/test/API/tools/lldb-dap/breakpoint-events/foo.cpp
similarity index 91%
rename from lldb/test/API/tools/lldb-vscode/breakpoint-events/foo.cpp
rename to lldb/test/API/tools/lldb-dap/breakpoint-events/foo.cpp
index fa4f5657e69b281..7a4f90d7dd58171 100644
--- a/lldb/test/API/tools/lldb-vscode/breakpoint-events/foo.cpp
+++ b/lldb/test/API/tools/lldb-dap/breakpoint-events/foo.cpp
@@ -7,5 +7,5 @@ static void unique_function_name() {
 int foo(int x) {
   // foo breakpoint 1
   unique_function_name();
-  return x+42;
+  return x + 42;
 }
diff --git a/lldb/test/API/tools/lldb-vscode/breakpoint-events/foo.h b/lldb/test/API/tools/lldb-dap/breakpoint-events/foo.h
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/breakpoint-events/foo.h
rename to lldb/test/API/tools/lldb-dap/breakpoint-events/foo.h
diff --git a/lldb/test/API/tools/lldb-vscode/breakpoint-events/main.cpp b/lldb/test/API/tools/lldb-dap/breakpoint-events/main.cpp
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/breakpoint-events/main.cpp
rename to lldb/test/API/tools/lldb-dap/breakpoint-events/main.cpp
diff --git a/lldb/test/API/tools/lldb-vscode/breakpoint/Makefile b/lldb/test/API/tools/lldb-dap/breakpoint/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/breakpoint/Makefile
rename to lldb/test/API/tools/lldb-dap/breakpoint/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/breakpoint/TestVSCode_logpoints.py b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_logpoints.py
similarity index 94%
rename from lldb/test/API/tools/lldb-vscode/breakpoint/TestVSCode_logpoints.py
rename to lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_logpoints.py
index f530542749f7da4..25e794a49d3ac12 100644
--- a/lldb/test/API/tools/lldb-vscode/breakpoint/TestVSCode_logpoints.py
+++ b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_logpoints.py
@@ -1,20 +1,20 @@
 """
-Test lldb-vscode logpoints feature.
+Test lldb-dap logpoints feature.
 """
 
 
-import vscode
+import dap_server
 import shutil
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 import os
 
 
-class TestVSCode_logpoints(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_logpoints(lldbdap_testcase.DAPTestCaseBase):
     def setUp(self):
-        lldbvscode_testcase.VSCodeTestCaseBase.setUp(self)
+        lldbdap_testcase.DAPTestCaseBase.setUp(self)
 
         self.main_basename = "main-copy.cpp"
         self.main_path = os.path.realpath(self.getBuildArtifact(self.main_basename))
@@ -36,7 +36,7 @@ def test_logmessage_basic(self):
         )
         self.assertEquals(len(before_loop_breakpoint_ids), 1, "expect one breakpoint")
 
-        self.vscode.request_continue()
+        self.dap_server.request_continue()
 
         # Verify we hit the breakpoint before loop line
         self.verify_breakpoint_hit(before_loop_breakpoint_ids)
@@ -56,7 +56,7 @@ def test_logmessage_basic(self):
         )
 
         # Continue to trigger the breakpoint with log messages
-        self.vscode.request_continue()
+        self.dap_server.request_continue()
 
         # Verify we hit the breakpoint after loop line
         self.verify_breakpoint_hit([post_loop_breakpoint_id])
@@ -94,7 +94,7 @@ def test_logmessage_advanced(self):
         )
         self.assertEquals(len(before_loop_breakpoint_ids), 1, "expect one breakpoint")
 
-        self.vscode.request_continue()
+        self.dap_server.request_continue()
 
         # Verify we hit the breakpoint before loop line
         self.verify_breakpoint_hit(before_loop_breakpoint_ids)
@@ -117,7 +117,7 @@ def test_logmessage_advanced(self):
         )
 
         # Continue to trigger the breakpoint with log messages
-        self.vscode.request_continue()
+        self.dap_server.request_continue()
 
         # Verify we hit the breakpoint after loop line
         self.verify_breakpoint_hit([post_loop_breakpoint_id])
@@ -157,7 +157,7 @@ def test_logmessage_format(self):
         )
         self.assertEquals(len(before_loop_breakpoint_ids), 1, "expect one breakpoint")
 
-        self.vscode.request_continue()
+        self.dap_server.request_continue()
 
         # Verify we hit the breakpoint before loop line
         self.verify_breakpoint_hit(before_loop_breakpoint_ids)
@@ -179,7 +179,7 @@ def test_logmessage_format(self):
         )
 
         # Continue to trigger the breakpoint with log messages
-        self.vscode.request_continue()
+        self.dap_server.request_continue()
 
         # Verify we hit the breakpoint after loop line
         self.verify_breakpoint_hit([post_loop_breakpoint_id])
@@ -222,7 +222,7 @@ def test_logmessage_format_failure(self):
         )
         self.assertEquals(len(before_loop_breakpoint_ids), 1, "expect one breakpoint")
 
-        self.vscode.request_continue()
+        self.dap_server.request_continue()
 
         # Verify we hit the breakpoint before loop line
         self.verify_breakpoint_hit(before_loop_breakpoint_ids)
@@ -244,7 +244,7 @@ def test_logmessage_format_failure(self):
         )
 
         # Continue to trigger the breakpoint with log messages
-        self.vscode.request_continue()
+        self.dap_server.request_continue()
 
         # Verify we hit logpoint breakpoint if it's format has error.
         self.verify_breakpoint_hit([loop_breakpoint_id])
diff --git a/lldb/test/API/tools/lldb-vscode/breakpoint/TestVSCode_setBreakpoints.py b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py
similarity index 89%
rename from lldb/test/API/tools/lldb-vscode/breakpoint/TestVSCode_setBreakpoints.py
rename to lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py
index a918ed42f5a07ea..3d3252a78b19aac 100644
--- a/lldb/test/API/tools/lldb-vscode/breakpoint/TestVSCode_setBreakpoints.py
+++ b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setBreakpoints.py
@@ -1,20 +1,20 @@
 """
-Test lldb-vscode setBreakpoints request
+Test lldb-dap setBreakpoints request
 """
 
 
-import vscode
+import dap_server
 import shutil
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 import os
 
 
-class TestVSCode_setBreakpoints(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_setBreakpoints(lldbdap_testcase.DAPTestCaseBase):
     def setUp(self):
-        lldbvscode_testcase.VSCodeTestCaseBase.setUp(self)
+        lldbdap_testcase.DAPTestCaseBase.setUp(self)
 
         self.main_basename = "main-copy.cpp"
         self.main_path = os.path.realpath(self.getBuildArtifact(self.main_basename))
@@ -58,7 +58,7 @@ def test_source_map(self):
         self.launch(program, sourceMap=source_map)
 
         # breakpoint in main.cpp
-        response = self.vscode.request_setBreakpoints(new_main_path, [main_line])
+        response = self.dap_server.request_setBreakpoints(new_main_path, [main_line])
         breakpoints = response["body"]["breakpoints"]
         self.assertEquals(len(breakpoints), 1)
         breakpoint = breakpoints[0]
@@ -68,7 +68,7 @@ def test_source_map(self):
         self.assertEqual(new_main_path, breakpoint["source"]["path"])
 
         # 2nd breakpoint, which is from a dynamically loaded library
-        response = self.vscode.request_setBreakpoints(new_other_path, [other_line])
+        response = self.dap_server.request_setBreakpoints(new_other_path, [other_line])
         breakpoints = response["body"]["breakpoints"]
         breakpoint = breakpoints[0]
         self.assertEqual(breakpoint["line"], other_line)
@@ -77,11 +77,11 @@ def test_source_map(self):
         self.assertEqual(new_other_path, breakpoint["source"]["path"])
         other_breakpoint_id = breakpoint["id"]
 
-        self.vscode.request_continue()
+        self.dap_server.request_continue()
         self.verify_breakpoint_hit([other_breakpoint_id])
 
         # 2nd breakpoint again, which should be valid at this point
-        response = self.vscode.request_setBreakpoints(new_other_path, [other_line])
+        response = self.dap_server.request_setBreakpoints(new_other_path, [other_line])
         breakpoints = response["body"]["breakpoints"]
         breakpoint = breakpoints[0]
         self.assertEqual(breakpoint["line"], other_line)
@@ -90,7 +90,7 @@ def test_source_map(self):
         self.assertEqual(new_other_path, breakpoint["source"]["path"])
 
         # now we check the stack trace making sure that we got mapped source paths
-        frames = self.vscode.request_stackTrace()["body"]["stackFrames"]
+        frames = self.dap_server.request_stackTrace()["body"]["stackFrames"]
 
         self.assertEqual(frames[0]["source"]["name"], other_basename)
         self.assertEqual(frames[0]["source"]["path"], new_other_path)
@@ -125,7 +125,7 @@ def test_set_and_clear(self):
         self.build_and_launch(program)
 
         # Set 3 breakpoints and verify that they got set correctly
-        response = self.vscode.request_setBreakpoints(self.main_path, lines)
+        response = self.dap_server.request_setBreakpoints(self.main_path, lines)
         line_to_id = {}
         if response:
             breakpoints = response["body"]["breakpoints"]
@@ -152,7 +152,7 @@ def test_set_and_clear(self):
         lines.remove(second_line)
         # Set 2 breakpoints and verify that the previous breakpoints that were
         # set above are still set.
-        response = self.vscode.request_setBreakpoints(self.main_path, lines)
+        response = self.dap_server.request_setBreakpoints(self.main_path, lines)
         if response:
             breakpoints = response["body"]["breakpoints"]
             self.assertEquals(
@@ -179,7 +179,7 @@ def test_set_and_clear(self):
         # we have only 2 breakpoints set. The response above could have told
         # us about 2 breakpoints, but we want to make sure we don't have the
         # third one still set in the target
-        response = self.vscode.request_testGetTargetBreakpoints()
+        response = self.dap_server.request_testGetTargetBreakpoints()
         if response:
             breakpoints = response["body"]["breakpoints"]
             self.assertEquals(
@@ -204,7 +204,7 @@ def test_set_and_clear(self):
         # Now clear all breakpoints for the source file by passing down an
         # empty lines array
         lines = []
-        response = self.vscode.request_setBreakpoints(self.main_path, lines)
+        response = self.dap_server.request_setBreakpoints(self.main_path, lines)
         if response:
             breakpoints = response["body"]["breakpoints"]
             self.assertEquals(
@@ -214,7 +214,7 @@ def test_set_and_clear(self):
             )
 
         # Verify with the target that all breakpoints have been cleared
-        response = self.vscode.request_testGetTargetBreakpoints()
+        response = self.dap_server.request_testGetTargetBreakpoints()
         if response:
             breakpoints = response["body"]["breakpoints"]
             self.assertEquals(
@@ -226,7 +226,7 @@ def test_set_and_clear(self):
         # Now set a breakpoint again in the same source file and verify it
         # was added.
         lines = [second_line]
-        response = self.vscode.request_setBreakpoints(self.main_path, lines)
+        response = self.dap_server.request_setBreakpoints(self.main_path, lines)
         if response:
             breakpoints = response["body"]["breakpoints"]
             self.assertEquals(
@@ -245,7 +245,7 @@ def test_set_and_clear(self):
         # we have only 2 breakpoints set. The response above could have told
         # us about 2 breakpoints, but we want to make sure we don't have the
         # third one still set in the target
-        response = self.vscode.request_testGetTargetBreakpoints()
+        response = self.dap_server.request_testGetTargetBreakpoints()
         if response:
             breakpoints = response["body"]["breakpoints"]
             self.assertEquals(
@@ -278,7 +278,7 @@ def test_clear_breakpoints_unset_breakpoints(self):
         self.build_and_launch(program)
 
         # Set one breakpoint and verify that it got set correctly.
-        response = self.vscode.request_setBreakpoints(self.main_path, lines)
+        response = self.dap_server.request_setBreakpoints(self.main_path, lines)
         line_to_id = {}
         breakpoints = response["body"]["breakpoints"]
         self.assertEquals(
@@ -295,12 +295,12 @@ def test_clear_breakpoints_unset_breakpoints(self):
         # Now clear all breakpoints for the source file by not setting the
         # lines array.
         lines = None
-        response = self.vscode.request_setBreakpoints(self.main_path, lines)
+        response = self.dap_server.request_setBreakpoints(self.main_path, lines)
         breakpoints = response["body"]["breakpoints"]
         self.assertEquals(len(breakpoints), 0, "expect no source breakpoints")
 
         # Verify with the target that all breakpoints have been cleared.
-        response = self.vscode.request_testGetTargetBreakpoints()
+        response = self.dap_server.request_testGetTargetBreakpoints()
         breakpoints = response["body"]["breakpoints"]
         self.assertEquals(len(breakpoints), 0, "expect no source breakpoints")
 
@@ -317,13 +317,13 @@ def test_functionality(self):
         # hitCondition
         breakpoint_ids = self.set_source_breakpoints(self.main_path, [loop_line])
         self.assertEquals(len(breakpoint_ids), 1, "expect one breakpoint")
-        self.vscode.request_continue()
+        self.dap_server.request_continue()
 
         # Verify we hit the breakpoint we just set
         self.verify_breakpoint_hit(breakpoint_ids)
 
         # Make sure i is zero at first breakpoint
-        i = int(self.vscode.get_local_variable_value("i"))
+        i = int(self.dap_server.get_local_variable_value("i"))
         self.assertEquals(i, 0, "i != 0 after hitting breakpoint")
 
         # Update the condition on our breakpoint
@@ -337,7 +337,7 @@ def test_functionality(self):
         )
 
         self.continue_to_breakpoints(breakpoint_ids)
-        i = int(self.vscode.get_local_variable_value("i"))
+        i = int(self.dap_server.get_local_variable_value("i"))
         self.assertEquals(i, 4, "i != 4 showing conditional works")
 
         new_breakpoint_ids = self.set_source_breakpoints(
@@ -352,11 +352,11 @@ def test_functionality(self):
 
         # Continue with a hitCondition of 2 and expect it to skip 1 value
         self.continue_to_breakpoints(breakpoint_ids)
-        i = int(self.vscode.get_local_variable_value("i"))
+        i = int(self.dap_server.get_local_variable_value("i"))
         self.assertEquals(i, 6, "i != 6 showing hitCondition works")
 
         # continue after hitting our hitCondition and make sure it only goes
         # up by 1
         self.continue_to_breakpoints(breakpoint_ids)
-        i = int(self.vscode.get_local_variable_value("i"))
+        i = int(self.dap_server.get_local_variable_value("i"))
         self.assertEquals(i, 7, "i != 7 showing post hitCondition hits every time")
diff --git a/lldb/test/API/tools/lldb-vscode/breakpoint/TestVSCode_setExceptionBreakpoints.py b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setExceptionBreakpoints.py
similarity index 86%
rename from lldb/test/API/tools/lldb-vscode/breakpoint/TestVSCode_setExceptionBreakpoints.py
rename to lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setExceptionBreakpoints.py
index 9ce9b1144d04c7f..84d3f12490f3e41 100644
--- a/lldb/test/API/tools/lldb-vscode/breakpoint/TestVSCode_setExceptionBreakpoints.py
+++ b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setExceptionBreakpoints.py
@@ -1,16 +1,16 @@
 """
-Test lldb-vscode setBreakpoints request
+Test lldb-dap setBreakpoints request
 """
 
 
-import vscode
+import dap_server
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 
 
-class TestVSCode_setExceptionBreakpoints(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_setExceptionBreakpoints(lldbdap_testcase.DAPTestCaseBase):
     @skipIfWindows
     @skipIfRemote
     def test_functionality(self):
@@ -34,7 +34,7 @@ def test_functionality(self):
         self.build_and_launch(program)
 
         filters = ["cpp_throw", "cpp_catch"]
-        response = self.vscode.request_setExceptionBreakpoints(filters)
+        response = self.dap_server.request_setExceptionBreakpoints(filters)
         if response:
             self.assertTrue(response["success"])
 
diff --git a/lldb/test/API/tools/lldb-vscode/breakpoint/TestVSCode_setFunctionBreakpoints.py b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setFunctionBreakpoints.py
similarity index 88%
rename from lldb/test/API/tools/lldb-vscode/breakpoint/TestVSCode_setFunctionBreakpoints.py
rename to lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setFunctionBreakpoints.py
index 8748d0526a0e158..f90dc0f041ecb05 100644
--- a/lldb/test/API/tools/lldb-vscode/breakpoint/TestVSCode_setFunctionBreakpoints.py
+++ b/lldb/test/API/tools/lldb-dap/breakpoint/TestDAP_setFunctionBreakpoints.py
@@ -1,16 +1,16 @@
 """
-Test lldb-vscode setBreakpoints request
+Test lldb-dap setBreakpoints request
 """
 
 
-import vscode
+import dap_server
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 
 
-class TestVSCode_setFunctionBreakpoints(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_setFunctionBreakpoints(lldbdap_testcase.DAPTestCaseBase):
     @skipIfWindows
     @skipIfRemote
     def test_set_and_clear(self):
@@ -34,7 +34,7 @@ def test_set_and_clear(self):
         bp_id_12 = None
         functions = ["twelve"]
         # Set a function breakpoint at 'twelve'
-        response = self.vscode.request_setFunctionBreakpoints(functions)
+        response = self.dap_server.request_setFunctionBreakpoints(functions)
         if response:
             breakpoints = response["body"]["breakpoints"]
             self.assertEquals(
@@ -48,7 +48,7 @@ def test_set_and_clear(self):
 
         # Add an extra name and make sure we have two breakpoints after this
         functions.append("thirteen")
-        response = self.vscode.request_setFunctionBreakpoints(functions)
+        response = self.dap_server.request_setFunctionBreakpoints(functions)
         if response:
             breakpoints = response["body"]["breakpoints"]
             self.assertEquals(
@@ -62,7 +62,7 @@ def test_set_and_clear(self):
         # There is no breakpoint delete packet, clients just send another
         # setFunctionBreakpoints packet with the different function names.
         functions.remove("thirteen")
-        response = self.vscode.request_setFunctionBreakpoints(functions)
+        response = self.dap_server.request_setFunctionBreakpoints(functions)
         if response:
             breakpoints = response["body"]["breakpoints"]
             self.assertEquals(
@@ -83,7 +83,7 @@ def test_set_and_clear(self):
         # we have only 1 breakpoints set. The response above could have told
         # us about 1 breakpoints, but we want to make sure we don't have the
         # second one still set in the target
-        response = self.vscode.request_testGetTargetBreakpoints()
+        response = self.dap_server.request_testGetTargetBreakpoints()
         if response:
             breakpoints = response["body"]["breakpoints"]
             self.assertEquals(
@@ -103,7 +103,7 @@ def test_set_and_clear(self):
         # Now clear all breakpoints for the source file by passing down an
         # empty lines array
         functions = []
-        response = self.vscode.request_setFunctionBreakpoints(functions)
+        response = self.dap_server.request_setFunctionBreakpoints(functions)
         if response:
             breakpoints = response["body"]["breakpoints"]
             self.assertEquals(
@@ -113,7 +113,7 @@ def test_set_and_clear(self):
             )
 
         # Verify with the target that all breakpoints have been cleared
-        response = self.vscode.request_testGetTargetBreakpoints()
+        response = self.dap_server.request_testGetTargetBreakpoints()
         if response:
             breakpoints = response["body"]["breakpoints"]
             self.assertEquals(
@@ -140,7 +140,7 @@ def test_functionality(self):
         self.continue_to_breakpoints(breakpoint_ids)
 
         # Make sure i is zero at first breakpoint
-        i = int(self.vscode.get_local_variable_value("i"))
+        i = int(self.dap_server.get_local_variable_value("i"))
         self.assertEquals(i, 0, "i != 0 after hitting breakpoint")
 
         # Update the condition on our breakpoint
@@ -152,7 +152,7 @@ def test_functionality(self):
         )
 
         self.continue_to_breakpoints(breakpoint_ids)
-        i = int(self.vscode.get_local_variable_value("i"))
+        i = int(self.dap_server.get_local_variable_value("i"))
         self.assertEquals(i, 4, "i != 4 showing conditional works")
         new_breakpoint_ids = self.set_function_breakpoints(functions, hitCondition="2")
 
@@ -164,11 +164,11 @@ def test_functionality(self):
 
         # Continue with a hitCondition of 2 and expect it to skip 1 value
         self.continue_to_breakpoints(breakpoint_ids)
-        i = int(self.vscode.get_local_variable_value("i"))
+        i = int(self.dap_server.get_local_variable_value("i"))
         self.assertEquals(i, 6, "i != 6 showing hitCondition works")
 
         # continue after hitting our hitCondition and make sure it only goes
         # up by 1
         self.continue_to_breakpoints(breakpoint_ids)
-        i = int(self.vscode.get_local_variable_value("i"))
+        i = int(self.dap_server.get_local_variable_value("i"))
         self.assertEquals(i, 7, "i != 7 showing post hitCondition hits every time")
diff --git a/lldb/test/API/tools/lldb-vscode/breakpoint/main.cpp b/lldb/test/API/tools/lldb-dap/breakpoint/main.cpp
similarity index 82%
rename from lldb/test/API/tools/lldb-vscode/breakpoint/main.cpp
rename to lldb/test/API/tools/lldb-dap/breakpoint/main.cpp
index d4e0ac26dd11a74..935a63fab6d0c36 100644
--- a/lldb/test/API/tools/lldb-vscode/breakpoint/main.cpp
+++ b/lldb/test/API/tools/lldb-dap/breakpoint/main.cpp
@@ -11,10 +11,10 @@ int thirteen(int i) {
 }
 
 namespace a {
-  int fourteen(int i) {
-    return 14 + i; // break 14
-  }
+int fourteen(int i) {
+  return 14 + i; // break 14
 }
+} // namespace a
 int main(int argc, char const *argv[]) {
 #if defined(__APPLE__)
   const char *libother_name = "libother.dylib";
@@ -35,11 +35,11 @@ int main(int argc, char const *argv[]) {
   }
   foo(12); // before loop
 
-  for (int i=0; i<10; ++i) {
+  for (int i = 0; i < 10; ++i) {
     int x = twelve(i) + thirteen(i) + a::fourteen(i); // break loop
   }
   try {
-    throw std::invalid_argument( "throwing exception for testing" );
+    throw std::invalid_argument("throwing exception for testing");
   } catch (...) {
     puts("caught exception...");
   }
diff --git a/lldb/test/API/tools/lldb-vscode/breakpoint/other.c b/lldb/test/API/tools/lldb-dap/breakpoint/other.c
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/breakpoint/other.c
rename to lldb/test/API/tools/lldb-dap/breakpoint/other.c
diff --git a/lldb/test/API/tools/lldb-dap/categories b/lldb/test/API/tools/lldb-dap/categories
new file mode 100644
index 000000000000000..0902bbb595aa6e2
--- /dev/null
+++ b/lldb/test/API/tools/lldb-dap/categories
@@ -0,0 +1 @@
+lldb-dap
diff --git a/lldb/test/API/tools/lldb-vscode/completions/Makefile b/lldb/test/API/tools/lldb-dap/completions/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/completions/Makefile
rename to lldb/test/API/tools/lldb-dap/completions/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/completions/TestVSCode_completions.py b/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py
similarity index 81%
rename from lldb/test/API/tools/lldb-vscode/completions/TestVSCode_completions.py
rename to lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py
index 1b44c3c99361e3f..5f6d63392f4d5fc 100644
--- a/lldb/test/API/tools/lldb-vscode/completions/TestVSCode_completions.py
+++ b/lldb/test/API/tools/lldb-dap/completions/TestDAP_completions.py
@@ -1,16 +1,16 @@
 """
-Test lldb-vscode completions request
+Test lldb-dap completions request
 """
 
 
-import lldbvscode_testcase
-import vscode
+import lldbdap_testcase
+import dap_server
 from lldbsuite.test import lldbutil
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 
 
-class TestVSCode_completions(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_completions(lldbdap_testcase.DAPTestCaseBase):
     def verify_completions(self, actual_list, expected_list, not_expected_list=[]):
         for expected_item in expected_list:
             self.assertIn(expected_item, actual_list)
@@ -36,7 +36,7 @@ def test_completions(self):
 
         # shouldn't see variables inside main
         self.verify_completions(
-            self.vscode.get_completions("var"),
+            self.dap_server.get_completions("var"),
             [
                 {
                     "text": "var",
@@ -54,7 +54,7 @@ def test_completions(self):
 
         # should see global keywords but not variables inside main
         self.verify_completions(
-            self.vscode.get_completions("str"),
+            self.dap_server.get_completions("str"),
             [{"text": "struct", "label": "struct"}],
             [{"text": "str1", "label": "str1 -- std::string &"}],
         )
@@ -63,7 +63,7 @@ def test_completions(self):
 
         # should see variables from main but not from the other function
         self.verify_completions(
-            self.vscode.get_completions("var"),
+            self.dap_server.get_completions("var"),
             [
                 {"text": "var1", "label": "var1 -- int &"},
                 {"text": "var2", "label": "var2 -- int &"},
@@ -77,7 +77,7 @@ def test_completions(self):
         )
 
         self.verify_completions(
-            self.vscode.get_completions("str"),
+            self.dap_server.get_completions("str"),
             [
                 {"text": "struct", "label": "struct"},
                 {"text": "str1", "label": "str1 -- string &"},
@@ -86,19 +86,19 @@ def test_completions(self):
 
         # should complete arbitrary commands including word starts
         self.verify_completions(
-            self.vscode.get_completions("`log enable  "),
+            self.dap_server.get_completions("`log enable  "),
             [{"text": "gdb-remote", "label": "gdb-remote"}],
         )
 
         # should complete expressions with quotes inside
         self.verify_completions(
-            self.vscode.get_completions('`expr " "; typed'),
+            self.dap_server.get_completions('`expr " "; typed'),
             [{"text": "typedef", "label": "typedef"}],
         )
 
         # should complete an incomplete quoted token
         self.verify_completions(
-            self.vscode.get_completions('`setting "se'),
+            self.dap_server.get_completions('`setting "se'),
             [
                 {
                     "text": "set",
@@ -107,7 +107,7 @@ def test_completions(self):
             ],
         )
         self.verify_completions(
-            self.vscode.get_completions("`'comm"),
+            self.dap_server.get_completions("`'comm"),
             [
                 {
                     "text": "command",
@@ -117,34 +117,34 @@ def test_completions(self):
         )
 
         self.verify_completions(
-            self.vscode.get_completions("foo1.v"),
+            self.dap_server.get_completions("foo1.v"),
             [{"text": "var1", "label": "foo1.var1 -- int"}],
         )
 
         self.verify_completions(
-            self.vscode.get_completions("foo1.my_bar_object.v"),
+            self.dap_server.get_completions("foo1.my_bar_object.v"),
             [{"text": "var1", "label": "foo1.my_bar_object.var1 -- int"}],
         )
 
         self.verify_completions(
-            self.vscode.get_completions("foo1.var1 + foo1.v"),
+            self.dap_server.get_completions("foo1.var1 + foo1.v"),
             [{"text": "var1", "label": "foo1.var1 -- int"}],
         )
 
         self.verify_completions(
-            self.vscode.get_completions("foo1.var1 + v"),
+            self.dap_server.get_completions("foo1.var1 + v"),
             [{"text": "var1", "label": "var1 -- int &"}],
         )
 
         # should correctly handle spaces between objects and member operators
         self.verify_completions(
-            self.vscode.get_completions("foo1 .v"),
+            self.dap_server.get_completions("foo1 .v"),
             [{"text": "var1", "label": ".var1 -- int"}],
             [{"text": "var2", "label": ".var2 -- int"}],
         )
 
         self.verify_completions(
-            self.vscode.get_completions("foo1 . v"),
+            self.dap_server.get_completions("foo1 . v"),
             [{"text": "var1", "label": "var1 -- int"}],
             [{"text": "var2", "label": "var2 -- int"}],
         )
diff --git a/lldb/test/API/tools/lldb-vscode/completions/main.cpp b/lldb/test/API/tools/lldb-dap/completions/main.cpp
similarity index 81%
rename from lldb/test/API/tools/lldb-vscode/completions/main.cpp
rename to lldb/test/API/tools/lldb-dap/completions/main.cpp
index dccf43ff6feb817..4314067cfe951b9 100644
--- a/lldb/test/API/tools/lldb-vscode/completions/main.cpp
+++ b/lldb/test/API/tools/lldb-dap/completions/main.cpp
@@ -7,9 +7,9 @@ struct bar {
 
 struct foo {
   int var1;
-  bar* my_bar_pointer;
+  bar *my_bar_pointer;
   bar my_bar_object;
-  foo* next_foo;
+  foo *next_foo;
 };
 
 struct baz {
@@ -28,7 +28,7 @@ int main(int argc, char const *argv[]) {
   std::vector<baz> vec;
   fun(vec);
   bar bar1 = {2};
-  bar* bar2 = &bar1;
-  foo foo1 = {3,&bar1, bar1, NULL};
+  bar *bar2 = &bar1;
+  foo foo1 = {3, &bar1, bar1, NULL};
   return 0; // breakpoint 2
 }
diff --git a/lldb/test/API/tools/lldb-vscode/console/Makefile b/lldb/test/API/tools/lldb-dap/console/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/console/Makefile
rename to lldb/test/API/tools/lldb-dap/console/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/console/TestVSCode_console.py b/lldb/test/API/tools/lldb-dap/console/TestDAP_console.py
similarity index 83%
rename from lldb/test/API/tools/lldb-vscode/console/TestVSCode_console.py
rename to lldb/test/API/tools/lldb-dap/console/TestDAP_console.py
index d28e98b37c589dd..47c706f2ca72106 100644
--- a/lldb/test/API/tools/lldb-vscode/console/TestVSCode_console.py
+++ b/lldb/test/API/tools/lldb-dap/console/TestDAP_console.py
@@ -1,17 +1,19 @@
 """
-Test lldb-vscode setBreakpoints request
+Test lldb-dap setBreakpoints request
 """
 
-import vscode
+import dap_server
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 
 
-class TestVSCode_console(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_console(lldbdap_testcase.DAPTestCaseBase):
     def check_lldb_command(self, lldb_command, contains_string, assert_msg):
-        response = self.vscode.request_evaluate("`%s" % (lldb_command), context="repl")
+        response = self.dap_server.request_evaluate(
+            "`%s" % (lldb_command), context="repl"
+        )
         output = response["body"]["result"]
         self.assertIn(
             contains_string,
@@ -29,8 +31,8 @@ def test_scopes_variables_setVariable_evaluate(self):
         """
         Tests that the "scopes" request causes the currently selected
         thread and frame to be updated. There are no DAP packets that tell
-        lldb-vscode which thread and frame are selected other than the
-        "scopes" request. lldb-vscode will now select the thread and frame
+        lldb-dap which thread and frame are selected other than the
+        "scopes" request. lldb-dap will now select the thread and frame
         for the latest "scopes" request that it receives.
 
         The LLDB command interpreter needs to have the right thread and
@@ -52,7 +54,7 @@ def test_scopes_variables_setVariable_evaluate(self):
         self.continue_to_breakpoints(breakpoint_ids)
         # Cause a "scopes" to be sent for frame zero which should update the
         # selected thread and frame to frame 0.
-        self.vscode.get_local_variables(frameIndex=0)
+        self.dap_server.get_local_variables(frameIndex=0)
         # Verify frame #0 is selected in the command interpreter by running
         # the "frame select" command with no frame index which will print the
         # currently selected frame.
@@ -60,7 +62,7 @@ def test_scopes_variables_setVariable_evaluate(self):
 
         # Cause a "scopes" to be sent for frame one which should update the
         # selected thread and frame to frame 1.
-        self.vscode.get_local_variables(frameIndex=1)
+        self.dap_server.get_local_variables(frameIndex=1)
         # Verify frame #1 is selected in the command interpreter by running
         # the "frame select" command with no frame index which will print the
         # currently selected frame.
diff --git a/lldb/test/API/tools/lldb-vscode/console/TestVSCode_redirection_to_console.py b/lldb/test/API/tools/lldb-dap/console/TestDAP_redirection_to_console.py
similarity index 66%
rename from lldb/test/API/tools/lldb-vscode/console/TestVSCode_redirection_to_console.py
rename to lldb/test/API/tools/lldb-dap/console/TestDAP_redirection_to_console.py
index 27b0215c5443071..85911a449efef01 100644
--- a/lldb/test/API/tools/lldb-vscode/console/TestVSCode_redirection_to_console.py
+++ b/lldb/test/API/tools/lldb-dap/console/TestDAP_redirection_to_console.py
@@ -1,12 +1,12 @@
-import vscode
+import dap_server
 import json
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 
 
-class TestVSCode_redirection_to_console(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_redirection_to_console(lldbdap_testcase.DAPTestCaseBase):
     @skipIfWindows
     @skipIfRemote
     def test(self):
@@ -14,11 +14,11 @@ def test(self):
         Without proper stderr and stdout redirection, the following code would throw an
         exception, like the following:
 
-            Exception: unexpected malformed message from lldb-vscode
+            Exception: unexpected malformed message from lldb-dap
         """
         program = self.getBuildArtifact("a.out")
         self.build_and_launch(
-            program, lldbVSCodeEnv={"LLDB_VSCODE_TEST_STDOUT_STDERR_REDIRECTION": ""}
+            program, lldbDAPEnv={"LLDB_DAP_TEST_STDOUT_STDERR_REDIRECTION": ""}
         )
 
         source = "main.cpp"
@@ -29,4 +29,6 @@ def test(self):
         self.assertEqual(len(breakpoint_ids), 1, "expect correct number of breakpoints")
         self.continue_to_breakpoints(breakpoint_ids)
 
-        self.assertIn("argc", json.dumps(self.vscode.get_local_variables(frameIndex=1)))
+        self.assertIn(
+            "argc", json.dumps(self.dap_server.get_local_variables(frameIndex=1))
+        )
diff --git a/lldb/test/API/tools/lldb-vscode/console/main.cpp b/lldb/test/API/tools/lldb-dap/console/main.cpp
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/console/main.cpp
rename to lldb/test/API/tools/lldb-dap/console/main.cpp
diff --git a/lldb/test/API/tools/lldb-vscode/coreFile/TestVSCode_coreFile.py b/lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py
similarity index 92%
rename from lldb/test/API/tools/lldb-vscode/coreFile/TestVSCode_coreFile.py
rename to lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py
index 8cbdb8fa7e98a2c..2ac1f39772bbc52 100644
--- a/lldb/test/API/tools/lldb-vscode/coreFile/TestVSCode_coreFile.py
+++ b/lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py
@@ -1,17 +1,17 @@
 """
-Test lldb-vscode coreFile attaching
+Test lldb-dap coreFile attaching
 """
 
 
-import vscode
+import dap_server
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 import os
 
 
-class TestVSCode_coreFile(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_coreFile(lldbdap_testcase.DAPTestCaseBase):
     @skipIfWindows
     @skipIfRemote
     @skipIfLLVMTargetMissing("X86")
@@ -53,7 +53,7 @@ def test_core_file(self):
         self.continue_to_next_stop()
         self.assertEquals(self.get_stackFrames(), expected_frames)
 
-        self.vscode.request_next(threadId=32259)
+        self.dap_server.request_next(threadId=32259)
         self.assertEquals(self.get_stackFrames(), expected_frames)
 
     @skipIfWindows
diff --git a/lldb/test/API/tools/lldb-vscode/coreFile/linux-x86_64.core b/lldb/test/API/tools/lldb-dap/coreFile/linux-x86_64.core
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/coreFile/linux-x86_64.core
rename to lldb/test/API/tools/lldb-dap/coreFile/linux-x86_64.core
diff --git a/lldb/test/API/tools/lldb-vscode/coreFile/linux-x86_64.out b/lldb/test/API/tools/lldb-dap/coreFile/linux-x86_64.out
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/coreFile/linux-x86_64.out
rename to lldb/test/API/tools/lldb-dap/coreFile/linux-x86_64.out
diff --git a/lldb/test/API/tools/lldb-vscode/coreFile/main.c b/lldb/test/API/tools/lldb-dap/coreFile/main.c
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/coreFile/main.c
rename to lldb/test/API/tools/lldb-dap/coreFile/main.c
diff --git a/lldb/test/API/tools/lldb-vscode/correct-thread/Makefile b/lldb/test/API/tools/lldb-dap/correct-thread/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/correct-thread/Makefile
rename to lldb/test/API/tools/lldb-dap/correct-thread/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/correct-thread/TestVSCode_correct_thread.py b/lldb/test/API/tools/lldb-dap/correct-thread/TestDAP_correct_thread.py
similarity index 86%
rename from lldb/test/API/tools/lldb-vscode/correct-thread/TestVSCode_correct_thread.py
rename to lldb/test/API/tools/lldb-dap/correct-thread/TestDAP_correct_thread.py
index 5318163465e4e12..43d3b02e408c79b 100644
--- a/lldb/test/API/tools/lldb-vscode/correct-thread/TestVSCode_correct_thread.py
+++ b/lldb/test/API/tools/lldb-dap/correct-thread/TestDAP_correct_thread.py
@@ -1,15 +1,15 @@
 """
-Test lldb-vscode setBreakpoints request
+Test lldb-dap setBreakpoints request
 """
 
-import vscode
+import dap_server
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 
 
-class TestVSCode_correct_thread(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_correct_thread(lldbdap_testcase.DAPTestCaseBase):
     @skipIfWindows
     @skipIfRemote
     def test_correct_thread(self):
@@ -33,8 +33,8 @@ def test_correct_thread(self):
         # We're now stopped at the breakpoint in the first thread, thread #2.
         # Continue to join the first thread and hit the breakpoint in the
         # second thread, thread #3.
-        self.vscode.request_continue()
-        stopped_event = self.vscode.wait_for_stopped()
+        self.dap_server.request_continue()
+        stopped_event = self.dap_server.wait_for_stopped()
         # Verify that the description is the relevant breakpoint,
         # preserveFocusHint is False and threadCausedFocus is True
         self.assertTrue(
diff --git a/lldb/test/API/tools/lldb-vscode/correct-thread/main.c b/lldb/test/API/tools/lldb-dap/correct-thread/main.c
similarity index 84%
rename from lldb/test/API/tools/lldb-vscode/correct-thread/main.c
rename to lldb/test/API/tools/lldb-dap/correct-thread/main.c
index 157c3f994db1e7a..3eeb1ef02cb8705 100644
--- a/lldb/test/API/tools/lldb-vscode/correct-thread/main.c
+++ b/lldb/test/API/tools/lldb-dap/correct-thread/main.c
@@ -3,14 +3,12 @@
 
 int state_var;
 
-void *thread (void *in)
-{
+void *thread(void *in) {
   state_var++; // break here
   return NULL;
 }
 
-int main(int argc, char **argv)
-{
+int main(int argc, char **argv) {
   pthread_t t1, t2;
 
   pthread_create(&t1, NULL, *thread, NULL);
diff --git a/lldb/test/API/tools/lldb-vscode/disassemble/Makefile b/lldb/test/API/tools/lldb-dap/disassemble/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/disassemble/Makefile
rename to lldb/test/API/tools/lldb-dap/disassemble/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/disassemble/TestVSCode_disassemble.py b/lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py
similarity index 89%
rename from lldb/test/API/tools/lldb-vscode/disassemble/TestVSCode_disassemble.py
rename to lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py
index 521e8bf56a1d4a6..cb4e946c52112dc 100644
--- a/lldb/test/API/tools/lldb-vscode/disassemble/TestVSCode_disassemble.py
+++ b/lldb/test/API/tools/lldb-dap/disassemble/TestDAP_disassemble.py
@@ -1,17 +1,17 @@
 """
-Test lldb-vscode disassemble request
+Test lldb-dap disassemble request
 """
 
 
-import vscode
+import dap_server
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 import os
 
 
-class TestVSCode_disassemble(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_disassemble(lldbdap_testcase.DAPTestCaseBase):
     @skipIfWindows
     @skipIfRemote
     def test_disassemble(self):
diff --git a/lldb/test/API/tools/lldb-dap/disassemble/main.c b/lldb/test/API/tools/lldb-dap/disassemble/main.c
new file mode 100644
index 000000000000000..6609a4c37a70f04
--- /dev/null
+++ b/lldb/test/API/tools/lldb-dap/disassemble/main.c
@@ -0,0 +1,30 @@
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int compare_ints(const void *a, const void *b) {
+  int arg1 = *(const int *)a;
+  int arg2 = *(const int *)b;
+
+  // breakpoint 1
+
+  if (arg1 < arg2)
+    return -1;
+  if (arg1 > arg2)
+    return 1;
+  return 0;
+}
+
+int main(void) {
+  int ints[] = {-2, 99, 0, -743, 2, INT_MIN, 4};
+  int size = sizeof ints / sizeof *ints;
+
+  qsort(ints, size, sizeof(int), compare_ints);
+
+  for (int i = 0; i < size; i++) {
+    printf("%d ", ints[i]);
+  }
+
+  printf("\n");
+  return 0;
+}
\ No newline at end of file
diff --git a/lldb/test/API/tools/lldb-vscode/disconnect/Makefile b/lldb/test/API/tools/lldb-dap/disconnect/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/disconnect/Makefile
rename to lldb/test/API/tools/lldb-dap/disconnect/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/disconnect/TestVSCode_disconnect.py b/lldb/test/API/tools/lldb-dap/disconnect/TestDAP_disconnect.py
similarity index 87%
rename from lldb/test/API/tools/lldb-vscode/disconnect/TestVSCode_disconnect.py
rename to lldb/test/API/tools/lldb-dap/disconnect/TestDAP_disconnect.py
index bc7b74e5039b47d..e5aab88c7fa462c 100644
--- a/lldb/test/API/tools/lldb-vscode/disconnect/TestVSCode_disconnect.py
+++ b/lldb/test/API/tools/lldb-dap/disconnect/TestDAP_disconnect.py
@@ -1,23 +1,23 @@
 """
-Test lldb-vscode disconnect request
+Test lldb-dap disconnect request
 """
 
 
-import vscode
+import dap_server
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 import subprocess
 import time
 import os
 
 
-class TestVSCode_launch(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_launch(lldbdap_testcase.DAPTestCaseBase):
     source = "main.cpp"
 
     def disconnect_and_assert_no_output_printed(self):
-        self.vscode.request_disconnect()
+        self.dap_server.request_disconnect()
         # verify we didn't get any input after disconnect
         time.sleep(2)
         output = self.get_stdout()
@@ -40,7 +40,7 @@ def test_launch(self):
         )
         self.continue_to_next_stop()
 
-        self.vscode.request_disconnect()
+        self.dap_server.request_disconnect()
         # verify we didn't produce the side effect file
         time.sleep(1)
         self.assertFalse(os.path.exists(program + ".side_effect"))
@@ -69,13 +69,13 @@ def test_attach(self):
         lldbutil.wait_for_file_on_target(self, sync_file_path)
 
         self.attach(pid=self.process.pid, disconnectAutomatically=False)
-        response = self.vscode.request_evaluate("wait_for_attach = false;")
+        response = self.dap_server.request_evaluate("wait_for_attach = false;")
         self.assertTrue(response["success"])
 
         # verify we haven't produced the side effect file yet
         self.assertFalse(os.path.exists(program + ".side_effect"))
 
-        self.vscode.request_disconnect()
+        self.dap_server.request_disconnect()
         time.sleep(2)
         # verify we produced the side effect file, as the program continued after disconnecting
         self.assertTrue(os.path.exists(program + ".side_effect"))
diff --git a/lldb/test/API/tools/lldb-vscode/disconnect/main.cpp b/lldb/test/API/tools/lldb-dap/disconnect/main.cpp
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/disconnect/main.cpp
rename to lldb/test/API/tools/lldb-dap/disconnect/main.cpp
diff --git a/lldb/test/API/tools/lldb-vscode/evaluate/Makefile b/lldb/test/API/tools/lldb-dap/evaluate/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/evaluate/Makefile
rename to lldb/test/API/tools/lldb-dap/evaluate/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/evaluate/TestVSCode_evaluate.py b/lldb/test/API/tools/lldb-dap/evaluate/TestDAP_evaluate.py
similarity index 95%
rename from lldb/test/API/tools/lldb-vscode/evaluate/TestVSCode_evaluate.py
rename to lldb/test/API/tools/lldb-dap/evaluate/TestDAP_evaluate.py
index d1b73e1a057e1da..de9d2c93a1109dc 100644
--- a/lldb/test/API/tools/lldb-vscode/evaluate/TestVSCode_evaluate.py
+++ b/lldb/test/API/tools/lldb-dap/evaluate/TestDAP_evaluate.py
@@ -1,19 +1,19 @@
 """
-Test lldb-vscode completions request
+Test lldb-dap completions request
 """
 
 
-import lldbvscode_testcase
-import vscode
+import lldbdap_testcase
+import dap_server
 from lldbsuite.test import lldbutil
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 
 
-class TestVSCode_variables(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_variables(lldbdap_testcase.DAPTestCaseBase):
     def assertEvaluate(self, expression, regex):
         self.assertRegexpMatches(
-            self.vscode.request_evaluate(expression, context=self.context)["body"][
+            self.dap_server.request_evaluate(expression, context=self.context)["body"][
                 "result"
             ],
             regex,
@@ -22,7 +22,7 @@ def assertEvaluate(self, expression, regex):
     def assertEvaluateFailure(self, expression):
         self.assertNotIn(
             "result",
-            self.vscode.request_evaluate(expression, context=self.context)["body"],
+            self.dap_server.request_evaluate(expression, context=self.context)["body"],
         )
 
     def isExpressionParsedExpected(self):
diff --git a/lldb/test/API/tools/lldb-vscode/evaluate/foo.cpp b/lldb/test/API/tools/lldb-dap/evaluate/foo.cpp
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/evaluate/foo.cpp
rename to lldb/test/API/tools/lldb-dap/evaluate/foo.cpp
diff --git a/lldb/test/API/tools/lldb-vscode/evaluate/foo.h b/lldb/test/API/tools/lldb-dap/evaluate/foo.h
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/evaluate/foo.h
rename to lldb/test/API/tools/lldb-dap/evaluate/foo.h
diff --git a/lldb/test/API/tools/lldb-vscode/evaluate/main.cpp b/lldb/test/API/tools/lldb-dap/evaluate/main.cpp
similarity index 94%
rename from lldb/test/API/tools/lldb-vscode/evaluate/main.cpp
rename to lldb/test/API/tools/lldb-dap/evaluate/main.cpp
index f09d00e6444bb79..ca27b5ba5ca19d4 100644
--- a/lldb/test/API/tools/lldb-vscode/evaluate/main.cpp
+++ b/lldb/test/API/tools/lldb-dap/evaluate/main.cpp
@@ -1,7 +1,7 @@
 #include "foo.h"
 
-#include <vector>
 #include <map>
+#include <vector>
 
 static int static_int = 42;
 
@@ -43,7 +43,7 @@ int main(int argc, char const *argv[]) {
   std::vector<bool> my_bool_vec;
   my_bool_vec.push_back(true);
   my_bool_vec.push_back(false); // breakpoint 6
-  my_bool_vec.push_back(true); // breakpoint 7
+  my_bool_vec.push_back(true);  // breakpoint 7
 
   return 0;
 }
diff --git a/lldb/test/API/tools/lldb-vscode/exception/Makefile b/lldb/test/API/tools/lldb-dap/exception/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/exception/Makefile
rename to lldb/test/API/tools/lldb-dap/exception/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/exception/TestVSCode_exception.py b/lldb/test/API/tools/lldb-dap/exception/TestDAP_exception.py
similarity index 74%
rename from lldb/test/API/tools/lldb-vscode/exception/TestVSCode_exception.py
rename to lldb/test/API/tools/lldb-dap/exception/TestDAP_exception.py
index f52daaa713a6bc8..8c2c0154ba65c0a 100644
--- a/lldb/test/API/tools/lldb-vscode/exception/TestVSCode_exception.py
+++ b/lldb/test/API/tools/lldb-dap/exception/TestDAP_exception.py
@@ -1,14 +1,14 @@
 """
-Test exception behavior in VSCode
+Test exception behavior in DAP
 """
 
 
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
-import lldbvscode_testcase
+import lldbdap_testcase
 
 
-class TestVSCode_exception(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_exception(lldbdap_testcase.DAPTestCaseBase):
     @skipIfWindows
     def test_stopped_description(self):
         """
@@ -19,5 +19,5 @@ def test_stopped_description(self):
         print("test_stopped_description called", flush=True)
         self.build_and_launch(program)
 
-        self.vscode.request_continue()
+        self.dap_server.request_continue()
         self.assertTrue(self.verify_stop_exception_info("signal SIGABRT"))
diff --git a/lldb/test/API/tools/lldb-vscode/exception/main.cpp b/lldb/test/API/tools/lldb-dap/exception/main.cpp
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/exception/main.cpp
rename to lldb/test/API/tools/lldb-dap/exception/main.cpp
diff --git a/lldb/test/API/tools/lldb-vscode/launch/Makefile b/lldb/test/API/tools/lldb-dap/launch/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/launch/Makefile
rename to lldb/test/API/tools/lldb-dap/launch/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/launch/TestVSCode_launch.py b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py
similarity index 94%
rename from lldb/test/API/tools/lldb-vscode/launch/TestVSCode_launch.py
rename to lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py
index 39845468888fb21..78958612253691b 100644
--- a/lldb/test/API/tools/lldb-vscode/launch/TestVSCode_launch.py
+++ b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py
@@ -1,18 +1,18 @@
 """
-Test lldb-vscode setBreakpoints request
+Test lldb-dap setBreakpoints request
 """
 
 
-import vscode
+import dap_server
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 import time
 import os
 
 
-class TestVSCode_launch(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_launch(lldbdap_testcase.DAPTestCaseBase):
     @skipIfWindows
     @skipIfRemote
     def test_default(self):
@@ -33,22 +33,22 @@ def test_default(self):
     @skipIfRemote
     def test_termination(self):
         """
-        Tests the correct termination of lldb-vscode upon a 'disconnect'
+        Tests the correct termination of lldb-dap upon a 'disconnect'
         request.
         """
         self.create_debug_adaptor()
-        # The underlying lldb-vscode process must be alive
-        self.assertEqual(self.vscode.process.poll(), None)
+        # The underlying lldb-dap process must be alive
+        self.assertEqual(self.dap_server.process.poll(), None)
 
-        # The lldb-vscode process should finish even though
+        # The lldb-dap process should finish even though
         # we didn't close the communication socket explicitly
-        self.vscode.request_disconnect()
+        self.dap_server.request_disconnect()
 
-        # Wait until the underlying lldb-vscode process dies.
-        self.vscode.process.wait(timeout=10)
+        # Wait until the underlying lldb-dap process dies.
+        self.dap_server.process.wait(timeout=10)
 
         # Check the return code
-        self.assertEqual(self.vscode.process.poll(), 0)
+        self.assertEqual(self.dap_server.process.poll(), 0)
 
     @skipIfWindows
     @skipIfRemote
@@ -102,7 +102,7 @@ def test_cwd(self):
     def test_debuggerRoot(self):
         """
         Tests the "debuggerRoot" will change the working directory of
-        the lldb-vscode debug adaptor.
+        the lldb-dap debug adaptor.
         """
         program = self.getBuildArtifact("a.out")
         program_parent_dir = os.path.realpath(os.path.dirname(os.path.dirname(program)))
@@ -121,10 +121,10 @@ def test_debuggerRoot(self):
                 self.assertEquals(
                     program_parent_dir,
                     line[len(prefix) :],
-                    "lldb-vscode working dir '%s' == '%s'"
+                    "lldb-dap working dir '%s' == '%s'"
                     % (program_parent_dir, line[6:]),
                 )
-        self.assertTrue(found, "verified lldb-vscode working directory")
+        self.assertTrue(found, "verified lldb-dap working directory")
         self.continue_to_exit()
 
     @skipIfWindows
@@ -148,7 +148,7 @@ def test_sourcePath(self):
                 self.assertEquals(
                     quoted_path,
                     line[len(prefix) :],
-                    "lldb-vscode working dir %s == %s" % (quoted_path, line[6:]),
+                    "lldb-dap working dir %s == %s" % (quoted_path, line[6:]),
                 )
         self.assertTrue(found, 'found "sourcePath" in console output')
         self.continue_to_exit()
@@ -440,6 +440,6 @@ def test_terminate_commands(self):
         self.get_console()
         # Once it's disconnected the console should contain the
         # "terminateCommands"
-        self.vscode.request_disconnect(terminateDebuggee=True)
+        self.dap_server.request_disconnect(terminateDebuggee=True)
         output = self.collect_console(duration=1.0)
         self.verify_commands("terminateCommands", output, terminateCommands)
diff --git a/lldb/test/API/tools/lldb-vscode/launch/main.c b/lldb/test/API/tools/lldb-dap/launch/main.c
similarity index 77%
rename from lldb/test/API/tools/lldb-vscode/launch/main.c
rename to lldb/test/API/tools/lldb-dap/launch/main.c
index aed2af9828f34bd..01e209ac12c6650 100644
--- a/lldb/test/API/tools/lldb-vscode/launch/main.c
+++ b/lldb/test/API/tools/lldb-dap/launch/main.c
@@ -3,13 +3,13 @@
 #include <unistd.h>
 
 int main(int argc, char const *argv[], char const *envp[]) {
-  for (int i=0; i<argc; ++i)
+  for (int i = 0; i < argc; ++i)
     printf("arg[%i] = \"%s\"\n", i, argv[i]);
-  for (int i=0; envp[i]; ++i)
+  for (int i = 0; envp[i]; ++i)
     printf("env[%i] = \"%s\"\n", i, envp[i]);
   char *cwd = getcwd(NULL, 0);
   printf("cwd = \"%s\"\n", cwd); // breakpoint 1
   free(cwd);
   cwd = NULL;
-  return 0;  // breakpoint 2
+  return 0; // breakpoint 2
 }
diff --git a/lldb/test/API/tools/lldb-vscode/module/Makefile b/lldb/test/API/tools/lldb-dap/module/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/module/Makefile
rename to lldb/test/API/tools/lldb-dap/module/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py b/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py
similarity index 88%
rename from lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py
rename to lldb/test/API/tools/lldb-dap/module/TestDAP_module.py
index 5ddad1beabce02d..91321a00ed31ce2 100644
--- a/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py
+++ b/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py
@@ -1,16 +1,16 @@
 """
-Test lldb-vscode setBreakpoints request
+Test lldb-dap setBreakpoints request
 """
 
-import vscode
+import dap_server
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 import re
 
 
-class TestVSCode_module(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_module(lldbdap_testcase.DAPTestCaseBase):
     def run_test(self, symbol_basename, expect_debug_info_size):
         program_basename = "a.out.stripped"
         program = self.getBuildArtifact(program_basename)
@@ -19,7 +19,7 @@ def run_test(self, symbol_basename, expect_debug_info_size):
         breakpoint_ids = self.set_function_breakpoints(functions)
         self.assertEquals(len(breakpoint_ids), len(functions), "expect one breakpoint")
         self.continue_to_breakpoints(breakpoint_ids)
-        active_modules = self.vscode.get_modules()
+        active_modules = self.dap_server.get_modules()
         program_module = active_modules[program_basename]
         self.assertIn(
             program_basename,
@@ -36,13 +36,13 @@ def run_test(self, symbol_basename, expect_debug_info_size):
             "Make sure a.out.stripped has no debug info",
         )
         symbols_path = self.getBuildArtifact(symbol_basename)
-        self.vscode.request_evaluate(
+        self.dap_server.request_evaluate(
             "`%s" % ('target symbols add -s "%s" "%s"' % (program, symbols_path)),
             context="repl",
         )
 
         def checkSymbolsLoadedWithSize():
-            active_modules = self.vscode.get_modules()
+            active_modules = self.dap_server.get_modules()
             program_module = active_modules[program_basename]
             self.assertIn("symbolFilePath", program_module)
             self.assertIn(symbols_path, program_module["symbolFilePath"])
@@ -51,7 +51,7 @@ def checkSymbolsLoadedWithSize():
 
         if expect_debug_info_size:
             self.waitUntil(checkSymbolsLoadedWithSize)
-        active_modules = self.vscode.get_modules()
+        active_modules = self.dap_server.get_modules()
         program_module = active_modules[program_basename]
         self.assertEqual(program_basename, program_module["name"])
         self.assertEqual(program, program_module["path"])
@@ -95,8 +95,8 @@ def test_compile_units(self):
         lines = [breakpoint1_line]
         breakpoint_ids = self.set_source_breakpoints(source, lines)
         self.continue_to_breakpoints(breakpoint_ids)
-        moduleId = self.vscode.get_modules()["a.out"]["id"]
-        response = self.vscode.request_compileUnits(moduleId)
+        moduleId = self.dap_server.get_modules()["a.out"]["id"]
+        response = self.dap_server.request_compileUnits(moduleId)
         self.assertTrue(response["body"])
         cu_paths = [cu["compileUnitPath"] for cu in response["body"]["compileUnits"]]
         self.assertIn(main_source_path, cu_paths, "Real path to main.cpp matches")
diff --git a/lldb/test/API/tools/lldb-dap/module/foo.cpp b/lldb/test/API/tools/lldb-dap/module/foo.cpp
new file mode 100644
index 000000000000000..b6f33b8e070a4ed
--- /dev/null
+++ b/lldb/test/API/tools/lldb-dap/module/foo.cpp
@@ -0,0 +1 @@
+int foo() { return 12; }
diff --git a/lldb/test/API/tools/lldb-vscode/module/foo.h b/lldb/test/API/tools/lldb-dap/module/foo.h
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/module/foo.h
rename to lldb/test/API/tools/lldb-dap/module/foo.h
diff --git a/lldb/test/API/tools/lldb-vscode/module/main.cpp b/lldb/test/API/tools/lldb-dap/module/main.cpp
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/module/main.cpp
rename to lldb/test/API/tools/lldb-dap/module/main.cpp
diff --git a/lldb/test/API/tools/lldb-vscode/optimized/Makefile b/lldb/test/API/tools/lldb-dap/optimized/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/optimized/Makefile
rename to lldb/test/API/tools/lldb-dap/optimized/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/optimized/TestVSCode_optimized.py b/lldb/test/API/tools/lldb-dap/optimized/TestDAP_optimized.py
similarity index 81%
rename from lldb/test/API/tools/lldb-vscode/optimized/TestVSCode_optimized.py
rename to lldb/test/API/tools/lldb-dap/optimized/TestDAP_optimized.py
index 3f2d0dcab7441af..cc544919b6a8234 100644
--- a/lldb/test/API/tools/lldb-vscode/optimized/TestVSCode_optimized.py
+++ b/lldb/test/API/tools/lldb-dap/optimized/TestDAP_optimized.py
@@ -1,15 +1,15 @@
 """
-Test lldb-vscode variables/stackTrace request for optimized code
+Test lldb-dap variables/stackTrace request for optimized code
 """
 
-import vscode
+import dap_server
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 
 
-class TestVSCode_optimized(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_optimized(lldbdap_testcase.DAPTestCaseBase):
     @skipIfWindows
     @skipIfRemote
     def test_stack_frame_name(self):
@@ -24,9 +24,9 @@ def test_stack_frame_name(self):
             len(breakpoint_ids), len(lines), "expect correct number of breakpoints"
         )
         self.continue_to_breakpoints(breakpoint_ids)
-        leaf_frame = self.vscode.get_stackFrame(frameIndex=0)
+        leaf_frame = self.dap_server.get_stackFrame(frameIndex=0)
         self.assertTrue(leaf_frame["name"].endswith(" [opt]"))
-        parent_frame = self.vscode.get_stackFrame(frameIndex=1)
+        parent_frame = self.dap_server.get_stackFrame(frameIndex=1)
         self.assertTrue(parent_frame["name"].endswith(" [opt]"))
 
     @skipIfWindows
@@ -44,6 +44,6 @@ def test_optimized_variable(self):
             len(breakpoint_ids), len(lines), "expect correct number of breakpoints"
         )
         self.continue_to_breakpoints(breakpoint_ids)
-        optimized_variable = self.vscode.get_local_variable("argc")
+        optimized_variable = self.dap_server.get_local_variable("argc")
 
         self.assertTrue(optimized_variable["value"].startswith("<error:"))
diff --git a/lldb/test/API/tools/lldb-vscode/optimized/main.cpp b/lldb/test/API/tools/lldb-dap/optimized/main.cpp
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/optimized/main.cpp
rename to lldb/test/API/tools/lldb-dap/optimized/main.cpp
diff --git a/lldb/test/API/tools/lldb-vscode/restart/Makefile b/lldb/test/API/tools/lldb-dap/restart/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/restart/Makefile
rename to lldb/test/API/tools/lldb-dap/restart/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/restart/TestVSCode_restart.py b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart.py
similarity index 79%
rename from lldb/test/API/tools/lldb-vscode/restart/TestVSCode_restart.py
rename to lldb/test/API/tools/lldb-dap/restart/TestDAP_restart.py
index ec541a84f75dbd9..b622792db56536c 100644
--- a/lldb/test/API/tools/lldb-vscode/restart/TestVSCode_restart.py
+++ b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart.py
@@ -1,13 +1,13 @@
 """
-Test lldb-vscode RestartRequest.
+Test lldb-dap RestartRequest.
 """
 
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import line_number
-import lldbvscode_testcase
+import lldbdap_testcase
 
 
-class TestVSCode_restart(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_restart(lldbdap_testcase.DAPTestCaseBase):
     @skipIfWindows
     @skipIfRemote
     def test_basic_functionality(self):
@@ -23,23 +23,23 @@ def test_basic_functionality(self):
         [bp_A, bp_B] = self.set_source_breakpoints("main.c", [line_A, line_B])
 
         # Verify we hit A, then B.
-        self.vscode.request_configurationDone()
+        self.dap_server.request_configurationDone()
         self.verify_breakpoint_hit([bp_A])
-        self.vscode.request_continue()
+        self.dap_server.request_continue()
         self.verify_breakpoint_hit([bp_B])
 
         # Make sure i has been modified from its initial value of 0.
         self.assertEquals(
-            int(self.vscode.get_local_variable_value("i")),
+            int(self.dap_server.get_local_variable_value("i")),
             1234,
             "i != 1234 after hitting breakpoint B",
         )
 
         # Restart then check we stop back at A and program state has been reset.
-        self.vscode.request_restart()
+        self.dap_server.request_restart()
         self.verify_breakpoint_hit([bp_A])
         self.assertEquals(
-            int(self.vscode.get_local_variable_value("i")),
+            int(self.dap_server.get_local_variable_value("i")),
             0,
             "i != 0 after hitting breakpoint A on restart",
         )
@@ -53,11 +53,11 @@ def test_stopOnEntry(self):
         program = self.getBuildArtifact("a.out")
         self.build_and_launch(program, stopOnEntry=True)
         [bp_main] = self.set_function_breakpoints(["main"])
-        self.vscode.request_configurationDone()
+        self.dap_server.request_configurationDone()
 
         # Once the "configuration done" event is sent, we should get a stopped
         # event immediately because of stopOnEntry.
-        stopped_events = self.vscode.wait_for_stopped()
+        stopped_events = self.dap_server.wait_for_stopped()
         for stopped_event in stopped_events:
             if "body" in stopped_event:
                 body = stopped_event["body"]
@@ -68,13 +68,13 @@ def test_stopOnEntry(self):
                     )
 
         # Then, if we continue, we should hit the breakpoint at main.
-        self.vscode.request_continue()
+        self.dap_server.request_continue()
         self.verify_breakpoint_hit([bp_main])
 
         # Restart and check that we still get a stopped event before reaching
         # main.
-        self.vscode.request_restart()
-        stopped_events = self.vscode.wait_for_stopped()
+        self.dap_server.request_restart()
+        stopped_events = self.dap_server.wait_for_stopped()
         for stopped_event in stopped_events:
             if "body" in stopped_event:
                 body = stopped_event["body"]
@@ -90,7 +90,7 @@ def test_stopOnEntry(self):
     @skipIfRemote
     def test_arguments(self):
         """
-        Tests that lldb-vscode will use updated launch arguments included
+        Tests that lldb-dap will use updated launch arguments included
         with a restart request.
         """
         line_A = line_number("main.c", "// breakpoint A")
@@ -100,20 +100,20 @@ def test_arguments(self):
         [bp_A] = self.set_source_breakpoints("main.c", [line_A])
 
         # Verify we hit A, then B.
-        self.vscode.request_configurationDone()
+        self.dap_server.request_configurationDone()
         self.verify_breakpoint_hit([bp_A])
 
         # We don't set any arguments in the initial launch request, so argc
         # should be 1.
         self.assertEquals(
-            int(self.vscode.get_local_variable_value("argc")),
+            int(self.dap_server.get_local_variable_value("argc")),
             1,
             "argc != 1 before restart",
         )
 
         # Restart with some extra 'args' and check that the new argc reflects
         # the updated launch config.
-        self.vscode.request_restart(
+        self.dap_server.request_restart(
             restartArguments={
                 "arguments": {
                     "program": program,
@@ -123,7 +123,7 @@ def test_arguments(self):
         )
         self.verify_breakpoint_hit([bp_A])
         self.assertEquals(
-            int(self.vscode.get_local_variable_value("argc")),
+            int(self.dap_server.get_local_variable_value("argc")),
             5,
             "argc != 5 after restart",
         )
diff --git a/lldb/test/API/tools/lldb-vscode/restart/TestVSCode_restart_runInTerminal.py b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_runInTerminal.py
similarity index 80%
rename from lldb/test/API/tools/lldb-vscode/restart/TestVSCode_restart_runInTerminal.py
rename to lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_runInTerminal.py
index 38eebc92bc049d7..789f7b817dd9bbf 100644
--- a/lldb/test/API/tools/lldb-vscode/restart/TestVSCode_restart_runInTerminal.py
+++ b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_runInTerminal.py
@@ -1,22 +1,22 @@
 """
-Test lldb-vscode RestartRequest.
+Test lldb-dap RestartRequest.
 """
 
 import os
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import line_number
-import lldbvscode_testcase
+import lldbdap_testcase
 
 
-class TestVSCode_restart_runInTerminal(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_restart_runInTerminal(lldbdap_testcase.DAPTestCaseBase):
     def isTestSupported(self):
         try:
             # We skip this test for debug builds because it takes too long
             # parsing lldb's own debug info. Release builds are fine.
-            # Checking the size of the lldb-vscode binary seems to be a decent
+            # Checking the size of the lldb-dap binary seems to be a decent
             # proxy for a quick detection. It should be far less than 1 MB in
             # Release builds.
-            return os.path.getsize(os.environ["LLDBVSCODE_EXEC"]) < 1000000
+            return os.path.getsize(os.environ["LLDBDAP_EXEC"]) < 1000000
         except:
             return False
 
@@ -38,25 +38,25 @@ def test_basic_functionality(self):
         [bp_A, bp_B] = self.set_source_breakpoints("main.c", [line_A, line_B])
 
         # Verify we hit A, then B.
-        self.vscode.request_configurationDone()
+        self.dap_server.request_configurationDone()
         self.verify_breakpoint_hit([bp_A])
-        self.vscode.request_continue()
+        self.dap_server.request_continue()
         self.verify_breakpoint_hit([bp_B])
 
         # Make sure i has been modified from its initial value of 0.
         self.assertEquals(
-            int(self.vscode.get_local_variable_value("i")),
+            int(self.dap_server.get_local_variable_value("i")),
             1234,
             "i != 1234 after hitting breakpoint B",
         )
 
         # Restart.
-        self.vscode.request_restart()
+        self.dap_server.request_restart()
 
         # Finally, check we stop back at A and program state has been reset.
         self.verify_breakpoint_hit([bp_A])
         self.assertEquals(
-            int(self.vscode.get_local_variable_value("i")),
+            int(self.dap_server.get_local_variable_value("i")),
             0,
             "i != 0 after hitting breakpoint A on restart",
         )
@@ -76,11 +76,11 @@ def test_stopOnEntry(self):
         program = self.getBuildArtifact("a.out")
         self.build_and_launch(program, runInTerminal=True, stopOnEntry=True)
         [bp_main] = self.set_function_breakpoints(["main"])
-        self.vscode.request_configurationDone()
+        self.dap_server.request_configurationDone()
 
         # When using stopOnEntry, configurationDone doesn't result in a running
         # process, we should immediately get a stopped event instead.
-        stopped_events = self.vscode.wait_for_stopped()
+        stopped_events = self.dap_server.wait_for_stopped()
         # We should be stopped at the entry point.
         for stopped_event in stopped_events:
             if "body" in stopped_event:
@@ -92,13 +92,13 @@ def test_stopOnEntry(self):
                     )
 
         # Then, if we continue, we should hit the breakpoint at main.
-        self.vscode.request_continue()
+        self.dap_server.request_continue()
         self.verify_breakpoint_hit([bp_main])
 
         # Restart and check that we still get a stopped event before reaching
         # main.
-        self.vscode.request_restart()
-        stopped_events = self.vscode.wait_for_stopped()
+        self.dap_server.request_restart()
+        stopped_events = self.dap_server.wait_for_stopped()
         for stopped_event in stopped_events:
             if "body" in stopped_event:
                 body = stopped_event["body"]
diff --git a/lldb/test/API/tools/lldb-vscode/restart/main.c b/lldb/test/API/tools/lldb-dap/restart/main.c
similarity index 86%
rename from lldb/test/API/tools/lldb-vscode/restart/main.c
rename to lldb/test/API/tools/lldb-dap/restart/main.c
index 710af8e9750200f..b02928a694c9abf 100644
--- a/lldb/test/API/tools/lldb-vscode/restart/main.c
+++ b/lldb/test/API/tools/lldb-dap/restart/main.c
@@ -5,5 +5,5 @@ int main(int argc, char const *argv[], char const *envp[]) {
   printf("Do something\n"); // breakpoint A
   printf("Do something else\n");
   i = 1234;
-  return 0;  // breakpoint B
+  return 0; // breakpoint B
 }
diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile b/lldb/test/API/tools/lldb-dap/runInTerminal/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile
rename to lldb/test/API/tools/lldb-dap/runInTerminal/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py b/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py
similarity index 84%
rename from lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py
rename to lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py
index a6919f1f63328e3..a72571898ab500a 100644
--- a/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py
+++ b/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py
@@ -1,13 +1,13 @@
 """
-Test lldb-vscode runInTerminal reverse request
+Test lldb-dap runInTerminal reverse request
 """
 
 
-import vscode
+import dap_server
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 import time
 import os
 import subprocess
@@ -16,7 +16,7 @@
 from threading import Thread
 
 
-class TestVSCode_runInTerminal(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_runInTerminal(lldbdap_testcase.DAPTestCaseBase):
     def readPidMessage(self, fifo_file):
         with open(fifo_file, "r") as file:
             self.assertIn("pid", file.readline())
@@ -36,9 +36,9 @@ def isTestSupported(self):
         try:
             # We skip this test for debug builds because it takes too long parsing lldb's own
             # debug info. Release builds are fine.
-            # Checking the size of the lldb-vscode binary seems to be a decent proxy for a quick
+            # Checking the size of the lldb-dap binary seems to be a decent proxy for a quick
             # detection. It should be far less than 1 MB in Release builds.
-            if os.path.getsize(os.environ["LLDBVSCODE_EXEC"]) < 1000000:
+            if os.path.getsize(os.environ["LLDBDAP_EXEC"]) < 1000000:
                 return True
         except:
             return False
@@ -60,11 +60,13 @@ def test_runInTerminal(self):
         )
 
         self.assertEqual(
-            len(self.vscode.reverse_requests), 1, "make sure we got a reverse request"
+            len(self.dap_server.reverse_requests),
+            1,
+            "make sure we got a reverse request",
         )
 
-        request = self.vscode.reverse_requests[0]
-        self.assertIn(self.lldbVSCodeExec, request["arguments"]["args"])
+        request = self.dap_server.reverse_requests[0]
+        self.assertIn(self.lldbDAPExec, request["arguments"]["args"])
         self.assertIn(program, request["arguments"]["args"])
         self.assertIn("foobar", request["arguments"]["args"])
         self.assertIn("FOO", request["arguments"]["env"])
@@ -75,18 +77,18 @@ def test_runInTerminal(self):
         self.continue_to_next_stop()
 
         # We verify we actually stopped inside the loop
-        counter = int(self.vscode.get_local_variable_value("counter"))
+        counter = int(self.dap_server.get_local_variable_value("counter"))
         self.assertTrue(counter > 0)
 
         # We verify we were able to set the launch arguments
-        argc = int(self.vscode.get_local_variable_value("argc"))
+        argc = int(self.dap_server.get_local_variable_value("argc"))
         self.assertEqual(argc, 2)
 
-        argv1 = self.vscode.request_evaluate("argv[1]")["body"]["result"]
+        argv1 = self.dap_server.request_evaluate("argv[1]")["body"]["result"]
         self.assertIn("foobar", argv1)
 
         # We verify we were able to set the environment
-        env = self.vscode.request_evaluate("foo")["body"]["result"]
+        env = self.dap_server.request_evaluate("foo")["body"]["result"]
         self.assertIn("bar", env)
 
     @skipIfWindows
@@ -116,7 +118,7 @@ def test_missingArgInRunInTerminalLauncher(self):
         if not self.isTestSupported():
             return
         proc = subprocess.run(
-            [self.lldbVSCodeExec, "--launch-target", "INVALIDPROGRAM"],
+            [self.lldbDAPExec, "--launch-target", "INVALIDPROGRAM"],
             capture_output=True,
             universal_newlines=True,
         )
@@ -136,7 +138,7 @@ def test_FakeAttachedRunInTerminalLauncherWithInvalidProgram(self):
 
         proc = subprocess.Popen(
             [
-                self.lldbVSCodeExec,
+                self.lldbDAPExec,
                 "--comm-file",
                 comm_file,
                 "--launch-target",
@@ -164,7 +166,7 @@ def test_FakeAttachedRunInTerminalLauncherWithValidProgram(self):
 
         proc = subprocess.Popen(
             [
-                self.lldbVSCodeExec,
+                self.lldbDAPExec,
                 "--comm-file",
                 comm_file,
                 "--launch-target",
@@ -191,7 +193,7 @@ def test_FakeAttachedRunInTerminalLauncherAndCheckEnvironment(self):
         os.mkfifo(comm_file)
 
         proc = subprocess.Popen(
-            [self.lldbVSCodeExec, "--comm-file", comm_file, "--launch-target", "env"],
+            [self.lldbDAPExec, "--comm-file", comm_file, "--launch-target", "env"],
             universal_newlines=True,
             stdout=subprocess.PIPE,
             env={**os.environ, "FOO": "BAR"},
@@ -214,7 +216,7 @@ def test_NonAttachedRunInTerminalLauncher(self):
 
         proc = subprocess.Popen(
             [
-                self.lldbVSCodeExec,
+                self.lldbDAPExec,
                 "--comm-file",
                 comm_file,
                 "--launch-target",
@@ -223,7 +225,7 @@ def test_NonAttachedRunInTerminalLauncher(self):
             ],
             universal_newlines=True,
             stderr=subprocess.PIPE,
-            env={**os.environ, "LLDB_VSCODE_RIT_TIMEOUT_IN_MS": "1000"},
+            env={**os.environ, "LLDB_DAP_RIT_TIMEOUT_IN_MS": "1000"},
         )
 
         self.readPidMessage(comm_file)
diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c b/lldb/test/API/tools/lldb-dap/runInTerminal/main.c
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/runInTerminal/main.c
rename to lldb/test/API/tools/lldb-dap/runInTerminal/main.c
diff --git a/lldb/test/API/tools/lldb-vscode/stackTrace/Makefile b/lldb/test/API/tools/lldb-dap/stackTrace/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/stackTrace/Makefile
rename to lldb/test/API/tools/lldb-dap/stackTrace/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/stackTrace/TestVSCode_stackTrace.py b/lldb/test/API/tools/lldb-dap/stackTrace/TestDAP_stackTrace.py
similarity index 97%
rename from lldb/test/API/tools/lldb-vscode/stackTrace/TestVSCode_stackTrace.py
rename to lldb/test/API/tools/lldb-dap/stackTrace/TestDAP_stackTrace.py
index fa9e9f30cef2f99..245b3f34b70c868 100644
--- a/lldb/test/API/tools/lldb-vscode/stackTrace/TestVSCode_stackTrace.py
+++ b/lldb/test/API/tools/lldb-dap/stackTrace/TestDAP_stackTrace.py
@@ -1,17 +1,17 @@
 """
-Test lldb-vscode setBreakpoints request
+Test lldb-dap setBreakpoints request
 """
 
 
-import vscode
+import dap_server
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 import os
 
 
-class TestVSCode_stackTrace(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_stackTrace(lldbdap_testcase.DAPTestCaseBase):
     name_key_path = ["name"]
     source_key_path = ["source", "path"]
     line_key_path = ["line"]
diff --git a/lldb/test/API/tools/lldb-vscode/stackTrace/main.c b/lldb/test/API/tools/lldb-dap/stackTrace/main.c
similarity index 65%
rename from lldb/test/API/tools/lldb-vscode/stackTrace/main.c
rename to lldb/test/API/tools/lldb-dap/stackTrace/main.c
index 85b41c492817f0c..862473a3e6ac8c2 100644
--- a/lldb/test/API/tools/lldb-vscode/stackTrace/main.c
+++ b/lldb/test/API/tools/lldb-dap/stackTrace/main.c
@@ -3,8 +3,8 @@
 
 int recurse(int x) {
   if (x <= 1)
-    return 1; // recurse end
-  return recurse(x-1) + x; // recurse call
+    return 1;                // recurse end
+  return recurse(x - 1) + x; // recurse call
 }
 
 int main(int argc, char const *argv[]) {
diff --git a/lldb/test/API/tools/lldb-vscode/stackTraceMissingFunctionName/Makefile b/lldb/test/API/tools/lldb-dap/stackTraceMissingFunctionName/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/stackTraceMissingFunctionName/Makefile
rename to lldb/test/API/tools/lldb-dap/stackTraceMissingFunctionName/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/stackTraceMissingFunctionName/TestVSCode_stackTraceMissingFunctionName.py b/lldb/test/API/tools/lldb-dap/stackTraceMissingFunctionName/TestDAP_stackTraceMissingFunctionName.py
similarity index 77%
rename from lldb/test/API/tools/lldb-vscode/stackTraceMissingFunctionName/TestVSCode_stackTraceMissingFunctionName.py
rename to lldb/test/API/tools/lldb-dap/stackTraceMissingFunctionName/TestDAP_stackTraceMissingFunctionName.py
index 8e7da6386d8e505..344629c1d5dad32 100644
--- a/lldb/test/API/tools/lldb-vscode/stackTraceMissingFunctionName/TestVSCode_stackTraceMissingFunctionName.py
+++ b/lldb/test/API/tools/lldb-dap/stackTraceMissingFunctionName/TestDAP_stackTraceMissingFunctionName.py
@@ -1,17 +1,17 @@
 """
-Test lldb-vscode stack trace response
+Test lldb-dap stack trace response
 """
 
 
-import vscode
+import dap_server
 from lldbsuite.test.decorators import *
 import os
 
-import lldbvscode_testcase
+import lldbdap_testcase
 from lldbsuite.test import lldbtest, lldbutil
 
 
-class TestVSCode_stackTraceMissingFunctionName(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_stackTraceMissingFunctionName(lldbdap_testcase.DAPTestCaseBase):
     @skipIfWindows
     @skipIfRemote
     def test_missingFunctionName(self):
diff --git a/lldb/test/API/tools/lldb-vscode/stackTraceMissingFunctionName/main.cpp b/lldb/test/API/tools/lldb-dap/stackTraceMissingFunctionName/main.cpp
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/stackTraceMissingFunctionName/main.cpp
rename to lldb/test/API/tools/lldb-dap/stackTraceMissingFunctionName/main.cpp
diff --git a/lldb/test/API/tools/lldb-vscode/startDebugging/Makefile b/lldb/test/API/tools/lldb-dap/startDebugging/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/startDebugging/Makefile
rename to lldb/test/API/tools/lldb-dap/startDebugging/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/startDebugging/TestVSCode_startDebugging.py b/lldb/test/API/tools/lldb-dap/startDebugging/TestDAP_startDebugging.py
similarity index 64%
rename from lldb/test/API/tools/lldb-vscode/startDebugging/TestVSCode_startDebugging.py
rename to lldb/test/API/tools/lldb-dap/startDebugging/TestDAP_startDebugging.py
index d8bc910c352334c..fd48e69cae5e252 100644
--- a/lldb/test/API/tools/lldb-vscode/startDebugging/TestVSCode_startDebugging.py
+++ b/lldb/test/API/tools/lldb-dap/startDebugging/TestDAP_startDebugging.py
@@ -1,16 +1,16 @@
 """
-Test lldb-vscode startDebugging reverse request
+Test lldb-dap startDebugging reverse request
 """
 
 
-import vscode
+import dap_server
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 
 
-class TestVSCode_startDebugging(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_startDebugging(lldbdap_testcase.DAPTestCaseBase):
     def test_startDebugging(self):
         """
         Tests the "startDebugging" reverse request. It makes sure that the IDE can
@@ -24,16 +24,18 @@ def test_startDebugging(self):
 
         self.set_source_breakpoints(source, [breakpoint_line])
         self.continue_to_next_stop()
-        self.vscode.request_evaluate(
-            "`lldb-vscode startDebugging attach '{\"pid\":321}'", context="repl"
+        self.dap_server.request_evaluate(
+            "`lldb-dap startDebugging attach '{\"pid\":321}'", context="repl"
         )
 
         self.continue_to_exit()
 
         self.assertEqual(
-            len(self.vscode.reverse_requests), 1, "make sure we got a reverse request"
+            len(self.dap_server.reverse_requests),
+            1,
+            "make sure we got a reverse request",
         )
 
-        request = self.vscode.reverse_requests[0]
+        request = self.dap_server.reverse_requests[0]
         self.assertEqual(request["arguments"]["configuration"]["pid"], 321)
         self.assertEqual(request["arguments"]["request"], "attach")
diff --git a/lldb/test/API/tools/lldb-vscode/startDebugging/main.c b/lldb/test/API/tools/lldb-dap/startDebugging/main.c
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/startDebugging/main.c
rename to lldb/test/API/tools/lldb-dap/startDebugging/main.c
diff --git a/lldb/test/API/tools/lldb-vscode/step/Makefile b/lldb/test/API/tools/lldb-dap/step/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/step/Makefile
rename to lldb/test/API/tools/lldb-dap/step/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/step/TestVSCode_step.py b/lldb/test/API/tools/lldb-dap/step/TestDAP_step.py
similarity index 94%
rename from lldb/test/API/tools/lldb-vscode/step/TestVSCode_step.py
rename to lldb/test/API/tools/lldb-dap/step/TestDAP_step.py
index 14dd20d0b8af35d..578e64e36ea0554 100644
--- a/lldb/test/API/tools/lldb-vscode/step/TestVSCode_step.py
+++ b/lldb/test/API/tools/lldb-dap/step/TestDAP_step.py
@@ -1,16 +1,16 @@
 """
-Test lldb-vscode setBreakpoints request
+Test lldb-dap setBreakpoints request
 """
 
 
-import vscode
+import dap_server
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 
 
-class TestVSCode_step(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_step(lldbdap_testcase.DAPTestCaseBase):
     @skipIfWindows
     @skipIfRemote
     def test_step(self):
@@ -29,7 +29,7 @@ def test_step(self):
             len(breakpoint_ids), len(lines), "expect correct number of breakpoints"
         )
         self.continue_to_breakpoints(breakpoint_ids)
-        threads = self.vscode.get_threads()
+        threads = self.dap_server.get_threads()
         for thread in threads:
             if "reason" in thread:
                 reason = thread["reason"]
diff --git a/lldb/test/API/tools/lldb-dap/step/main.cpp b/lldb/test/API/tools/lldb-dap/step/main.cpp
new file mode 100644
index 000000000000000..8905beb5e7eff6d
--- /dev/null
+++ b/lldb/test/API/tools/lldb-dap/step/main.cpp
@@ -0,0 +1,8 @@
+int function(int x) {
+  if ((x % 2) == 0)
+    return function(x - 1) + x; // breakpoint 1
+  else
+    return x;
+}
+
+int main(int argc, char const *argv[]) { return function(2); }
diff --git a/lldb/test/API/tools/lldb-vscode/stop-hooks/Makefile b/lldb/test/API/tools/lldb-dap/stop-hooks/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/stop-hooks/Makefile
rename to lldb/test/API/tools/lldb-dap/stop-hooks/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/stop-hooks/TestVSCode_stop_hooks.py b/lldb/test/API/tools/lldb-dap/stop-hooks/TestDAP_stop_hooks.py
similarity index 82%
rename from lldb/test/API/tools/lldb-vscode/stop-hooks/TestVSCode_stop_hooks.py
rename to lldb/test/API/tools/lldb-dap/stop-hooks/TestDAP_stop_hooks.py
index 16a09f9d1484044..c538e8002a03263 100644
--- a/lldb/test/API/tools/lldb-vscode/stop-hooks/TestVSCode_stop_hooks.py
+++ b/lldb/test/API/tools/lldb-dap/stop-hooks/TestDAP_stop_hooks.py
@@ -5,14 +5,14 @@
 
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
-import lldbvscode_testcase
+import lldbdap_testcase
 
 
-class TestVSCode_stop_hooks(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_stop_hooks(lldbdap_testcase.DAPTestCaseBase):
     @skipIfRemote
     def test_stop_hooks_before_run(self):
         """
-        Test that there is no race condition between lldb-vscode and
+        Test that there is no race condition between lldb-dap and
         stop hooks executor
         """
         program = self.getBuildArtifact("a.out")
@@ -24,9 +24,9 @@ def test_stop_hooks_before_run(self):
 
         breakpoint_ids = self.set_function_breakpoints(["main"])
         # This request hangs if the race happens, because, in that case, the
-        # command interpreter is in synchronous mode while lldb-vscode expects
+        # command interpreter is in synchronous mode while lldb-dap expects
         # it to be in asynchronous mode, so, the process doesn't send the stop
-        # event to "lldb.Debugger" listener (which is monitored by lldb-vscode).
+        # event to "lldb.Debugger" listener (which is monitored by lldb-dap).
         self.continue_to_breakpoints(breakpoint_ids)
 
         self.continue_to_exit()
diff --git a/lldb/test/API/tools/lldb-vscode/stop-hooks/main.c b/lldb/test/API/tools/lldb-dap/stop-hooks/main.c
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/stop-hooks/main.c
rename to lldb/test/API/tools/lldb-dap/stop-hooks/main.c
diff --git a/lldb/test/API/tools/lldb-vscode/terminated-event/Makefile b/lldb/test/API/tools/lldb-dap/terminated-event/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/terminated-event/Makefile
rename to lldb/test/API/tools/lldb-dap/terminated-event/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/terminated-event/TestVSCode_terminatedEvent.py b/lldb/test/API/tools/lldb-dap/terminated-event/TestDAP_terminatedEvent.py
similarity index 89%
rename from lldb/test/API/tools/lldb-vscode/terminated-event/TestVSCode_terminatedEvent.py
rename to lldb/test/API/tools/lldb-dap/terminated-event/TestDAP_terminatedEvent.py
index 525de0b1201c4c1..78bc14c380f96f3 100644
--- a/lldb/test/API/tools/lldb-vscode/terminated-event/TestVSCode_terminatedEvent.py
+++ b/lldb/test/API/tools/lldb-dap/terminated-event/TestDAP_terminatedEvent.py
@@ -1,17 +1,17 @@
 """
-Test lldb-vscode terminated event
+Test lldb-dap terminated event
 """
 
-import vscode
+import dap_server
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
 from lldbsuite.test import lldbutil
-import lldbvscode_testcase
+import lldbdap_testcase
 import re
 import json
 
 
-class TestVSCode_terminatedEvent(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_terminatedEvent(lldbdap_testcase.DAPTestCaseBase):
     @skipIfWindows
     @skipIfRemote
     def test_terminated_event(self):
@@ -44,7 +44,7 @@ def test_terminated_event(self):
         self.continue_to_breakpoints(breakpoint_ids)
         self.continue_to_exit()
 
-        statistics = self.vscode.wait_for_terminated()["statistics"]
+        statistics = self.dap_server.wait_for_terminated()["statistics"]
         self.assertTrue(statistics["totalDebugInfoByteSize"] > 0)
         self.assertTrue(statistics["totalDebugInfoEnabled"] > 0)
         self.assertTrue(statistics["totalModuleCountHasDebugInfo"] > 0)
@@ -52,7 +52,7 @@ def test_terminated_event(self):
         self.assertIsNotNone(statistics["memory"])
         self.assertNotIn("modules", statistics.keys())
 
-        # lldb-vscode debugs one target at a time
+        # lldb-dap debugs one target at a time
         target = json.loads(statistics["targets"])[0]
         self.assertTrue(target["totalBreakpointResolveTime"] > 0)
 
diff --git a/lldb/test/API/tools/lldb-dap/terminated-event/foo.cpp b/lldb/test/API/tools/lldb-dap/terminated-event/foo.cpp
new file mode 100644
index 000000000000000..b6f33b8e070a4ed
--- /dev/null
+++ b/lldb/test/API/tools/lldb-dap/terminated-event/foo.cpp
@@ -0,0 +1 @@
+int foo() { return 12; }
diff --git a/lldb/test/API/tools/lldb-vscode/terminated-event/foo.h b/lldb/test/API/tools/lldb-dap/terminated-event/foo.h
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/terminated-event/foo.h
rename to lldb/test/API/tools/lldb-dap/terminated-event/foo.h
diff --git a/lldb/test/API/tools/lldb-vscode/terminated-event/main.cpp b/lldb/test/API/tools/lldb-dap/terminated-event/main.cpp
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/terminated-event/main.cpp
rename to lldb/test/API/tools/lldb-dap/terminated-event/main.cpp
index cd984e560e0d29e..50dd77c0a9c1db7 100644
--- a/lldb/test/API/tools/lldb-vscode/terminated-event/main.cpp
+++ b/lldb/test/API/tools/lldb-dap/terminated-event/main.cpp
@@ -1,5 +1,5 @@
-#include <iostream>
 #include "foo.h"
+#include <iostream>
 
 int main(int argc, char const *argv[]) {
   std::cout << "Hello World!" << std::endl; // main breakpoint 1
diff --git a/lldb/test/API/tools/lldb-vscode/variables/Makefile b/lldb/test/API/tools/lldb-dap/variables/Makefile
similarity index 100%
rename from lldb/test/API/tools/lldb-vscode/variables/Makefile
rename to lldb/test/API/tools/lldb-dap/variables/Makefile
diff --git a/lldb/test/API/tools/lldb-vscode/variables/TestVSCode_variables.py b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py
similarity index 89%
rename from lldb/test/API/tools/lldb-vscode/variables/TestVSCode_variables.py
rename to lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py
index fc24b3b34e70283..11d9bf18db7a6e1 100644
--- a/lldb/test/API/tools/lldb-vscode/variables/TestVSCode_variables.py
+++ b/lldb/test/API/tools/lldb-dap/variables/TestDAP_variables.py
@@ -1,11 +1,11 @@
 """
-Test lldb-vscode setBreakpoints request
+Test lldb-dap setBreakpoints request
 """
 
 import os
 
-import lldbvscode_testcase
-import vscode
+import lldbdap_testcase
+import dap_server
 from lldbsuite.test import lldbutil
 from lldbsuite.test.decorators import *
 from lldbsuite.test.lldbtest import *
@@ -18,7 +18,7 @@ def make_buffer_verify_dict(start_idx, count, offset=0):
     return verify_dict
 
 
-class TestVSCode_variables(lldbvscode_testcase.VSCodeTestCaseBase):
+class TestDAP_variables(lldbdap_testcase.DAPTestCaseBase):
     def verify_values(self, verify_dict, actual, varref_dict=None, expression=None):
         if "equals" in verify_dict:
             verify = verify_dict["equals"]
@@ -79,7 +79,7 @@ def verify_values(self, verify_dict, actual, varref_dict=None, expression=None):
                 ("children verify values specified for " "variable without children"),
             )
 
-            response = self.vscode.request_variables(varRef)
+            response = self.dap_server.request_variables(varRef)
             self.verify_variables(
                 verify_dict["children"], response["body"]["variables"], varref_dict
             )
@@ -111,7 +111,7 @@ def darwin_dwarf_missing_obj(self, initCommands):
         self.assertEquals(len(breakpoint_ids), len(functions), "expect one breakpoint")
         self.continue_to_breakpoints(breakpoint_ids)
 
-        locals = self.vscode.get_local_variables()
+        locals = self.dap_server.get_local_variables()
 
         verify_locals = {
             "<error>": {
@@ -147,8 +147,8 @@ def do_test_scopes_variables_setVariable_evaluate(
             len(breakpoint_ids), len(lines), "expect correct number of breakpoints"
         )
         self.continue_to_breakpoints(breakpoint_ids)
-        locals = self.vscode.get_local_variables()
-        globals = self.vscode.get_global_variables()
+        locals = self.dap_server.get_local_variables()
+        globals = self.dap_server.get_global_variables()
         buffer_children = make_buffer_verify_dict(0, 32)
         verify_locals = {
             "argc": {"equals": {"type": "int", "value": "1"}},
@@ -181,28 +181,28 @@ def do_test_scopes_variables_setVariable_evaluate(
         # has optional parameters like "start" and "count" to limit the number
         # of variables that are fetched
         varRef = varref_dict["pt.buffer"]
-        response = self.vscode.request_variables(varRef)
+        response = self.dap_server.request_variables(varRef)
         self.verify_variables(buffer_children, response["body"]["variables"])
         # Verify setting start=0 in the arguments still gets all children
-        response = self.vscode.request_variables(varRef, start=0)
+        response = self.dap_server.request_variables(varRef, start=0)
         self.verify_variables(buffer_children, response["body"]["variables"])
         # Verify setting count=0 in the arguments still gets all children.
         # If count is zero, it means to get all children.
-        response = self.vscode.request_variables(varRef, count=0)
+        response = self.dap_server.request_variables(varRef, count=0)
         self.verify_variables(buffer_children, response["body"]["variables"])
         # Verify setting count to a value that is too large in the arguments
         # still gets all children, and no more
-        response = self.vscode.request_variables(varRef, count=1000)
+        response = self.dap_server.request_variables(varRef, count=1000)
         self.verify_variables(buffer_children, response["body"]["variables"])
         # Verify setting the start index and count gets only the children we
         # want
-        response = self.vscode.request_variables(varRef, start=5, count=5)
+        response = self.dap_server.request_variables(varRef, start=5, count=5)
         self.verify_variables(
             make_buffer_verify_dict(5, 5), response["body"]["variables"]
         )
         # Verify setting the start index to a value that is out of range
         # results in an empty list
-        response = self.vscode.request_variables(varRef, start=32, count=1)
+        response = self.dap_server.request_variables(varRef, start=32, count=1)
         self.assertEqual(
             len(response["body"]["variables"]),
             0,
@@ -253,7 +253,7 @@ def do_test_scopes_variables_setVariable_evaluate(
             },
         }
         for expression in expressions:
-            response = self.vscode.request_evaluate(expression)
+            response = self.dap_server.request_evaluate(expression)
             self.verify_values(expressions[expression], response["body"])
 
         # Test setting variables
@@ -269,8 +269,8 @@ def do_test_scopes_variables_setVariable_evaluate(
 
         # Set a variable value whose name is synthetic, like a variable index
         # and verify the value by reading it
-        self.vscode.request_setVariable(varRef, "[0]", 100)
-        response = self.vscode.request_variables(varRef, start=0, count=1)
+        self.dap_server.request_setVariable(varRef, "[0]", 100)
+        response = self.dap_server.request_variables(varRef, start=0, count=1)
         self.verify_variables(
             make_buffer_verify_dict(0, 1, 100), response["body"]["variables"]
         )
@@ -278,8 +278,8 @@ def do_test_scopes_variables_setVariable_evaluate(
         # Set a variable value whose name is a real child value, like "pt.x"
         # and verify the value by reading it
         varRef = varref_dict["pt"]
-        self.vscode.request_setVariable(varRef, "x", 111)
-        response = self.vscode.request_variables(varRef, start=0, count=1)
+        self.dap_server.request_setVariable(varRef, "x", 111)
+        response = self.dap_server.request_variables(varRef, start=0, count=1)
         value = response["body"]["variables"][0]["value"]
         self.assertEqual(
             value, "111", "verify pt.x got set to 111 (111 != %s)" % (value)
@@ -301,40 +301,42 @@ def do_test_scopes_variables_setVariable_evaluate(
         verify_locals["x @ main.cpp:19"] = {"equals": {"type": "int", "value": "42"}}
         verify_locals["x @ main.cpp:21"] = {"equals": {"type": "int", "value": "72"}}
 
-        self.verify_variables(verify_locals, self.vscode.get_local_variables())
+        self.verify_variables(verify_locals, self.dap_server.get_local_variables())
 
         # Now we verify that we correctly change the name of a variable with and without differentiator suffix
-        self.assertFalse(self.vscode.request_setVariable(1, "x2", 9)["success"])
+        self.assertFalse(self.dap_server.request_setVariable(1, "x2", 9)["success"])
         self.assertFalse(
-            self.vscode.request_setVariable(1, "x @ main.cpp:0", 9)["success"]
+            self.dap_server.request_setVariable(1, "x @ main.cpp:0", 9)["success"]
         )
 
         self.assertTrue(
-            self.vscode.request_setVariable(1, "x @ main.cpp:17", 17)["success"]
+            self.dap_server.request_setVariable(1, "x @ main.cpp:17", 17)["success"]
         )
         self.assertTrue(
-            self.vscode.request_setVariable(1, "x @ main.cpp:19", 19)["success"]
+            self.dap_server.request_setVariable(1, "x @ main.cpp:19", 19)["success"]
         )
         self.assertTrue(
-            self.vscode.request_setVariable(1, "x @ main.cpp:21", 21)["success"]
+            self.dap_server.request_setVariable(1, "x @ main.cpp:21", 21)["success"]
         )
 
         # The following should have no effect
         self.assertFalse(
-            self.vscode.request_setVariable(1, "x @ main.cpp:21", "invalid")["success"]
+            self.dap_server.request_setVariable(1, "x @ main.cpp:21", "invalid")[
+                "success"
+            ]
         )
 
         verify_locals["x @ main.cpp:17"]["equals"]["value"] = "17"
         verify_locals["x @ main.cpp:19"]["equals"]["value"] = "19"
         verify_locals["x @ main.cpp:21"]["equals"]["value"] = "21"
 
-        self.verify_variables(verify_locals, self.vscode.get_local_variables())
+        self.verify_variables(verify_locals, self.dap_server.get_local_variables())
 
         # The plain x variable shold refer to the innermost x
-        self.assertTrue(self.vscode.request_setVariable(1, "x", 22)["success"])
+        self.assertTrue(self.dap_server.request_setVariable(1, "x", 22)["success"])
         verify_locals["x @ main.cpp:21"]["equals"]["value"] = "22"
 
-        self.verify_variables(verify_locals, self.vscode.get_local_variables())
+        self.verify_variables(verify_locals, self.dap_server.get_local_variables())
 
         # In breakpoint 3, there should be no shadowed variables
         breakpoint3_line = line_number(source, "// breakpoint 3")
@@ -345,7 +347,7 @@ def do_test_scopes_variables_setVariable_evaluate(
         )
         self.continue_to_breakpoints(breakpoint_ids)
 
-        locals = self.vscode.get_local_variables()
+        locals = self.dap_server.get_local_variables()
         names = [var["name"] for var in locals]
         # The first shadowed x shouldn't have a suffix anymore
         verify_locals["x"] = {"equals": {"type": "int", "value": "17"}}
@@ -389,7 +391,7 @@ def do_test_scopes_and_evaluate_expansion(self, enableAutoVariableSummaries: boo
         self.continue_to_breakpoints(breakpoint_ids)
 
         # Verify locals
-        locals = self.vscode.get_local_variables()
+        locals = self.dap_server.get_local_variables()
         buffer_children = make_buffer_verify_dict(0, 32)
         verify_locals = {
             "argc": {
@@ -451,7 +453,7 @@ def do_test_scopes_and_evaluate_expansion(self, enableAutoVariableSummaries: boo
 
         # Evaluate from permanent UI.
         permanent_expr_varref_dict = {}
-        response = self.vscode.request_evaluate(
+        response = self.dap_server.request_evaluate(
             expandable_expression["name"], frameIndex=0, threadId=None, context="repl"
         )
         self.verify_values(
@@ -463,7 +465,7 @@ def do_test_scopes_and_evaluate_expansion(self, enableAutoVariableSummaries: boo
 
         # Evaluate from temporary UI.
         temporary_expr_varref_dict = {}
-        response = self.vscode.request_evaluate(expandable_expression["name"])
+        response = self.dap_server.request_evaluate(expandable_expression["name"])
         self.verify_values(
             expandable_expression["response"],
             response["body"],
@@ -472,13 +474,13 @@ def do_test_scopes_and_evaluate_expansion(self, enableAutoVariableSummaries: boo
         )
 
         # Evaluate locals again.
-        locals = self.vscode.get_local_variables()
+        locals = self.dap_server.get_local_variables()
         self.verify_variables(verify_locals, locals)
 
         # Verify the evaluated expressions before second locals evaluation
         # can be expanded.
         var_ref = temporary_expr_varref_dict[expandable_expression["name"]]
-        response = self.vscode.request_variables(var_ref)
+        response = self.dap_server.request_variables(var_ref)
         self.verify_variables(
             expandable_expression["children"], response["body"]["variables"]
         )
@@ -494,14 +496,14 @@ def do_test_scopes_and_evaluate_expansion(self, enableAutoVariableSummaries: boo
         self.continue_to_breakpoints(breakpoint_ids)
 
         var_ref = permanent_expr_varref_dict[expandable_expression["name"]]
-        response = self.vscode.request_variables(var_ref)
+        response = self.dap_server.request_variables(var_ref)
         self.verify_variables(
             expandable_expression["children"], response["body"]["variables"]
         )
 
         # Test that frame scopes have corresponding presentation hints.
-        frame_id = self.vscode.get_stackFrame()["id"]
-        scopes = self.vscode.request_scopes(frame_id)["body"]["scopes"]
+        frame_id = self.dap_server.get_stackFrame()["id"]
+        scopes = self.dap_server.request_scopes(frame_id)["body"]["scopes"]
 
         scope_names = [scope["name"] for scope in scopes]
         self.assertIn("Locals", scope_names)
@@ -544,7 +546,7 @@ def do_test_indexedVariables(self, enableSyntheticChildDebugging: bool):
         self.continue_to_breakpoints(breakpoint_ids)
 
         # Verify locals
-        locals = self.vscode.get_local_variables()
+        locals = self.dap_server.get_local_variables()
         # The vector variables might have one additional entry from the fake
         # "[raw]" child.
         raw_child_count = 1 if enableSyntheticChildDebugging else 0
@@ -569,7 +571,7 @@ def do_test_indexedVariables(self, enableSyntheticChildDebugging: bool):
         if enableSyntheticChildDebugging:
             verify_children["[raw]"] = ({"contains": {"type": ["vector"]}},)
 
-        children = self.vscode.request_variables(locals[2]["variablesReference"])[
+        children = self.dap_server.request_variables(locals[2]["variablesReference"])[
             "body"
         ]["variables"]
         self.verify_variables(verify_children, children)
@@ -620,11 +622,11 @@ def test_registers(self):
         if pc_name is None:
             return
         # Verify locals
-        reg_sets = self.vscode.get_registers()
+        reg_sets = self.dap_server.get_registers()
         for reg_set in reg_sets:
             if reg_set["name"] == "General Purpose Registers":
                 varRef = reg_set["variablesReference"]
-                regs = self.vscode.request_variables(varRef)["body"]["variables"]
+                regs = self.dap_server.request_variables(varRef)["body"]["variables"]
                 for reg in regs:
                     if reg["name"] == pc_name:
                         value = reg["value"]
diff --git a/lldb/test/API/tools/lldb-vscode/variables/main.cpp b/lldb/test/API/tools/lldb-dap/variables/main.cpp
similarity index 90%
rename from lldb/test/API/tools/lldb-vscode/variables/main.cpp
rename to lldb/test/API/tools/lldb-dap/variables/main.cpp
index d81a9a20544a856..da09ecb474f3b29 100644
--- a/lldb/test/API/tools/lldb-vscode/variables/main.cpp
+++ b/lldb/test/API/tools/lldb-dap/variables/main.cpp
@@ -11,8 +11,8 @@ static int s_global = 234;
 int test_indexedVariables();
 int main(int argc, char const *argv[]) {
   static float s_local = 2.25;
-  PointType pt = { 11,22, {0}};
-  for (int i=0; i<BUFFER_SIZE; ++i)
+  PointType pt = {11, 22, {0}};
+  for (int i = 0; i < BUFFER_SIZE; ++i)
     pt.buffer[i] = i;
   int x = s_global - g_global - pt.y; // breakpoint 1
   {
diff --git a/lldb/test/API/tools/lldb-vscode/categories b/lldb/test/API/tools/lldb-vscode/categories
deleted file mode 100644
index ce2cfd048e38068..000000000000000
--- a/lldb/test/API/tools/lldb-vscode/categories
+++ /dev/null
@@ -1 +0,0 @@
-lldb-vscode
diff --git a/lldb/test/API/tools/lldb-vscode/disassemble/main.c b/lldb/test/API/tools/lldb-vscode/disassemble/main.c
deleted file mode 100644
index 8dd32f263c28bac..000000000000000
--- a/lldb/test/API/tools/lldb-vscode/disassemble/main.c
+++ /dev/null
@@ -1,30 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <limits.h>
- 
-int compare_ints(const void* a, const void* b)
-{
-    int arg1 = *(const int*)a;
-    int arg2 = *(const int*)b; 
-    
-    // breakpoint 1
- 
-    if (arg1 < arg2) return -1;
-    if (arg1 > arg2) return 1;
-    return 0;
-}
- 
-int main(void)
-{
-    int ints[] = { -2, 99, 0, -743, 2, INT_MIN, 4 };
-    int size = sizeof ints / sizeof *ints;
- 
-    qsort(ints, size, sizeof(int), compare_ints);
- 
-    for (int i = 0; i < size; i++) {
-        printf("%d ", ints[i]);
-    }
- 
-    printf("\n");
-    return 0;
-}
\ No newline at end of file
diff --git a/lldb/test/API/tools/lldb-vscode/module/foo.cpp b/lldb/test/API/tools/lldb-vscode/module/foo.cpp
deleted file mode 100644
index 9dba85a9cccab88..000000000000000
--- a/lldb/test/API/tools/lldb-vscode/module/foo.cpp
+++ /dev/null
@@ -1,3 +0,0 @@
-int foo() {
-    return 12;
-}
diff --git a/lldb/test/API/tools/lldb-vscode/step/main.cpp b/lldb/test/API/tools/lldb-vscode/step/main.cpp
deleted file mode 100644
index 3027551972fcc62..000000000000000
--- a/lldb/test/API/tools/lldb-vscode/step/main.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-int function(int x) {
-  if ((x % 2) == 0)
-    return function(x-1) + x; // breakpoint 1
-  else
-    return x;
-}
-
-int main(int argc, char const *argv[]) {
-  return function(2);
-}
diff --git a/lldb/test/API/tools/lldb-vscode/terminated-event/foo.cpp b/lldb/test/API/tools/lldb-vscode/terminated-event/foo.cpp
deleted file mode 100644
index 9dba85a9cccab88..000000000000000
--- a/lldb/test/API/tools/lldb-vscode/terminated-event/foo.cpp
+++ /dev/null
@@ -1,3 +0,0 @@
-int foo() {
-    return 12;
-}
diff --git a/lldb/test/Shell/VSCode/TestOptions.test b/lldb/test/Shell/DAP/TestOptions.test
similarity index 70%
rename from lldb/test/Shell/VSCode/TestOptions.test
rename to lldb/test/Shell/DAP/TestOptions.test
index 05588ecd9df6c1c..e37e9116e3cddb5 100644
--- a/lldb/test/Shell/VSCode/TestOptions.test
+++ b/lldb/test/Shell/DAP/TestOptions.test
@@ -1,4 +1,4 @@
-# RUN: lldb-vscode --help | FileCheck %s
+# RUN: lldb-dap --help | FileCheck %s
 # CHECK: -g
 # CHECK: --help
 # CHECK: -h
diff --git a/lldb/test/Shell/helper/toolchain.py b/lldb/test/Shell/helper/toolchain.py
index 8b56c659b13bf78..255955fc70d8c41 100644
--- a/lldb/test/Shell/helper/toolchain.py
+++ b/lldb/test/Shell/helper/toolchain.py
@@ -90,7 +90,7 @@ def use_lldb_substitutions(config):
             unresolved="ignore",
         ),
         "lldb-test",
-        "lldb-vscode",
+        "lldb-dap",
         ToolSubst(
             "%build", command="'" + sys.executable + "'", extra_args=build_script_args
         ),
diff --git a/lldb/tools/CMakeLists.txt b/lldb/tools/CMakeLists.txt
index 16a2c7956aeff4d..6804dc234555b18 100644
--- a/lldb/tools/CMakeLists.txt
+++ b/lldb/tools/CMakeLists.txt
@@ -9,7 +9,7 @@ add_subdirectory(lldb-test EXCLUDE_FROM_ALL)
 add_subdirectory(lldb-fuzzer EXCLUDE_FROM_ALL)
 
 add_lldb_tool_subdirectory(lldb-instr)
-add_lldb_tool_subdirectory(lldb-vscode)
+add_lldb_tool_subdirectory(lldb-dap)
 
 if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
   add_lldb_tool_subdirectory(darwin-debug)
diff --git a/lldb/tools/lldb-vscode/BreakpointBase.cpp b/lldb/tools/lldb-dap/BreakpointBase.cpp
similarity index 97%
rename from lldb/tools/lldb-vscode/BreakpointBase.cpp
rename to lldb/tools/lldb-dap/BreakpointBase.cpp
index 9ec392c9afe471b..cd12f97fb13dfe9 100644
--- a/lldb/tools/lldb-vscode/BreakpointBase.cpp
+++ b/lldb/tools/lldb-dap/BreakpointBase.cpp
@@ -7,10 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "BreakpointBase.h"
-#include "VSCode.h"
+#include "DAP.h"
 #include "llvm/ADT/StringExtras.h"
 
-using namespace lldb_vscode;
+using namespace lldb_dap;
 
 BreakpointBase::BreakpointBase(const llvm::json::Object &obj)
     : condition(std::string(GetString(obj, "condition"))),
@@ -270,7 +270,7 @@ void BreakpointBase::SetLogMessage() {
 void BreakpointBase::NotifyLogMessageError(llvm::StringRef error) {
   std::string message = "Log message has error: ";
   message += error;
-  g_vsc.SendOutput(OutputType::Console, message);
+  g_dap.SendOutput(OutputType::Console, message);
 }
 
 /*static*/
@@ -304,7 +304,7 @@ bool BreakpointBase::BreakpointHitCallback(
   }
   if (!output.empty() && output.back() != '\n')
     output.push_back('\n'); // Ensure log message has line break.
-  g_vsc.SendOutput(OutputType::Console, output.c_str());
+  g_dap.SendOutput(OutputType::Console, output.c_str());
 
   // Do not stop.
   return false;
@@ -328,7 +328,7 @@ void BreakpointBase::UpdateBreakpoint(const BreakpointBase &request_bp) {
 const char *BreakpointBase::GetBreakpointLabel() {
   // Breakpoints in LLDB can have names added to them which are kind of like
   // labels or categories. All breakpoints that are set through the IDE UI get
-  // sent through the various VS code DAP set*Breakpoint packets, and these
+  // sent through the various DAP set*Breakpoint packets, and these
   // breakpoints will be labeled with this name so if breakpoint update events
   // come in for breakpoints that the IDE doesn't know about, like if a
   // breakpoint is set manually using the debugger console, we won't report any
@@ -338,5 +338,5 @@ const char *BreakpointBase::GetBreakpointLabel() {
   // in via LLDB breakpoint changed events and check the breakpoint by calling
   // "bool lldb::SBBreakpoint::MatchesName(const char *)" to check if a
   // breakpoint in one of the UI breakpoints that we should report changes for.
-  return "vscode";
+  return "dap";
 }
diff --git a/lldb/tools/lldb-vscode/BreakpointBase.h b/lldb/tools/lldb-dap/BreakpointBase.h
similarity index 93%
rename from lldb/tools/lldb-vscode/BreakpointBase.h
rename to lldb/tools/lldb-dap/BreakpointBase.h
index 91e0dd600163c47..41787f786102157 100644
--- a/lldb/tools/lldb-vscode/BreakpointBase.h
+++ b/lldb/tools/lldb-dap/BreakpointBase.h
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLDB_TOOLS_LLDB_VSCODE_BREAKPOINTBASE_H
-#define LLDB_TOOLS_LLDB_VSCODE_BREAKPOINTBASE_H
+#ifndef LLDB_TOOLS_LLDB_DAP_BREAKPOINTBASE_H
+#define LLDB_TOOLS_LLDB_DAP_BREAKPOINTBASE_H
 
 #include "JSONUtils.h"
 #include "lldb/API/SBBreakpoint.h"
@@ -15,7 +15,7 @@
 #include <string>
 #include <vector>
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
 struct BreakpointBase {
   // logMessage part can be either a raw text or an expression.
@@ -58,6 +58,6 @@ struct BreakpointBase {
                                     lldb::SBBreakpointLocation &location);
 };
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
 
 #endif
diff --git a/lldb/tools/lldb-vscode/CMakeLists.txt b/lldb/tools/lldb-dap/CMakeLists.txt
similarity index 79%
rename from lldb/tools/lldb-vscode/CMakeLists.txt
rename to lldb/tools/lldb-dap/CMakeLists.txt
index b68a4f3f67ab90f..c71c80981890bd5 100644
--- a/lldb/tools/lldb-vscode/CMakeLists.txt
+++ b/lldb/tools/lldb-dap/CMakeLists.txt
@@ -9,11 +9,11 @@ endif ()
 
 if(APPLE)
   configure_file(
-    ${CMAKE_CURRENT_SOURCE_DIR}/lldb-vscode-Info.plist.in
-    ${CMAKE_CURRENT_BINARY_DIR}/lldb-vscode-Info.plist
+    ${CMAKE_CURRENT_SOURCE_DIR}/lldb-dap-Info.plist.in
+    ${CMAKE_CURRENT_BINARY_DIR}/lldb-dap-Info.plist
     )
   # Inline info plist in binary (use target_link_options for this as soon as CMake 3.13 is available)
-  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-sectcreate,__TEXT,__info_plist,${CMAKE_CURRENT_BINARY_DIR}/lldb-vscode-Info.plist")
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-sectcreate,__TEXT,__info_plist,${CMAKE_CURRENT_BINARY_DIR}/lldb-dap-Info.plist")
 endif()
 
 # We need to include the llvm components we depend on manually, as liblldb does
@@ -21,9 +21,9 @@ endif()
 set(LLVM_LINK_COMPONENTS Support)
 set(LLVM_TARGET_DEFINITIONS Options.td)
 tablegen(LLVM Options.inc -gen-opt-parser-defs)
-add_public_tablegen_target(LLDBVSCodeOptionsTableGen)
-add_lldb_tool(lldb-vscode
-  lldb-vscode.cpp
+add_public_tablegen_target(LLDBDAPOptionsTableGen)
+add_lldb_tool(lldb-dap
+  lldb-dap.cpp
   BreakpointBase.cpp
   ExceptionBreakpoint.cpp
   FifoFiles.cpp
@@ -35,7 +35,7 @@ add_lldb_tool(lldb-vscode
   ProgressEvent.cpp
   RunInTerminal.cpp
   SourceBreakpoint.cpp
-  VSCode.cpp
+  DAP.cpp
 
   LINK_LIBS
     liblldb
@@ -49,7 +49,7 @@ add_lldb_tool(lldb-vscode
 if(LLDB_BUILD_FRAMEWORK)
   # In the build-tree, we know the exact path to the framework directory.
   # The installed framework can be in different locations.
-  lldb_setup_rpaths(lldb-vscode
+  lldb_setup_rpaths(lldb-dap
     BUILD_RPATH
       "${LLDB_FRAMEWORK_ABSOLUTE_BUILD_DIR}"
     INSTALL_RPATH
diff --git a/lldb/tools/lldb-vscode/VSCode.cpp b/lldb/tools/lldb-dap/DAP.cpp
similarity index 86%
rename from lldb/tools/lldb-vscode/VSCode.cpp
rename to lldb/tools/lldb-dap/DAP.cpp
index 1384604c48371b7..af20e6460685eb3 100644
--- a/lldb/tools/lldb-vscode/VSCode.cpp
+++ b/lldb/tools/lldb-dap/DAP.cpp
@@ -1,4 +1,4 @@
-//===-- VSCode.cpp ----------------------------------------------*- C++ -*-===//
+//===-- DAP.cpp -------------------------------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -12,8 +12,8 @@
 #include <mutex>
 #include <sstream>
 
+#include "DAP.h"
 #include "LLDBUtils.h"
-#include "VSCode.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/FormatVariadic.h"
 
@@ -24,14 +24,14 @@
 #include <windows.h>
 #endif
 
-using namespace lldb_vscode;
+using namespace lldb_dap;
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
-VSCode g_vsc;
+DAP g_dap;
 
-VSCode::VSCode()
-    : broadcaster("lldb-vscode"),
+DAP::DAP()
+    : broadcaster("lldb-dap"),
       exception_breakpoints(
           {{"cpp_catch", "C++ Catch", lldb::eLanguageTypeC_plus_plus},
            {"cpp_throw", "C++ Throw", lldb::eLanguageTypeC_plus_plus},
@@ -49,7 +49,7 @@ VSCode::VSCode()
           [&](const ProgressEvent &event) { SendJSON(event.ToJSON()); }),
       reverse_request_seq(0), repl_mode(ReplMode::Auto),
       auto_repl_mode_collision_warning(false) {
-  const char *log_file_path = getenv("LLDBVSCODE_LOG");
+  const char *log_file_path = getenv("LLDBDAP_LOG");
 #if defined(_WIN32)
   // Windows opens stdout and stdin in text mode which converts \n to 13,10
   // while the value is just 10 on Darwin/Linux. Setting the file mode to binary
@@ -64,9 +64,9 @@ VSCode::VSCode()
     log.reset(new std::ofstream(log_file_path));
 }
 
-VSCode::~VSCode() = default;
+DAP::~DAP() = default;
 
-ExceptionBreakpoint *VSCode::GetExceptionBreakpoint(const std::string &filter) {
+ExceptionBreakpoint *DAP::GetExceptionBreakpoint(const std::string &filter) {
   for (auto &bp : exception_breakpoints) {
     if (bp.filter == filter)
       return &bp;
@@ -74,8 +74,7 @@ ExceptionBreakpoint *VSCode::GetExceptionBreakpoint(const std::string &filter) {
   return nullptr;
 }
 
-ExceptionBreakpoint *
-VSCode::GetExceptionBreakpoint(const lldb::break_id_t bp_id) {
+ExceptionBreakpoint *DAP::GetExceptionBreakpoint(const lldb::break_id_t bp_id) {
   for (auto &bp : exception_breakpoints) {
     if (bp.bp.GetID() == bp_id)
       return &bp;
@@ -86,7 +85,7 @@ VSCode::GetExceptionBreakpoint(const lldb::break_id_t bp_id) {
 // Send the JSON in "json_str" to the "out" stream. Correctly send the
 // "Content-Length:" field followed by the length, followed by the raw
 // JSON bytes.
-void VSCode::SendJSON(const std::string &json_str) {
+void DAP::SendJSON(const std::string &json_str) {
   output.write_full("Content-Length: ");
   output.write_full(llvm::utostr(json_str.size()));
   output.write_full("\r\n\r\n");
@@ -95,7 +94,7 @@ void VSCode::SendJSON(const std::string &json_str) {
 
 // Serialize the JSON value into a string and send the JSON packet to
 // the "out" stream.
-void VSCode::SendJSON(const llvm::json::Value &json) {
+void DAP::SendJSON(const llvm::json::Value &json) {
   std::string s;
   llvm::raw_string_ostream strm(s);
   strm << json;
@@ -112,7 +111,7 @@ void VSCode::SendJSON(const llvm::json::Value &json) {
 }
 
 // Read a JSON packet from the "in" stream.
-std::string VSCode::ReadJSON() {
+std::string DAP::ReadJSON() {
   std::string length_str;
   std::string json_str;
   int length;
@@ -198,7 +197,7 @@ std::string VSCode::ReadJSON() {
 //     "required": [ "event", "body" ]
 //   }]
 // }
-void VSCode::SendOutput(OutputType o, const llvm::StringRef output) {
+void DAP::SendOutput(OutputType o, const llvm::StringRef output) {
   if (output.empty())
     return;
 
@@ -318,13 +317,13 @@ void VSCode::SendOutput(OutputType o, const llvm::StringRef output) {
 //   };
 // }
 
-void VSCode::SendProgressEvent(uint64_t progress_id, const char *message,
-                               uint64_t completed, uint64_t total) {
+void DAP::SendProgressEvent(uint64_t progress_id, const char *message,
+                            uint64_t completed, uint64_t total) {
   progress_event_reporter.Push(progress_id, message, completed, total);
 }
 
 void __attribute__((format(printf, 3, 4)))
-VSCode::SendFormattedOutput(OutputType o, const char *format, ...) {
+DAP::SendFormattedOutput(OutputType o, const char *format, ...) {
   char buffer[1024];
   va_list args;
   va_start(args, format);
@@ -334,8 +333,7 @@ VSCode::SendFormattedOutput(OutputType o, const char *format, ...) {
       o, llvm::StringRef(buffer, std::min<int>(actual_length, sizeof(buffer))));
 }
 
-ExceptionBreakpoint *
-VSCode::GetExceptionBPFromStopReason(lldb::SBThread &thread) {
+ExceptionBreakpoint *DAP::GetExceptionBPFromStopReason(lldb::SBThread &thread) {
   const auto num = thread.GetStopReasonDataCount();
   // Check to see if have hit an exception breakpoint and change the
   // reason to "exception", but only do so if all breakpoints that were
@@ -356,12 +354,12 @@ VSCode::GetExceptionBPFromStopReason(lldb::SBThread &thread) {
   return exc_bp;
 }
 
-lldb::SBThread VSCode::GetLLDBThread(const llvm::json::Object &arguments) {
+lldb::SBThread DAP::GetLLDBThread(const llvm::json::Object &arguments) {
   auto tid = GetSigned(arguments, "threadId", LLDB_INVALID_THREAD_ID);
   return target.GetProcess().GetThreadByID(tid);
 }
 
-lldb::SBFrame VSCode::GetLLDBFrame(const llvm::json::Object &arguments) {
+lldb::SBFrame DAP::GetLLDBFrame(const llvm::json::Object &arguments) {
   const uint64_t frame_id = GetUnsigned(arguments, "frameId", UINT64_MAX);
   lldb::SBProcess process = target.GetProcess();
   // Upper 32 bits is the thread index ID
@@ -371,19 +369,19 @@ lldb::SBFrame VSCode::GetLLDBFrame(const llvm::json::Object &arguments) {
   return thread.GetFrameAtIndex(GetLLDBFrameID(frame_id));
 }
 
-llvm::json::Value VSCode::CreateTopLevelScopes() {
+llvm::json::Value DAP::CreateTopLevelScopes() {
   llvm::json::Array scopes;
   scopes.emplace_back(CreateScope("Locals", VARREF_LOCALS,
-                                  g_vsc.variables.locals.GetSize(), false));
+                                  g_dap.variables.locals.GetSize(), false));
   scopes.emplace_back(CreateScope("Globals", VARREF_GLOBALS,
-                                  g_vsc.variables.globals.GetSize(), false));
+                                  g_dap.variables.globals.GetSize(), false));
   scopes.emplace_back(CreateScope("Registers", VARREF_REGS,
-                                  g_vsc.variables.registers.GetSize(), false));
+                                  g_dap.variables.registers.GetSize(), false));
   return llvm::json::Value(std::move(scopes));
 }
 
-ExpressionContext VSCode::DetectExpressionContext(lldb::SBFrame &frame,
-                                                  std::string &text) {
+ExpressionContext DAP::DetectExpressionContext(lldb::SBFrame &frame,
+                                               std::string &text) {
   // Include ` as an escape hatch.
   if (!text.empty() && text[0] == '`') {
     text = text.substr(1);
@@ -433,35 +431,35 @@ ExpressionContext VSCode::DetectExpressionContext(lldb::SBFrame &frame,
   return ExpressionContext::Variable;
 }
 
-void VSCode::RunLLDBCommands(llvm::StringRef prefix,
-                             const std::vector<std::string> &commands) {
+void DAP::RunLLDBCommands(llvm::StringRef prefix,
+                          const std::vector<std::string> &commands) {
   SendOutput(OutputType::Console,
              llvm::StringRef(::RunLLDBCommands(prefix, commands)));
 }
 
-void VSCode::RunInitCommands() {
+void DAP::RunInitCommands() {
   RunLLDBCommands("Running initCommands:", init_commands);
 }
 
-void VSCode::RunPreRunCommands() {
+void DAP::RunPreRunCommands() {
   RunLLDBCommands("Running preRunCommands:", pre_run_commands);
 }
 
-void VSCode::RunStopCommands() {
+void DAP::RunStopCommands() {
   RunLLDBCommands("Running stopCommands:", stop_commands);
 }
 
-void VSCode::RunExitCommands() {
+void DAP::RunExitCommands() {
   RunLLDBCommands("Running exitCommands:", exit_commands);
 }
 
-void VSCode::RunTerminateCommands() {
+void DAP::RunTerminateCommands() {
   RunLLDBCommands("Running terminateCommands:", terminate_commands);
 }
 
 lldb::SBTarget
-VSCode::CreateTargetFromArguments(const llvm::json::Object &arguments,
-                                  lldb::SBError &error) {
+DAP::CreateTargetFromArguments(const llvm::json::Object &arguments,
+                               lldb::SBError &error) {
   // Grab the name of the program we need to debug and create a target using
   // the given program as an argument. Executable file can be a source of target
   // architecture and platform, if they differ from the host. Setting exe path
@@ -490,7 +488,7 @@ VSCode::CreateTargetFromArguments(const llvm::json::Object &arguments,
   return target;
 }
 
-void VSCode::SetTarget(const lldb::SBTarget target) {
+void DAP::SetTarget(const lldb::SBTarget target) {
   this->target = target;
 
   if (target.IsValid()) {
@@ -504,7 +502,7 @@ void VSCode::SetTarget(const lldb::SBTarget target) {
   }
 }
 
-PacketStatus VSCode::GetNextObject(llvm::json::Object &object) {
+PacketStatus DAP::GetNextObject(llvm::json::Object &object) {
   std::string json = ReadJSON();
   if (json.empty())
     return PacketStatus::EndOfFile;
@@ -538,7 +536,7 @@ PacketStatus VSCode::GetNextObject(llvm::json::Object &object) {
   return PacketStatus::Success;
 }
 
-bool VSCode::HandleObject(const llvm::json::Object &object) {
+bool DAP::HandleObject(const llvm::json::Object &object) {
   const auto packet_type = GetString(object, "type");
   if (packet_type == "request") {
     const auto command = GetString(object, "command");
@@ -591,16 +589,16 @@ bool VSCode::HandleObject(const llvm::json::Object &object) {
   return false;
 }
 
-llvm::Error VSCode::Loop() {
+llvm::Error DAP::Loop() {
   while (!sent_terminated_event) {
     llvm::json::Object object;
-    lldb_vscode::PacketStatus status = GetNextObject(object);
+    lldb_dap::PacketStatus status = GetNextObject(object);
 
-    if (status == lldb_vscode::PacketStatus::EndOfFile) {
+    if (status == lldb_dap::PacketStatus::EndOfFile) {
       break;
     }
 
-    if (status != lldb_vscode::PacketStatus::Success) {
+    if (status != lldb_dap::PacketStatus::Success) {
       return llvm::createStringError(llvm::inconvertibleErrorCode(),
                                      "failed to send packet");
     }
@@ -614,9 +612,9 @@ llvm::Error VSCode::Loop() {
   return llvm::Error::success();
 }
 
-void VSCode::SendReverseRequest(llvm::StringRef command,
-                                llvm::json::Value arguments,
-                                ResponseCallback callback) {
+void DAP::SendReverseRequest(llvm::StringRef command,
+                             llvm::json::Value arguments,
+                             ResponseCallback callback) {
   int64_t id;
   {
     std::lock_guard<std::mutex> locker(call_mutex);
@@ -632,12 +630,12 @@ void VSCode::SendReverseRequest(llvm::StringRef command,
   });
 }
 
-void VSCode::RegisterRequestCallback(std::string request,
-                                     RequestCallback callback) {
+void DAP::RegisterRequestCallback(std::string request,
+                                  RequestCallback callback) {
   request_handlers[request] = callback;
 }
 
-lldb::SBError VSCode::WaitForProcessToStop(uint32_t seconds) {
+lldb::SBError DAP::WaitForProcessToStop(uint32_t seconds) {
   lldb::SBError error;
   lldb::SBProcess process = target.GetProcess();
   if (!process.IsValid()) {
@@ -649,26 +647,26 @@ lldb::SBError VSCode::WaitForProcessToStop(uint32_t seconds) {
   while (std::chrono::steady_clock::now() < timeout_time) {
     const auto state = process.GetState();
     switch (state) {
-      case lldb::eStateAttaching:
-      case lldb::eStateConnected:
-      case lldb::eStateInvalid:
-      case lldb::eStateLaunching:
-      case lldb::eStateRunning:
-      case lldb::eStateStepping:
-      case lldb::eStateSuspended:
-        break;
-      case lldb::eStateDetached:
-        error.SetErrorString("process detached during launch or attach");
-        return error;
-      case lldb::eStateExited:
-        error.SetErrorString("process exited during launch or attach");
-        return error;
-      case lldb::eStateUnloaded:
-        error.SetErrorString("process unloaded during launch or attach");
-        return error;
-      case lldb::eStateCrashed:
-      case lldb::eStateStopped:
-        return lldb::SBError(); // Success!
+    case lldb::eStateAttaching:
+    case lldb::eStateConnected:
+    case lldb::eStateInvalid:
+    case lldb::eStateLaunching:
+    case lldb::eStateRunning:
+    case lldb::eStateStepping:
+    case lldb::eStateSuspended:
+      break;
+    case lldb::eStateDetached:
+      error.SetErrorString("process detached during launch or attach");
+      return error;
+    case lldb::eStateExited:
+      error.SetErrorString("process exited during launch or attach");
+      return error;
+    case lldb::eStateUnloaded:
+      error.SetErrorString("process unloaded during launch or attach");
+      return error;
+    case lldb::eStateCrashed:
+    case lldb::eStateStopped:
+      return lldb::SBError(); // Success!
     }
     std::this_thread::sleep_for(std::chrono::microseconds(250));
   }
@@ -759,7 +757,7 @@ bool StartDebuggingRequestHandler::DoExecute(
     return false;
   }
 
-  g_vsc.SendReverseRequest(
+  g_dap.SendReverseRequest(
       "startDebugging",
       llvm::json::Object{{"request", request},
                          {"configuration", std::move(*configuration)}},
@@ -783,7 +781,7 @@ bool ReplModeRequestHandler::DoExecute(lldb::SBDebugger debugger,
   // If a new mode is not specified report the current mode.
   if (!command || llvm::StringRef(command[0]).empty()) {
     std::string mode;
-    switch (g_vsc.repl_mode) {
+    switch (g_dap.repl_mode) {
     case ReplMode::Variable:
       mode = "variable";
       break;
@@ -795,7 +793,7 @@ bool ReplModeRequestHandler::DoExecute(lldb::SBDebugger debugger,
       break;
     }
 
-    result.Printf("lldb-vscode repl-mode %s.\n", mode.c_str());
+    result.Printf("lldb-dap repl-mode %s.\n", mode.c_str());
     result.SetStatus(lldb::eReturnStatusSuccessFinishResult);
 
     return true;
@@ -804,11 +802,11 @@ bool ReplModeRequestHandler::DoExecute(lldb::SBDebugger debugger,
   llvm::StringRef new_mode{command[0]};
 
   if (new_mode == "variable") {
-    g_vsc.repl_mode = ReplMode::Variable;
+    g_dap.repl_mode = ReplMode::Variable;
   } else if (new_mode == "command") {
-    g_vsc.repl_mode = ReplMode::Command;
+    g_dap.repl_mode = ReplMode::Command;
   } else if (new_mode == "auto") {
-    g_vsc.repl_mode = ReplMode::Auto;
+    g_dap.repl_mode = ReplMode::Auto;
   } else {
     lldb::SBStream error_message;
     error_message.Printf("Invalid repl-mode '%s'. Expected one of 'variable', "
@@ -818,9 +816,9 @@ bool ReplModeRequestHandler::DoExecute(lldb::SBDebugger debugger,
     return false;
   }
 
-  result.Printf("lldb-vscode repl-mode %s set.\n", new_mode.data());
+  result.Printf("lldb-dap repl-mode %s set.\n", new_mode.data());
   result.SetStatus(lldb::eReturnStatusSuccessFinishNoResult);
   return true;
 }
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
diff --git a/lldb/tools/lldb-vscode/VSCode.h b/lldb/tools/lldb-dap/DAP.h
similarity index 96%
rename from lldb/tools/lldb-vscode/VSCode.h
rename to lldb/tools/lldb-dap/DAP.h
index 59bb11c71e67203..e13d91a9df5d274 100644
--- a/lldb/tools/lldb-vscode/VSCode.h
+++ b/lldb/tools/lldb-dap/DAP.h
@@ -1,4 +1,4 @@
-//===-- VSCode.h ------------------------------------------------*- C++ -*-===//
+//===-- DAP.h ---------------------------------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLDB_TOOLS_LLDB_VSCODE_VSCODE_H
-#define LLDB_TOOLS_LLDB_VSCODE_VSCODE_H
+#ifndef LLDB_TOOLS_LLDB_DAP_DAP_H
+#define LLDB_TOOLS_LLDB_DAP_DAP_H
 
 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
 
@@ -62,13 +62,13 @@
 #define VARREF_FIRST_VAR_IDX (int64_t)4
 #define NO_TYPENAME "<no-type>"
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
 typedef llvm::DenseMap<uint32_t, SourceBreakpoint> SourceBreakpointMap;
 typedef llvm::StringMap<FunctionBreakpoint> FunctionBreakpointMap;
 enum class OutputType { Console, Stdout, Stderr, Telemetry };
 
-enum VSCodeBroadcasterBits {
+enum DAPBroadcasterBits {
   eBroadcastBitStopEventThread = 1u << 0,
   eBroadcastBitStopProgressThread = 1u << 1
 };
@@ -141,7 +141,7 @@ struct ReplModeRequestHandler : public lldb::SBCommandPluginInterface {
                  lldb::SBCommandReturnObject &result) override;
 };
 
-struct VSCode {
+struct DAP {
   std::string debug_adaptor_path;
   InputStream input;
   OutputStream output;
@@ -189,10 +189,10 @@ struct VSCode {
   ReplMode repl_mode;
   bool auto_repl_mode_collision_warning;
 
-  VSCode();
-  ~VSCode();
-  VSCode(const VSCode &rhs) = delete;
-  void operator=(const VSCode &rhs) = delete;
+  DAP();
+  ~DAP();
+  DAP(const DAP &rhs) = delete;
+  void operator=(const DAP &rhs) = delete;
   ExceptionBreakpoint *GetExceptionBreakpoint(const std::string &filter);
   ExceptionBreakpoint *GetExceptionBreakpoint(const lldb::break_id_t bp_id);
 
@@ -245,7 +245,7 @@ struct VSCode {
   lldb::SBTarget CreateTargetFromArguments(const llvm::json::Object &arguments,
                                            lldb::SBError &error);
 
-  /// Set given target object as a current target for lldb-vscode and start
+  /// Set given target object as a current target for lldb-dap and start
   /// listeing for its breakpoint events.
   void SetTarget(const lldb::SBTarget target);
 
@@ -311,8 +311,8 @@ struct VSCode {
   void SendJSON(const std::string &json_str);
 };
 
-extern VSCode g_vsc;
+extern DAP g_dap;
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
 
 #endif
diff --git a/lldb/tools/lldb-vscode/VSCodeForward.h b/lldb/tools/lldb-dap/DAPForward.h
similarity index 81%
rename from lldb/tools/lldb-vscode/VSCodeForward.h
rename to lldb/tools/lldb-dap/DAPForward.h
index 92eb5757d18038f..fffff1e3f790209 100644
--- a/lldb/tools/lldb-vscode/VSCodeForward.h
+++ b/lldb/tools/lldb-dap/DAPForward.h
@@ -1,4 +1,4 @@
-//===-- VSCodeForward.h -----------------------------------------*- C++ -*-===//
+//===-- DAPForward.h --------------------------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,15 +6,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLDB_TOOLS_LLDB_VSCODE_VSCODEFORWARD_H
-#define LLDB_TOOLS_LLDB_VSCODE_VSCODEFORWARD_H
+#ifndef LLDB_TOOLS_LLDB_DAP_DAPFORWARD_H
+#define LLDB_TOOLS_LLDB_DAP_DAPFORWARD_H
 
-namespace lldb_vscode {
+namespace lldb_dap {
 struct BreakpointBase;
 struct ExceptionBreakpoint;
 struct FunctionBreakpoint;
 struct SourceBreakpoint;
-} // namespace lldb_vscode
+} // namespace lldb_dap
 
 namespace lldb {
 class SBAttachInfo;
diff --git a/lldb/tools/lldb-vscode/ExceptionBreakpoint.cpp b/lldb/tools/lldb-dap/ExceptionBreakpoint.cpp
similarity index 84%
rename from lldb/tools/lldb-vscode/ExceptionBreakpoint.cpp
rename to lldb/tools/lldb-dap/ExceptionBreakpoint.cpp
index 9324e66d8bd7e31..130c237e65441dd 100644
--- a/lldb/tools/lldb-vscode/ExceptionBreakpoint.cpp
+++ b/lldb/tools/lldb-dap/ExceptionBreakpoint.cpp
@@ -8,16 +8,16 @@
 
 #include "ExceptionBreakpoint.h"
 #include "BreakpointBase.h"
-#include "VSCode.h"
+#include "DAP.h"
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
 void ExceptionBreakpoint::SetBreakpoint() {
   if (bp.IsValid())
     return;
   bool catch_value = filter.find("_catch") != std::string::npos;
   bool throw_value = filter.find("_throw") != std::string::npos;
-  bp = g_vsc.target.BreakpointCreateForException(language, catch_value,
+  bp = g_dap.target.BreakpointCreateForException(language, catch_value,
                                                  throw_value);
   // See comments in BreakpointBase::GetBreakpointLabel() for details of why
   // we add a label to our breakpoints.
@@ -27,8 +27,8 @@ void ExceptionBreakpoint::SetBreakpoint() {
 void ExceptionBreakpoint::ClearBreakpoint() {
   if (!bp.IsValid())
     return;
-  g_vsc.target.BreakpointDelete(bp.GetID());
+  g_dap.target.BreakpointDelete(bp.GetID());
   bp = lldb::SBBreakpoint();
 }
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
diff --git a/lldb/tools/lldb-vscode/ExceptionBreakpoint.h b/lldb/tools/lldb-dap/ExceptionBreakpoint.h
similarity index 83%
rename from lldb/tools/lldb-vscode/ExceptionBreakpoint.h
rename to lldb/tools/lldb-dap/ExceptionBreakpoint.h
index 203630ccf40ab37..7b81d845cb26bef 100644
--- a/lldb/tools/lldb-vscode/ExceptionBreakpoint.h
+++ b/lldb/tools/lldb-dap/ExceptionBreakpoint.h
@@ -6,14 +6,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLDB_TOOLS_LLDB_VSCODE_EXCEPTIONBREAKPOINT_H
-#define LLDB_TOOLS_LLDB_VSCODE_EXCEPTIONBREAKPOINT_H
+#ifndef LLDB_TOOLS_LLDB_DAP_EXCEPTIONBREAKPOINT_H
+#define LLDB_TOOLS_LLDB_DAP_EXCEPTIONBREAKPOINT_H
 
 #include <string>
 
 #include "lldb/API/SBBreakpoint.h"
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
 struct ExceptionBreakpoint {
   std::string filter;
@@ -29,6 +29,6 @@ struct ExceptionBreakpoint {
   void ClearBreakpoint();
 };
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
 
 #endif
diff --git a/lldb/tools/lldb-vscode/FifoFiles.cpp b/lldb/tools/lldb-dap/FifoFiles.cpp
similarity index 98%
rename from lldb/tools/lldb-vscode/FifoFiles.cpp
rename to lldb/tools/lldb-dap/FifoFiles.cpp
index a5330d58c36c656..9a6423f79471a40 100644
--- a/lldb/tools/lldb-vscode/FifoFiles.cpp
+++ b/lldb/tools/lldb-dap/FifoFiles.cpp
@@ -26,7 +26,7 @@
 
 using namespace llvm;
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
 FifoFile::FifoFile(StringRef path) : m_path(path) {}
 
@@ -102,4 +102,4 @@ Error FifoFileIO::SendJSON(const json::Value &json,
   return Error::success();
 }
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
diff --git a/lldb/tools/lldb-vscode/FifoFiles.h b/lldb/tools/lldb-dap/FifoFiles.h
similarity index 93%
rename from lldb/tools/lldb-vscode/FifoFiles.h
rename to lldb/tools/lldb-dap/FifoFiles.h
index a0c4562b5a6b7da..02a97cd5cbbd23c 100644
--- a/lldb/tools/lldb-vscode/FifoFiles.h
+++ b/lldb/tools/lldb-dap/FifoFiles.h
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLDB_TOOLS_LLDB_VSCODE_FIFOFILES_H
-#define LLDB_TOOLS_LLDB_VSCODE_FIFOFILES_H
+#ifndef LLDB_TOOLS_LLDB_DAP_FIFOFILES_H
+#define LLDB_TOOLS_LLDB_DAP_FIFOFILES_H
 
 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
 #include "llvm/Support/Error.h"
@@ -16,7 +16,7 @@
 
 #include <chrono>
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
 /// Struct that controls the life of a fifo file in the filesystem.
 ///
@@ -82,6 +82,6 @@ class FifoFileIO {
   std::string m_other_endpoint_name;
 };
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
 
-#endif // LLDB_TOOLS_LLDB_VSCODE_FIFOFILES_H
+#endif // LLDB_TOOLS_LLDB_DAP_FIFOFILES_H
diff --git a/lldb/tools/lldb-vscode/FunctionBreakpoint.cpp b/lldb/tools/lldb-dap/FunctionBreakpoint.cpp
similarity index 87%
rename from lldb/tools/lldb-vscode/FunctionBreakpoint.cpp
rename to lldb/tools/lldb-dap/FunctionBreakpoint.cpp
index b7a8705d854c8db..d4bdb976500ecd8 100644
--- a/lldb/tools/lldb-vscode/FunctionBreakpoint.cpp
+++ b/lldb/tools/lldb-dap/FunctionBreakpoint.cpp
@@ -7,9 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "FunctionBreakpoint.h"
-#include "VSCode.h"
+#include "DAP.h"
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
 FunctionBreakpoint::FunctionBreakpoint(const llvm::json::Object &obj)
     : BreakpointBase(obj), functionName(std::string(GetString(obj, "name"))) {}
@@ -17,7 +17,7 @@ FunctionBreakpoint::FunctionBreakpoint(const llvm::json::Object &obj)
 void FunctionBreakpoint::SetBreakpoint() {
   if (functionName.empty())
     return;
-  bp = g_vsc.target.BreakpointCreateByName(functionName.c_str());
+  bp = g_dap.target.BreakpointCreateByName(functionName.c_str());
   // See comments in BreakpointBase::GetBreakpointLabel() for details of why
   // we add a label to our breakpoints.
   bp.AddName(GetBreakpointLabel());
@@ -29,4 +29,4 @@ void FunctionBreakpoint::SetBreakpoint() {
     SetLogMessage();
 }
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
diff --git a/lldb/tools/lldb-vscode/FunctionBreakpoint.h b/lldb/tools/lldb-dap/FunctionBreakpoint.h
similarity index 80%
rename from lldb/tools/lldb-vscode/FunctionBreakpoint.h
rename to lldb/tools/lldb-dap/FunctionBreakpoint.h
index 82eec89cd18d2ef..fc23e94e128763e 100644
--- a/lldb/tools/lldb-vscode/FunctionBreakpoint.h
+++ b/lldb/tools/lldb-dap/FunctionBreakpoint.h
@@ -6,12 +6,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLDB_TOOLS_LLDB_VSCODE_FUNCTIONBREAKPOINT_H
-#define LLDB_TOOLS_LLDB_VSCODE_FUNCTIONBREAKPOINT_H
+#ifndef LLDB_TOOLS_LLDB_DAP_FUNCTIONBREAKPOINT_H
+#define LLDB_TOOLS_LLDB_DAP_FUNCTIONBREAKPOINT_H
 
 #include "BreakpointBase.h"
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
 struct FunctionBreakpoint : public BreakpointBase {
   std::string functionName;
@@ -23,6 +23,6 @@ struct FunctionBreakpoint : public BreakpointBase {
   void SetBreakpoint();
 };
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
 
 #endif
diff --git a/lldb/tools/lldb-vscode/IOStream.cpp b/lldb/tools/lldb-dap/IOStream.cpp
similarity index 99%
rename from lldb/tools/lldb-vscode/IOStream.cpp
rename to lldb/tools/lldb-dap/IOStream.cpp
index 6e2a89c5df9e05b..897ab791ed062d7 100644
--- a/lldb/tools/lldb-vscode/IOStream.cpp
+++ b/lldb/tools/lldb-dap/IOStream.cpp
@@ -20,7 +20,7 @@
 #include <string>
 #include <vector>
 
-using namespace lldb_vscode;
+using namespace lldb_dap;
 
 StreamDescriptor::StreamDescriptor() = default;
 
diff --git a/lldb/tools/lldb-vscode/IOStream.h b/lldb/tools/lldb-dap/IOStream.h
similarity index 93%
rename from lldb/tools/lldb-vscode/IOStream.h
rename to lldb/tools/lldb-dap/IOStream.h
index 0eb9b6fefb0dca4..b62502419182cd3 100644
--- a/lldb/tools/lldb-vscode/IOStream.h
+++ b/lldb/tools/lldb-dap/IOStream.h
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLDB_TOOLS_LLDB_VSCODE_IOSTREAM_H
-#define LLDB_TOOLS_LLDB_VSCODE_IOSTREAM_H
+#ifndef LLDB_TOOLS_LLDB_DAP_IOSTREAM_H
+#define LLDB_TOOLS_LLDB_DAP_IOSTREAM_H
 
 #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
 
@@ -32,7 +32,7 @@ typedef int SOCKET;
 // types of files, so we can't simply have one code path that just uses read
 // and write everywhere.  So we need an abstraction in order to allow us to
 // treat them identically.
-namespace lldb_vscode {
+namespace lldb_dap {
 struct StreamDescriptor {
   StreamDescriptor();
   ~StreamDescriptor();
@@ -66,6 +66,6 @@ struct OutputStream {
 
   bool write_full(llvm::StringRef str);
 };
-} // namespace lldb_vscode
+} // namespace lldb_dap
 
 #endif
diff --git a/lldb/tools/lldb-vscode/JSONUtils.cpp b/lldb/tools/lldb-dap/JSONUtils.cpp
similarity index 98%
rename from lldb/tools/lldb-vscode/JSONUtils.cpp
rename to lldb/tools/lldb-dap/JSONUtils.cpp
index 6cf753170d8429f..0daa8c11c1fa6d3 100644
--- a/lldb/tools/lldb-vscode/JSONUtils.cpp
+++ b/lldb/tools/lldb-dap/JSONUtils.cpp
@@ -24,12 +24,12 @@
 #include "lldb/API/SBValue.h"
 #include "lldb/Host/PosixApi.h"
 
+#include "DAP.h"
 #include "ExceptionBreakpoint.h"
 #include "JSONUtils.h"
 #include "LLDBUtils.h"
-#include "VSCode.h"
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
 void EmplaceSafeString(llvm::json::Object &obj, llvm::StringRef key,
                        llvm::StringRef str) {
@@ -140,7 +140,7 @@ TryCreateAutoSummaryForContainer(lldb::SBValue &v) {
   // We gate this feature because it performs GetNumChildren(), which can
   // cause performance issues because LLDB needs to complete possibly huge
   // types.
-  if (!g_vsc.enable_auto_variable_summaries)
+  if (!g_dap.enable_auto_variable_summaries)
     return std::nullopt;
 
   if (!v.MightHaveChildren())
@@ -197,7 +197,7 @@ TryCreateAutoSummaryForContainer(lldb::SBValue &v) {
 /// Try to create a summary string for the given value that doesn't have a
 /// summary of its own.
 static std::optional<std::string> TryCreateAutoSummary(lldb::SBValue value) {
-  if (!g_vsc.enable_auto_variable_summaries)
+  if (!g_dap.enable_auto_variable_summaries)
     return std::nullopt;
 
   // We use the dereferenced value for generating the summary.
@@ -326,7 +326,7 @@ llvm::json::Value CreateScope(const llvm::StringRef name,
   llvm::json::Object object;
   EmplaceSafeString(object, "name", name.str());
 
-  // TODO: Support "arguments" scope. At the moment lldb-vscode includes the
+  // TODO: Support "arguments" scope. At the moment lldb-dap includes the
   // arguments into the "locals" scope.
   if (variablesReference == VARREF_LOCALS) {
     object.try_emplace("presentationHint", "locals");
@@ -425,7 +425,7 @@ llvm::json::Value CreateBreakpoint(lldb::SBBreakpoint &bp,
 
   if (bp_addr.IsValid()) {
     std::string formatted_addr =
-        "0x" + llvm::utohexstr(bp_addr.GetLoadAddress(g_vsc.target));
+        "0x" + llvm::utohexstr(bp_addr.GetLoadAddress(g_dap.target));
     object.try_emplace("instructionReference", formatted_addr);
     auto line_entry = bp_addr.GetLineEntry();
     const auto line = line_entry.GetLine();
@@ -513,7 +513,7 @@ llvm::json::Value CreateModule(lldb::SBModule &module) {
     object.try_emplace("symbolStatus", "Symbols not found.");
   }
   std::string loaded_addr = std::to_string(
-      module.GetObjectFileHeaderAddress().GetLoadAddress(g_vsc.target));
+      module.GetObjectFileHeaderAddress().GetLoadAddress(g_dap.target));
   object.try_emplace("addressRange", loaded_addr);
   std::string version_str;
   uint32_t version_nums[3];
@@ -775,7 +775,7 @@ std::optional<llvm::json::Value> CreateSource(lldb::SBFrame &frame) {
 // }
 llvm::json::Value CreateStackFrame(lldb::SBFrame &frame) {
   llvm::json::Object object;
-  int64_t frame_id = MakeVSCodeFrameID(frame);
+  int64_t frame_id = MakeDAPFrameID(frame);
   object.try_emplace("id", frame_id);
 
   // `function_name` can be a nullptr, which throws an error when assigned to an
@@ -932,7 +932,7 @@ llvm::json::Value CreateThreadStopped(lldb::SBThread &thread,
     body.try_emplace("reason", "step");
     break;
   case lldb::eStopReasonBreakpoint: {
-    ExceptionBreakpoint *exc_bp = g_vsc.GetExceptionBPFromStopReason(thread);
+    ExceptionBreakpoint *exc_bp = g_dap.GetExceptionBPFromStopReason(thread);
     if (exc_bp) {
       body.try_emplace("reason", "exception");
       EmplaceSafeString(body, "description", exc_bp->label);
@@ -990,10 +990,10 @@ llvm::json::Value CreateThreadStopped(lldb::SBThread &thread,
     }
   }
   // "threadCausedFocus" is used in tests to validate breaking behavior.
-  if (tid == g_vsc.focus_tid) {
+  if (tid == g_dap.focus_tid) {
     body.try_emplace("threadCausedFocus", true);
   }
-  body.try_emplace("preserveFocusHint", tid != g_vsc.focus_tid);
+  body.try_emplace("preserveFocusHint", tid != g_dap.focus_tid);
   body.try_emplace("allThreadsStopped", true);
   event.try_emplace("body", std::move(body));
   return llvm::json::Value(std::move(event));
@@ -1119,7 +1119,7 @@ llvm::json::Value CreateVariable(lldb::SBValue v, int64_t variablesReference,
     // We create a "[raw]" fake child for each synthetic type, so we have to
     // account for it when returning indexed variables. We don't need to do this
     // for non-indexed ones.
-    bool has_raw_child = is_synthetic && g_vsc.enable_synthetic_child_debugging;
+    bool has_raw_child = is_synthetic && g_dap.enable_synthetic_child_debugging;
     int actual_num_children = num_children + (has_raw_child ? 1 : 0);
     if (is_array) {
       object.try_emplace("indexedVariables", actual_num_children);
@@ -1170,8 +1170,8 @@ CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request,
 
   auto launch_request_arguments = launch_request.getObject("arguments");
   // The program path must be the first entry in the "args" field
-  std::vector<std::string> args = {
-      debug_adaptor_path.str(), "--comm-file", comm_file.str()};
+  std::vector<std::string> args = {debug_adaptor_path.str(), "--comm-file",
+                                   comm_file.str()};
   if (debugger_pid != LLDB_INVALID_PROCESS_ID) {
     args.push_back("--debugger-pid");
     args.push_back(std::to_string(debugger_pid));
@@ -1248,7 +1248,7 @@ void FilterAndGetValueForKey(const lldb::SBStructuredData data, const char *key,
 }
 
 void addStatistic(llvm::json::Object &event) {
-  lldb::SBStructuredData statistics = g_vsc.target.GetStatistics();
+  lldb::SBStructuredData statistics = g_dap.target.GetStatistics();
   bool is_dictionary =
       statistics.GetType() == lldb::eStructuredDataTypeDictionary;
   if (!is_dictionary)
@@ -1279,4 +1279,4 @@ std::string JSONToString(const llvm::json::Value &json) {
   return data;
 }
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
diff --git a/lldb/tools/lldb-vscode/JSONUtils.h b/lldb/tools/lldb-dap/JSONUtils.h
similarity index 94%
rename from lldb/tools/lldb-vscode/JSONUtils.h
rename to lldb/tools/lldb-dap/JSONUtils.h
index 2013147f5d3532a..ade8ed68226918f 100644
--- a/lldb/tools/lldb-vscode/JSONUtils.h
+++ b/lldb/tools/lldb-dap/JSONUtils.h
@@ -6,17 +6,17 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLDB_TOOLS_LLDB_VSCODE_JSONUTILS_H
-#define LLDB_TOOLS_LLDB_VSCODE_JSONUTILS_H
+#ifndef LLDB_TOOLS_LLDB_DAP_JSONUTILS_H
+#define LLDB_TOOLS_LLDB_DAP_JSONUTILS_H
 
-#include "VSCodeForward.h"
+#include "DAPForward.h"
 #include "lldb/API/SBModule.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/JSON.h"
 #include <cstdint>
 #include <optional>
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
 /// Emplace a StringRef in a json::Object after enusring that the
 /// string is valid UTF8. If not, first call llvm::json::fixUTF8
@@ -154,7 +154,7 @@ std::vector<std::string> GetStrings(const llvm::json::Object *obj,
 /// and "success" set to true.
 ///
 /// \param[in] request
-///     The request object received from a call to VSCode::ReadJSON().
+///     The request object received from a call to DAP::ReadJSON().
 ///
 /// \param[in,out] response
 ///     An empty llvm::json::Object object that will be filled
@@ -214,7 +214,7 @@ void AppendBreakpoint(
     std::optional<llvm::StringRef> request_path = std::nullopt,
     std::optional<uint32_t> request_line = std::nullopt);
 
-/// Converts breakpoint location to a Visual Studio Code "Breakpoint"
+/// Converts breakpoint location to a debug adaptor protocol "Breakpoint".
 ///
 /// \param[in] bp
 ///     A LLDB breakpoint object to convert into a JSON value
@@ -233,11 +233,10 @@ void AppendBreakpoint(
 ///     fallback.
 ///
 /// \param[in] request_column
-///     An optional column to use when creating the resulting "Breakpoint" object.
-///     It is used if the breakpoint has no valid locations.
-///     It is useful to ensure the same column
-///     provided by the setBreakpoints request are returned to the IDE as a
-///     fallback.
+///     An optional column to use when creating the resulting "Breakpoint"
+///     object. It is used if the breakpoint has no valid locations. It is
+///     useful to ensure the same column provided by the setBreakpoints request
+///     are returned to the IDE as a fallback.
 ///
 /// \return
 ///     A "Breakpoint" JSON object with that follows the formal JSON
@@ -269,7 +268,7 @@ llvm::json::Value CreateModule(lldb::SBModule &module);
 llvm::json::Object CreateEventObject(const llvm::StringRef event_name);
 
 /// Create a "ExceptionBreakpointsFilter" JSON object as described in
-/// the Visual Studio Code debug adaptor definition.
+/// the debug adaptor definition.
 ///
 /// \param[in] bp
 ///     The exception breakpoint object to use
@@ -280,8 +279,7 @@ llvm::json::Object CreateEventObject(const llvm::StringRef event_name);
 llvm::json::Value
 CreateExceptionBreakpointFilter(const ExceptionBreakpoint &bp);
 
-/// Create a "Scope" JSON object as described in the Visual Studio Code
-/// debug adaptor definition.
+/// Create a "Scope" JSON object as described in the debug adaptor definition.
 ///
 /// \param[in] name
 ///     The value to place into the "name" key
@@ -302,8 +300,7 @@ llvm::json::Value CreateScope(const llvm::StringRef name,
                               int64_t variablesReference,
                               int64_t namedVariables, bool expensive);
 
-/// Create a "Source" JSON object as described in the Visual Studio Code
-/// debug adaptor definition.
+/// Create a "Source" JSON object as described in the debug adaptor definition.
 ///
 /// \param[in] line_entry
 ///     The LLDB line table to use when populating out the "Source"
@@ -330,7 +327,7 @@ llvm::json::Value CreateSource(llvm::StringRef source_path);
 /// object:
 ///   "id" - the stack frame ID as an integer
 ///   "name" - the function name as a string
-///   "source" - source file information as a "Source" VSCode object
+///   "source" - source file information as a "Source" DAP object
 ///   "line" - the source file line number as an integer
 ///   "column" - the source file column number as an integer
 ///
@@ -404,7 +401,7 @@ std::string CreateUniqueVariableNameForDisplay(lldb::SBValue v,
 ///   "value" - the value of the variable as a string
 ///   "type" - the typename of the variable as a string
 ///   "id" - a unique identifier for a value in case there are multiple
-///          variables with the same name. Other parts of the VSCode
+///          variables with the same name. Other parts of the DAP
 ///          protocol refer to values by name so this can help
 ///          disambiguate such cases if a IDE passes this "id" value
 ///          back down.
@@ -468,7 +465,7 @@ llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit);
 ///     The fifo file used to communicate the with the target launcher.
 ///
 /// \param[in] debugger_pid
-///     The PID of the lldb-vscode instance that will attach to the target. The
+///     The PID of the lldb-dap instance that will attach to the target. The
 ///     launcher uses it on Linux tell the kernel that it should allow the
 ///     debugger process to attach.
 ///
@@ -490,6 +487,6 @@ llvm::json::Object CreateTerminatedEventObject();
 /// Convert a given JSON object to a string.
 std::string JSONToString(const llvm::json::Value &json);
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
 
 #endif
diff --git a/lldb/tools/lldb-vscode/LLDBUtils.cpp b/lldb/tools/lldb-dap/LLDBUtils.cpp
similarity index 92%
rename from lldb/tools/lldb-vscode/LLDBUtils.cpp
rename to lldb/tools/lldb-dap/LLDBUtils.cpp
index 464195bdc6444c9..955c11f636895bc 100644
--- a/lldb/tools/lldb-vscode/LLDBUtils.cpp
+++ b/lldb/tools/lldb-dap/LLDBUtils.cpp
@@ -7,16 +7,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "LLDBUtils.h"
-#include "VSCode.h"
+#include "DAP.h"
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
 void RunLLDBCommands(llvm::StringRef prefix,
                      const llvm::ArrayRef<std::string> &commands,
                      llvm::raw_ostream &strm) {
   if (commands.empty())
     return;
-  lldb::SBCommandInterpreter interp = g_vsc.debugger.GetCommandInterpreter();
+  lldb::SBCommandInterpreter interp = g_dap.debugger.GetCommandInterpreter();
   if (!prefix.empty())
     strm << prefix << "\n";
   for (const auto &command : commands) {
@@ -78,9 +78,9 @@ uint32_t GetLLDBFrameID(uint64_t dap_frame_id) {
   return dap_frame_id & ((1u << THREAD_INDEX_SHIFT) - 1);
 }
 
-int64_t MakeVSCodeFrameID(lldb::SBFrame &frame) {
+int64_t MakeDAPFrameID(lldb::SBFrame &frame) {
   return ((int64_t)frame.GetThread().GetIndexID() << THREAD_INDEX_SHIFT) |
          frame.GetFrameID();
 }
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
diff --git a/lldb/tools/lldb-vscode/LLDBUtils.h b/lldb/tools/lldb-dap/LLDBUtils.h
similarity index 82%
rename from lldb/tools/lldb-vscode/LLDBUtils.h
rename to lldb/tools/lldb-dap/LLDBUtils.h
index 8867589b18a08c0..a99f798835370d1 100644
--- a/lldb/tools/lldb-vscode/LLDBUtils.h
+++ b/lldb/tools/lldb-dap/LLDBUtils.h
@@ -6,17 +6,17 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLDB_TOOLS_LLDB_VSCODE_LLDBUTILS_H
-#define LLDB_TOOLS_LLDB_VSCODE_LLDBUTILS_H
+#ifndef LLDB_TOOLS_LLDB_DAP_LLDBUTILS_H
+#define LLDB_TOOLS_LLDB_DAP_LLDBUTILS_H
 
-#include "VSCodeForward.h"
+#include "DAPForward.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/raw_ostream.h"
 #include <string>
 #include <vector>
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
 /// Run a list of LLDB commands in the LLDB command interpreter.
 ///
@@ -68,7 +68,7 @@ bool ThreadHasStopReason(lldb::SBThread &thread);
 /// Given a LLDB frame, make a frame ID that is unique to a specific
 /// thread and frame.
 ///
-/// VSCode requires a Stackframe "id" to be unique, so we use the frame
+/// DAP requires a Stackframe "id" to be unique, so we use the frame
 /// index in the lower 32 bits and the thread index ID in the upper 32
 /// bits.
 ///
@@ -78,34 +78,34 @@ bool ThreadHasStopReason(lldb::SBThread &thread);
 /// \return
 ///     A unique integer that allows us to easily find the right
 ///     stack frame within a thread on subsequent VS code requests.
-int64_t MakeVSCodeFrameID(lldb::SBFrame &frame);
+int64_t MakeDAPFrameID(lldb::SBFrame &frame);
 
-/// Given a VSCode frame ID, convert to a LLDB thread index id.
+/// Given a DAP frame ID, convert to a LLDB thread index id.
 ///
-/// VSCode requires a Stackframe "id" to be unique, so we use the frame
+/// DAP requires a Stackframe "id" to be unique, so we use the frame
 /// index in the lower THREAD_INDEX_SHIFT bits and the thread index ID in
 /// the upper 32 - THREAD_INDEX_SHIFT bits.
 ///
 /// \param[in] dap_frame_id
-///     The VSCode frame ID to convert to a thread index ID.
+///     The DAP frame ID to convert to a thread index ID.
 ///
 /// \return
 ///     The LLDB thread index ID.
 uint32_t GetLLDBThreadIndexID(uint64_t dap_frame_id);
 
-/// Given a VSCode frame ID, convert to a LLDB frame ID.
+/// Given a DAP frame ID, convert to a LLDB frame ID.
 ///
-/// VSCode requires a Stackframe "id" to be unique, so we use the frame
+/// DAP requires a Stackframe "id" to be unique, so we use the frame
 /// index in the lower THREAD_INDEX_SHIFT bits and the thread index ID in
 /// the upper 32 - THREAD_INDEX_SHIFT bits.
 ///
 /// \param[in] dap_frame_id
-///     The VSCode frame ID to convert to a frame ID.
+///     The DAP frame ID to convert to a frame ID.
 ///
 /// \return
 ///     The LLDB frame index ID.
 uint32_t GetLLDBFrameID(uint64_t dap_frame_id);
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
 
 #endif
diff --git a/lldb/tools/lldb-vscode/Options.td b/lldb/tools/lldb-dap/Options.td
similarity index 85%
rename from lldb/tools/lldb-vscode/Options.td
rename to lldb/tools/lldb-dap/Options.td
index a6ba0a318f388a1..571967b232b4a26 100644
--- a/lldb/tools/lldb-vscode/Options.td
+++ b/lldb/tools/lldb-dap/Options.td
@@ -6,7 +6,7 @@ class R<list<string> prefixes, string name>
   : Option<prefixes, name, KIND_REMAINING_ARGS>;
 
 def help: F<"help">,
-  HelpText<"Prints out the usage information for the LLDB VSCode tool.">;
+  HelpText<"Prints out the usage information for the lldb-dap tool.">;
 def: Flag<["-"], "h">,
   Alias<help>,
   HelpText<"Alias for --help">;
@@ -19,7 +19,7 @@ def: Flag<["-"], "g">,
 
 def port: S<"port">,
   MetaVarName<"<port>">,
-  HelpText<"Communicate with the lldb-vscode tool over the defined port.">;
+  HelpText<"Communicate with the lldb-dap tool over the defined port.">;
 def: Separate<["-"], "p">,
   Alias<port>,
   HelpText<"Alias for --port">;
@@ -37,7 +37,7 @@ def comm_file: S<"comm-file">,
 
 def debugger_pid: S<"debugger-pid">,
   MetaVarName<"<pid>">,
-  HelpText<"The PID of the lldb-vscode instance that sent the launchInTerminal "
+  HelpText<"The PID of the lldb-dap instance that sent the launchInTerminal "
     "request when using --launch-target.">;
 
 def repl_mode: S<"repl-mode">,
diff --git a/lldb/tools/lldb-vscode/OutputRedirector.cpp b/lldb/tools/lldb-dap/OutputRedirector.cpp
similarity index 96%
rename from lldb/tools/lldb-vscode/OutputRedirector.cpp
rename to lldb/tools/lldb-dap/OutputRedirector.cpp
index 9243915f7d78711..4e6907ce6c78068 100644
--- a/lldb/tools/lldb-vscode/OutputRedirector.cpp
+++ b/lldb/tools/lldb-dap/OutputRedirector.cpp
@@ -18,7 +18,7 @@
 
 using namespace llvm;
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
 Error RedirectFd(int fd, std::function<void(llvm::StringRef)> callback) {
   int new_fd[2];
@@ -59,4 +59,4 @@ Error RedirectFd(int fd, std::function<void(llvm::StringRef)> callback) {
   return Error::success();
 }
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
diff --git a/lldb/tools/lldb-vscode/OutputRedirector.h b/lldb/tools/lldb-dap/OutputRedirector.h
similarity index 77%
rename from lldb/tools/lldb-vscode/OutputRedirector.h
rename to lldb/tools/lldb-dap/OutputRedirector.h
index c728367a2185533..dba51016775bf45 100644
--- a/lldb/tools/lldb-vscode/OutputRedirector.h
+++ b/lldb/tools/lldb-dap/OutputRedirector.h
@@ -6,15 +6,15 @@
 //
 //===----------------------------------------------------------------------===/
 
-#ifndef LLDB_TOOLS_LLDB_VSCODE_OUTPUT_REDIRECTOR_H
-#define LLDB_TOOLS_LLDB_VSCODE_OUTPUT_REDIRECTOR_H
+#ifndef LLDB_TOOLS_LLDB_DAP_OUTPUT_REDIRECTOR_H
+#define LLDB_TOOLS_LLDB_DAP_OUTPUT_REDIRECTOR_H
 
 #include <thread>
 
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Error.h"
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
 /// Redirects the output of a given file descriptor to a callback.
 ///
@@ -23,6 +23,6 @@ namespace lldb_vscode {
 ///     otherwise.
 llvm::Error RedirectFd(int fd, std::function<void(llvm::StringRef)> callback);
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
 
-#endif // LLDB_TOOLS_LLDB_VSCODE_OUTPUT_REDIRECTOR_H
+#endif // LLDB_TOOLS_LLDB_DAP_OUTPUT_REDIRECTOR_H
diff --git a/lldb/tools/lldb-vscode/ProgressEvent.cpp b/lldb/tools/lldb-dap/ProgressEvent.cpp
similarity index 99%
rename from lldb/tools/lldb-vscode/ProgressEvent.cpp
rename to lldb/tools/lldb-dap/ProgressEvent.cpp
index 2930b5c1e24e929..8a660b50af1205b 100644
--- a/lldb/tools/lldb-vscode/ProgressEvent.cpp
+++ b/lldb/tools/lldb-dap/ProgressEvent.cpp
@@ -12,7 +12,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include <optional>
 
-using namespace lldb_vscode;
+using namespace lldb_dap;
 using namespace llvm;
 
 // The minimum duration of an event for it to be reported
diff --git a/lldb/tools/lldb-vscode/ProgressEvent.h b/lldb/tools/lldb-dap/ProgressEvent.h
similarity index 97%
rename from lldb/tools/lldb-vscode/ProgressEvent.h
rename to lldb/tools/lldb-dap/ProgressEvent.h
index 85006e4c0a810a3..dac21977add2d04 100644
--- a/lldb/tools/lldb-vscode/ProgressEvent.h
+++ b/lldb/tools/lldb-dap/ProgressEvent.h
@@ -12,17 +12,13 @@
 #include <queue>
 #include <thread>
 
-#include "VSCodeForward.h"
+#include "DAPForward.h"
 
 #include "llvm/Support/JSON.h"
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
-enum ProgressEventType {
-  progressStart,
-  progressUpdate,
-  progressEnd
-};
+enum ProgressEventType { progressStart, progressUpdate, progressEnd };
 
 class ProgressEvent;
 using ProgressEventReportCallback = std::function<void(ProgressEvent &)>;
@@ -157,4 +153,4 @@ class ProgressEventReporter {
   std::mutex m_mutex;
 };
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
diff --git a/lldb/tools/lldb-vscode/README.md b/lldb/tools/lldb-dap/README.md
similarity index 83%
rename from lldb/tools/lldb-vscode/README.md
rename to lldb/tools/lldb-dap/README.md
index 078129026cb0cc9..00ceb0bedc40a4f 100644
--- a/lldb/tools/lldb-vscode/README.md
+++ b/lldb/tools/lldb-dap/README.md
@@ -18,9 +18,10 @@
 
 # Introduction
 
-The `lldb-vscode` tool creates a command line tool that implements the [Visual
-Studio Code Debug API](https://code.visualstudio.com/docs/extensionAPI/api-debugging).
-It can be installed as an extension for the Visual Studio Code and Nuclide IDE.
+The `lldb-dap` tool (formerly `lldb-vscode`) creates a command line tool that
+implements the [Debug Adapter
+Protocol](https://microsoft.github.io/debug-adapter-protocol/). It can be
+installed as an extension for Visual Studio Code and other IDEs supporting DAP.
 The protocol is easy to run remotely and also can allow other tools and IDEs to
 get a full featured debugger with a well defined protocol.
 
@@ -30,7 +31,7 @@ Installing the plug-in involves creating a directory in any location outside of
 `~/.vscode/extensions`. For example, `~/vscode-lldb` is a valid one. You'll also
 need a subfolder `bin`, e.g. `~/vscode-lldb/bin`. Then copy the `package.json`
 file that is in the same directory as this documentation into it, and symlink
-the `lldb-vscode` binary into the `bin` directory inside the plug-in directory.
+the `lldb-dap` binary into the `bin` directory inside the plug-in directory.
 
 Finally, on VS Code, execute the command
 `Developer: Install Extension from Location` and pick the folder you just
@@ -40,10 +41,10 @@ If you want to make a stand alone plug-in that you can send to others on UNIX
 systems:
 
 ```bash
-mkdir -p ~/llvm-org.lldb-vscode-0.1.0/bin
-cp package.json ~/llvm-org.lldb-vscode-0.1.0
-cd ~/llvm-org.lldb-vscode-0.1.0/bin
-cp /path/to/a/built/lldb-vscode .
+mkdir -p ~/llvm-org.lldb-dap-0.1.0/bin
+cp package.json ~/llvm-org.lldb-dap-0.1.0
+cd ~/llvm-org.lldb-dap-0.1.0/bin
+cp /path/to/a/built/lldb-dap .
 cp /path/to/a/built/liblldb.so .
 ```
 
@@ -51,48 +52,48 @@ If you want to make a stand alone plug-in that you can send to others on macOS
 systems:
 
 ```bash
-mkdir -p ~/llvm-org.lldb-vscode-0.1.0/bin
-cp package.json ~/llvm-org.lldb-vscode-0.1.0
-cd ~/llvm-org.lldb-vscode-0.1.0/bin
-cp /path/to/a/built/lldb-vscode .
+mkdir -p ~/llvm-org.lldb-dap-0.1.0/bin
+cp package.json ~/llvm-org.lldb-dap-0.1.0
+cd ~/llvm-org.lldb-dap-0.1.0/bin
+cp /path/to/a/built/lldb-dap .
 rsync -av /path/to/a/built/LLDB.framework LLDB.framework
 ```
 
 You might need to create additional directories for the `liblldb.so` or
 `LLDB.framework` inside or next to the `bin` folder depending on how the
-[rpath](https://en.wikipedia.org/wiki/Rpath) is set in your `lldb-vscode`
+[rpath](https://en.wikipedia.org/wiki/Rpath) is set in your `lldb-dap`
 binary. By default the `Debug` builds of LLDB usually includes
 the current executable directory in the rpath, so these steps should work for
 most people.
 
-To create a plug-in that symlinks into your `lldb-vscode` in your build
+To create a plug-in that symlinks into your `lldb-dap` in your build
 directory:
 
 ```bash
-mkdir -p ~/llvm-org.lldb-vscode-0.1.0/bin
-cp package.json ~/llvm-org.lldb-vscode-0.1.0
-cd ~/llvm-org.lldb-vscode-0.1.0/bin
-ln -s /path/to/a/built/lldb-vscode
+mkdir -p ~/llvm-org.lldb-dap-0.1.0/bin
+cp package.json ~/llvm-org.lldb-dap-0.1.0
+cd ~/llvm-org.lldb-dap-0.1.0/bin
+ln -s /path/to/a/built/lldb-dap
 ```
 
-This is handy if you want to debug and develope the `lldb-vscode` executable
+This is handy if you want to debug and develop the `lldb-dap` executable
 when adding features or fixing bugs.
 
 # Configurations
 
 Launching to attaching require you to create a [launch configuration](https://code.visualstudio.com/Docs/editor/debugging#_launch-configurations). This file
-defines arguments that get passed to `lldb-vscode` and the configuration settings
+defines arguments that get passed to `lldb-dap` and the configuration settings
 control how the launch or attach happens.
 
 ## Launch Configuration Settings
 
 When you launch a program with Visual Studio Code you will need to create a [launch.json](https://code.visualstudio.com/Docs/editor/debugging#_launch-configurations)
-file that defines how your program will be run. The JSON configuration file can contain the following `lldb-vscode` specific launch key/value pairs:
+file that defines how your program will be run. The JSON configuration file can contain the following `lldb-dap` specific launch key/value pairs:
 
 |parameter          |type|req |         |
 |-------------------|----|:--:|---------|
 |**name**           |string|Y| A configuration name that will be displayed in the IDE.
-|**type**           |string|Y| Must be "lldb-vscode".
+|**type**           |string|Y| Must be "lldb-dap".
 |**request**        |string|Y| Must be "launch".
 |**program**        |string|Y| Path to the executable to launch.
 |**args**           |[string]|| An array of command line argument strings to be passed to the program being launched.
@@ -106,7 +107,7 @@ file that defines how your program will be run. The JSON configuration file can
 |**exitCommands**   |[string]| | LLDB commands executed when the program exits. Commands and command output will be sent to the debugger console when they are executed.
 |**terminateCommands** |[string]| | LLDB commands executed when the debugging session ends. Commands and command output will be sent to the debugger console when they are executed.
 |**sourceMap**      |[string[2]]| | Specify an array of path re-mappings. Each element in the array must be a two element array containing a source and destination pathname.
-|**debuggerRoot**   | string| |Specify a working directory to use when launching lldb-vscode. If the debug information in your executable contains relative paths, this option can be used so that `lldb-vscode` can find source files and object files that have relative paths.
+|**debuggerRoot**   | string| |Specify a working directory to use when launching lldb-dap. If the debug information in your executable contains relative paths, this option can be used so that `lldb-dap` can find source files and object files that have relative paths.
 
 ## Attaching Settings
 
@@ -116,12 +117,12 @@ When attaching to a process using LLDB you can attach in a few ways
 2. Attach to an existing process by name
 3. Attach by name by waiting for the next instance of a process to launch
 
-The JSON configuration file can contain the following `lldb-vscode` specific launch key/value pairs:
+The JSON configuration file can contain the following `lldb-dap` specific launch key/value pairs:
 
 |parameter          |type    |req |         |
 |-------------------|--------|:--:|---------|
 |**name**           |string  |Y| A configuration name that will be displayed in the IDE.
-|**type**           |string  |Y| Must be "lldb-vscode".
+|**type**           |string  |Y| Must be "lldb-dap".
 |**request**        |string  |Y| Must be "attach".
 |**program**        |string  | | Path to the executable to attach to. This value is optional but can help to resolve breakpoints prior the attaching to the program.
 |**pid**            |number  | | The process id of the process you wish to attach to. If **pid** is omitted, the debugger will attempt to attach to the program by finding a process whose file name matches the file name from **porgram**. Setting this value to `${command:pickMyProcess}` will allow interactive process selection in the IDE.
@@ -143,7 +144,7 @@ adds `FOO=1` and `bar` to the environment:
 
 ```javascript
 {
-  "type": "lldb-vscode",
+  "type": "lldb-dap",
   "request": "launch",
   "name": "Debug",
   "program": "/tmp/a.out",
@@ -158,7 +159,7 @@ This will attach to a process `a.out` whose process ID is 123:
 
 ```javascript
 {
-  "type": "lldb-vscode",
+  "type": "lldb-dap",
   "request": "attach",
   "name": "Attach to PID",
   "program": "/tmp/a.out",
@@ -175,7 +176,7 @@ above configuration:
 ```javascript
 {
   "name": "Attach to Name",
-  "type": "lldb-vscode",
+  "type": "lldb-dap",
   "request": "attach",
   "program": "/tmp/a.out",
 }
@@ -187,7 +188,7 @@ to be launched you can add the "waitFor" key value pair:
 ```javascript
 {
   "name": "Attach to Name (wait)",
-  "type": "lldb-vscode",
+  "type": "lldb-dap",
   "request": "attach",
   "program": "/tmp/a.out",
   "waitFor": true
@@ -205,7 +206,7 @@ This loads the coredump file `/cores/123.core` associated with the program
 ```javascript
 {
   "name": "Load coredump",
-  "type": "lldb-vscode",
+  "type": "lldb-dap",
   "request": "attach",
   "coreFile": "/cores/123.core",
   "program": "/tmp/a.out"
@@ -221,7 +222,7 @@ locally on port `2345`.
 ```javascript
 {
   "name": "Local Debug Server",
-  "type": "lldb-vscode",
+  "type": "lldb-dap",
   "request": "attach",
   "program": "/tmp/a.out",
   "attachCommands": ["gdb-remote 2345"],
@@ -237,7 +238,7 @@ port `5678` of that other machine.
 ```javascript
 {
   "name": "Remote Debug Server",
-  "type": "lldb-vscode",
+  "type": "lldb-dap",
   "request": "attach",
   "program": "/tmp/a.out",
   "attachCommands": ["gdb-remote hostname:5678"],
@@ -246,12 +247,12 @@ port `5678` of that other machine.
 
 # Custom debugger commands
 
-The `lldb-vscode` tool includes additional custom commands to support the Debug
+The `lldb-dap` tool includes additional custom commands to support the Debug
 Adapter Protocol features.
 
 ## startDebugging
 
-Using the command `lldb-vscode startDebugging` it is possible to trigger a
+Using the command `lldb-dap startDebugging` it is possible to trigger a
 reverse request to the client requesting a child debug session with the
 specified configuration. For example, this can be used to attached to forked or
 spawned processes. For more information see
@@ -260,7 +261,7 @@ spawned processes. For more information see
 The custom command has the following format:
 
 ```
-lldb-vscode startDebugging <launch|attach> <configuration>
+lldb-dap startDebugging <launch|attach> <configuration>
 ```
 
 This will launch a server and then request a child debug session for a client.
@@ -269,14 +270,14 @@ This will launch a server and then request a child debug session for a client.
 {
   "program": "server",
   "postRunCommand": [
-    "lldb-vscode startDebugging launch '{\"program\":\"client\"}'"
+    "lldb-dap startDebugging launch '{\"program\":\"client\"}'"
   ]
 }
 ```
 
 ## repl-mode
 
-Inspect or adjust the behavior of lldb-vscode repl evaluation requests. The
+Inspect or adjust the behavior of lldb-dap repl evaluation requests. The
 supported modes are `variable`, `command` and `auto`.
 
 - `variable` - Variable mode expressions are evaluated in the context of the
@@ -291,4 +292,4 @@ supported modes are `variable`, `command` and `auto`.
 
 The initial repl-mode can be configured with the cli flag `--repl-mode=<mode>`
 and may also be adjusted at runtime using the lldb command
-`lldb-vscode repl-mode <mode>`.
+`lldb-dap repl-mode <mode>`.
diff --git a/lldb/tools/lldb-vscode/RunInTerminal.cpp b/lldb/tools/lldb-dap/RunInTerminal.cpp
similarity index 98%
rename from lldb/tools/lldb-vscode/RunInTerminal.cpp
rename to lldb/tools/lldb-dap/RunInTerminal.cpp
index 25260afa64be373..ad019b8a56a4fa6 100644
--- a/lldb/tools/lldb-vscode/RunInTerminal.cpp
+++ b/lldb/tools/lldb-dap/RunInTerminal.cpp
@@ -25,7 +25,7 @@
 
 using namespace llvm;
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
 const RunInTerminalMessagePid *RunInTerminalMessage::GetAsPidMessage() const {
   return static_cast<const RunInTerminalMessagePid *>(this);
@@ -163,11 +163,11 @@ std::string RunInTerminalDebugAdapterCommChannel::GetLauncherError() {
 Expected<std::shared_ptr<FifoFile>> CreateRunInTerminalCommFile() {
   SmallString<256> comm_file;
   if (std::error_code EC = sys::fs::getPotentiallyUniqueTempFileName(
-          "lldb-vscode-run-in-terminal-comm", "", comm_file))
+          "lldb-dap-run-in-terminal-comm", "", comm_file))
     return createStringError(EC, "Error making unique file name for "
                                  "runInTerminal communication files");
 
   return CreateFifoFile(comm_file.str());
 }
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
diff --git a/lldb/tools/lldb-vscode/RunInTerminal.h b/lldb/tools/lldb-dap/RunInTerminal.h
similarity index 94%
rename from lldb/tools/lldb-vscode/RunInTerminal.h
rename to lldb/tools/lldb-dap/RunInTerminal.h
index cdccdd6bcf9b9a5..2fbe3acbb408427 100644
--- a/lldb/tools/lldb-vscode/RunInTerminal.h
+++ b/lldb/tools/lldb-dap/RunInTerminal.h
@@ -6,15 +6,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLDB_TOOLS_LLDB_VSCODE_RUNINTERMINAL_H
-#define LLDB_TOOLS_LLDB_VSCODE_RUNINTERMINAL_H
+#ifndef LLDB_TOOLS_LLDB_DAP_RUNINTERMINAL_H
+#define LLDB_TOOLS_LLDB_DAP_RUNINTERMINAL_H
 
 #include "FifoFiles.h"
 
 #include <future>
 #include <thread>
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
 enum RunInTerminalMessageKind {
   eRunInTerminalMessageKindPID = 0,
@@ -124,6 +124,6 @@ class RunInTerminalDebugAdapterCommChannel {
 /// the runInTerminal launcher.
 llvm::Expected<std::shared_ptr<FifoFile>> CreateRunInTerminalCommFile();
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
 
-#endif // LLDB_TOOLS_LLDB_VSCODE_RUNINTERMINAL_H
+#endif // LLDB_TOOLS_LLDB_DAP_RUNINTERMINAL_H
diff --git a/lldb/tools/lldb-vscode/SourceBreakpoint.cpp b/lldb/tools/lldb-dap/SourceBreakpoint.cpp
similarity index 88%
rename from lldb/tools/lldb-vscode/SourceBreakpoint.cpp
rename to lldb/tools/lldb-dap/SourceBreakpoint.cpp
index 7c57bf7d7c4f534..3bd83c0a6874ded 100644
--- a/lldb/tools/lldb-vscode/SourceBreakpoint.cpp
+++ b/lldb/tools/lldb-dap/SourceBreakpoint.cpp
@@ -7,9 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "SourceBreakpoint.h"
-#include "VSCode.h"
+#include "DAP.h"
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
 SourceBreakpoint::SourceBreakpoint(const llvm::json::Object &obj)
     : BreakpointBase(obj), line(GetUnsigned(obj, "line", 0)),
@@ -17,7 +17,7 @@ SourceBreakpoint::SourceBreakpoint(const llvm::json::Object &obj)
 
 void SourceBreakpoint::SetBreakpoint(const llvm::StringRef source_path) {
   lldb::SBFileSpecList module_list;
-  bp = g_vsc.target.BreakpointCreateByLocation(source_path.str().c_str(), line,
+  bp = g_dap.target.BreakpointCreateByLocation(source_path.str().c_str(), line,
                                                column, 0, module_list);
   // See comments in BreakpointBase::GetBreakpointLabel() for details of why
   // we add a label to our breakpoints.
@@ -30,4 +30,4 @@ void SourceBreakpoint::SetBreakpoint(const llvm::StringRef source_path) {
     SetLogMessage();
 }
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
diff --git a/lldb/tools/lldb-vscode/SourceBreakpoint.h b/lldb/tools/lldb-dap/SourceBreakpoint.h
similarity index 87%
rename from lldb/tools/lldb-vscode/SourceBreakpoint.h
rename to lldb/tools/lldb-dap/SourceBreakpoint.h
index 2891d57ee8ad01c..f4b54a44fc68756 100644
--- a/lldb/tools/lldb-vscode/SourceBreakpoint.h
+++ b/lldb/tools/lldb-dap/SourceBreakpoint.h
@@ -6,13 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLDB_TOOLS_LLDB_VSCODE_SOURCEBREAKPOINT_H
-#define LLDB_TOOLS_LLDB_VSCODE_SOURCEBREAKPOINT_H
+#ifndef LLDB_TOOLS_LLDB_DAP_SOURCEBREAKPOINT_H
+#define LLDB_TOOLS_LLDB_DAP_SOURCEBREAKPOINT_H
 
 #include "BreakpointBase.h"
 #include "llvm/ADT/StringRef.h"
 
-namespace lldb_vscode {
+namespace lldb_dap {
 
 struct SourceBreakpoint : public BreakpointBase {
 
@@ -33,6 +33,6 @@ inline bool operator<(const SourceBreakpoint &lhs,
   return lhs.line < rhs.line;
 }
 
-} // namespace lldb_vscode
+} // namespace lldb_dap
 
 #endif
diff --git a/lldb/tools/lldb-vscode/lldb-vscode-Info.plist.in b/lldb/tools/lldb-dap/lldb-dap-Info.plist.in
similarity index 88%
rename from lldb/tools/lldb-vscode/lldb-vscode-Info.plist.in
rename to lldb/tools/lldb-dap/lldb-dap-Info.plist.in
index 2098e190d6ba7ab..7d01d3145d929b2 100644
--- a/lldb/tools/lldb-vscode/lldb-vscode-Info.plist.in
+++ b/lldb/tools/lldb-dap/lldb-dap-Info.plist.in
@@ -5,11 +5,11 @@
 	<key>CFBundleDevelopmentRegion</key>
 	<string>English</string>
 	<key>CFBundleIdentifier</key>
-	<string>com.apple.lldb-vscode</string>
+	<string>com.apple.lldb-dap</string>
 	<key>CFBundleInfoDictionaryVersion</key>
 	<string>6.0</string>
 	<key>CFBundleName</key>
-	<string>lldb-vscode</string>
+	<string>lldb-dap</string>
 	<key>CFBundleVersion</key>
 	<string>${LLDB_VERSION}</string>
 	<key>SecTaskAccess</key>
diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp
similarity index 88%
rename from lldb/tools/lldb-vscode/lldb-vscode.cpp
rename to lldb/tools/lldb-dap/lldb-dap.cpp
index 3904d430c49b4cd..d8d81647e81008a 100644
--- a/lldb/tools/lldb-vscode/lldb-vscode.cpp
+++ b/lldb/tools/lldb-dap/lldb-dap.cpp
@@ -1,4 +1,4 @@
-//===-- lldb-vscode.cpp -----------------------------------------*- C++ -*-===//
+//===-- lldb-dap.cpp -----------------------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "VSCode.h"
+#include "DAP.h"
 
 #include <cassert>
 #include <climits>
@@ -73,7 +73,7 @@
 typedef int socklen_t;
 #endif
 
-using namespace lldb_vscode;
+using namespace lldb_dap;
 
 namespace {
 using namespace llvm::opt;
@@ -109,11 +109,11 @@ enum LaunchMethod { Launch, Attach, AttachForSuspendedLaunch };
 lldb::SBValueList *GetTopLevelScope(int64_t variablesReference) {
   switch (variablesReference) {
   case VARREF_LOCALS:
-    return &g_vsc.variables.locals;
+    return &g_dap.variables.locals;
   case VARREF_GLOBALS:
-    return &g_vsc.variables.globals;
+    return &g_dap.variables.globals;
   case VARREF_REGS:
-    return &g_vsc.variables.registers;
+    return &g_dap.variables.registers;
   default:
     return nullptr;
   }
@@ -125,8 +125,8 @@ SOCKET AcceptConnection(int portno) {
   struct sockaddr_in serv_addr, cli_addr;
   SOCKET sockfd = socket(AF_INET, SOCK_STREAM, 0);
   if (sockfd < 0) {
-    if (g_vsc.log)
-      *g_vsc.log << "error: opening socket (" << strerror(errno) << ")"
+    if (g_dap.log)
+      *g_dap.log << "error: opening socket (" << strerror(errno) << ")"
                  << std::endl;
   } else {
     memset((char *)&serv_addr, 0, sizeof(serv_addr));
@@ -135,8 +135,8 @@ SOCKET AcceptConnection(int portno) {
     serv_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
     serv_addr.sin_port = htons(portno);
     if (bind(sockfd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0) {
-      if (g_vsc.log)
-        *g_vsc.log << "error: binding socket (" << strerror(errno) << ")"
+      if (g_dap.log)
+        *g_dap.log << "error: binding socket (" << strerror(errno) << ")"
                    << std::endl;
     } else {
       listen(sockfd, 5);
@@ -145,8 +145,8 @@ SOCKET AcceptConnection(int portno) {
           llvm::sys::RetryAfterSignal(static_cast<SOCKET>(-1), accept, sockfd,
                                       (struct sockaddr *)&cli_addr, &clilen);
       if (newsockfd < 0)
-        if (g_vsc.log)
-          *g_vsc.log << "error: accept (" << strerror(errno) << ")"
+        if (g_dap.log)
+          *g_dap.log << "error: accept (" << strerror(errno) << ")"
                      << std::endl;
     }
 #if defined(_WIN32)
@@ -176,7 +176,7 @@ void SendProcessExitedEvent(lldb::SBProcess &process) {
   llvm::json::Object body;
   body.try_emplace("exitCode", (int64_t)process.GetExitStatus());
   event.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(event)));
+  g_dap.SendJSON(llvm::json::Value(std::move(event)));
 }
 
 void SendThreadExitedEvent(lldb::tid_t tid) {
@@ -185,29 +185,29 @@ void SendThreadExitedEvent(lldb::tid_t tid) {
   body.try_emplace("reason", "exited");
   body.try_emplace("threadId", (int64_t)tid);
   event.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(event)));
+  g_dap.SendJSON(llvm::json::Value(std::move(event)));
 }
 
 // Send a "continued" event to indicate the process is in the running state.
 void SendContinuedEvent() {
-  lldb::SBProcess process = g_vsc.target.GetProcess();
+  lldb::SBProcess process = g_dap.target.GetProcess();
   if (!process.IsValid()) {
     return;
   }
 
   // If the focus thread is not set then we haven't reported any thread status
   // to the client, so nothing to report.
-  if (!g_vsc.configuration_done_sent ||
-      g_vsc.focus_tid == LLDB_INVALID_THREAD_ID) {
+  if (!g_dap.configuration_done_sent ||
+      g_dap.focus_tid == LLDB_INVALID_THREAD_ID) {
     return;
   }
 
   llvm::json::Object event(CreateEventObject("continued"));
   llvm::json::Object body;
-  body.try_emplace("threadId", (int64_t)g_vsc.focus_tid);
+  body.try_emplace("threadId", (int64_t)g_dap.focus_tid);
   body.try_emplace("allThreadsContinued", true);
   event.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(event)));
+  g_dap.SendJSON(llvm::json::Value(std::move(event)));
 }
 
 // Send a "terminated" event to indicate the process is done being
@@ -217,7 +217,7 @@ void SendTerminatedEvent() {
   // the threads executing EventThreadFunction and request_discontinue
   // respectively may call SendTerminatedEvent simultaneously. Without any
   // synchronization, the thread executing EventThreadFunction may set
-  // g_vsc.sent_terminated_event before the thread executing
+  // g_dap.sent_terminated_event before the thread executing
   // request_discontinue has had a chance to test it, in which case the latter
   // would move ahead to issue a response to the disconnect request. Said
   // response may get dispatched ahead of the terminated event compelling the
@@ -226,24 +226,24 @@ void SendTerminatedEvent() {
   // synchronize simultaneous calls to SendTerminatedEvent.
   static std::mutex mutex;
   std::lock_guard<std::mutex> locker(mutex);
-  if (!g_vsc.sent_terminated_event) {
-    g_vsc.sent_terminated_event = true;
-    g_vsc.RunTerminateCommands();
+  if (!g_dap.sent_terminated_event) {
+    g_dap.sent_terminated_event = true;
+    g_dap.RunTerminateCommands();
     // Send a "terminated" event
     llvm::json::Object event(CreateTerminatedEventObject());
-    g_vsc.SendJSON(llvm::json::Value(std::move(event)));
+    g_dap.SendJSON(llvm::json::Value(std::move(event)));
   }
 }
 
 // Send a thread stopped event for all threads as long as the process
 // is stopped.
 void SendThreadStoppedEvent() {
-  lldb::SBProcess process = g_vsc.target.GetProcess();
+  lldb::SBProcess process = g_dap.target.GetProcess();
   if (process.IsValid()) {
     auto state = process.GetState();
     if (state == lldb::eStateStopped) {
       llvm::DenseSet<lldb::tid_t> old_thread_ids;
-      old_thread_ids.swap(g_vsc.thread_ids);
+      old_thread_ids.swap(g_dap.thread_ids);
       uint32_t stop_id = process.GetStopID();
       const uint32_t num_threads = process.GetNumThreads();
 
@@ -259,10 +259,10 @@ void SendThreadStoppedEvent() {
         const lldb::tid_t tid = thread.GetThreadID();
         const bool has_reason = ThreadHasStopReason(thread);
         // If the focus thread doesn't have a stop reason, clear the thread ID
-        if (tid == g_vsc.focus_tid) {
+        if (tid == g_dap.focus_tid) {
           focus_thread_exists = true;
           if (!has_reason)
-            g_vsc.focus_tid = LLDB_INVALID_THREAD_ID;
+            g_dap.focus_tid = LLDB_INVALID_THREAD_ID;
         }
         if (has_reason) {
           ++num_threads_with_reason;
@@ -271,47 +271,47 @@ void SendThreadStoppedEvent() {
         }
       }
 
-      // We will have cleared g_vsc.focus_tid if the focus thread doesn't have
+      // We will have cleared g_dap.focus_tid if the focus thread doesn't have
       // a stop reason, so if it was cleared, or wasn't set, or doesn't exist,
       // then set the focus thread to the first thread with a stop reason.
-      if (!focus_thread_exists || g_vsc.focus_tid == LLDB_INVALID_THREAD_ID)
-        g_vsc.focus_tid = first_tid_with_reason;
+      if (!focus_thread_exists || g_dap.focus_tid == LLDB_INVALID_THREAD_ID)
+        g_dap.focus_tid = first_tid_with_reason;
 
       // If no threads stopped with a reason, then report the first one so
       // we at least let the UI know we stopped.
       if (num_threads_with_reason == 0) {
         lldb::SBThread thread = process.GetThreadAtIndex(0);
-        g_vsc.focus_tid = thread.GetThreadID();
-        g_vsc.SendJSON(CreateThreadStopped(thread, stop_id));
+        g_dap.focus_tid = thread.GetThreadID();
+        g_dap.SendJSON(CreateThreadStopped(thread, stop_id));
       } else {
         for (uint32_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
           lldb::SBThread thread = process.GetThreadAtIndex(thread_idx);
-          g_vsc.thread_ids.insert(thread.GetThreadID());
+          g_dap.thread_ids.insert(thread.GetThreadID());
           if (ThreadHasStopReason(thread)) {
-            g_vsc.SendJSON(CreateThreadStopped(thread, stop_id));
+            g_dap.SendJSON(CreateThreadStopped(thread, stop_id));
           }
         }
       }
 
       for (auto tid : old_thread_ids) {
-        auto end = g_vsc.thread_ids.end();
-        auto pos = g_vsc.thread_ids.find(tid);
+        auto end = g_dap.thread_ids.end();
+        auto pos = g_dap.thread_ids.find(tid);
         if (pos == end)
           SendThreadExitedEvent(tid);
       }
     } else {
-      if (g_vsc.log)
-        *g_vsc.log << "error: SendThreadStoppedEvent() when process"
+      if (g_dap.log)
+        *g_dap.log << "error: SendThreadStoppedEvent() when process"
                       " isn't stopped ("
                    << lldb::SBDebugger::StateAsCString(state) << ')'
                    << std::endl;
     }
   } else {
-    if (g_vsc.log)
-      *g_vsc.log << "error: SendThreadStoppedEvent() invalid process"
+    if (g_dap.log)
+      *g_dap.log << "error: SendThreadStoppedEvent() invalid process"
                  << std::endl;
   }
-  g_vsc.RunStopCommands();
+  g_dap.RunStopCommands();
 }
 
 // "ProcessEvent": {
@@ -369,13 +369,13 @@ void SendThreadStoppedEvent() {
 //   ]
 // }
 void SendProcessEvent(LaunchMethod launch_method) {
-  lldb::SBFileSpec exe_fspec = g_vsc.target.GetExecutable();
+  lldb::SBFileSpec exe_fspec = g_dap.target.GetExecutable();
   char exe_path[PATH_MAX];
   exe_fspec.GetPath(exe_path, sizeof(exe_path));
   llvm::json::Object event(CreateEventObject("process"));
   llvm::json::Object body;
   EmplaceSafeString(body, "name", std::string(exe_path));
-  const auto pid = g_vsc.target.GetProcess().GetProcessID();
+  const auto pid = g_dap.target.GetProcess().GetProcessID();
   body.try_emplace("systemProcessId", (int64_t)pid);
   body.try_emplace("isLocalProcess", true);
   const char *startMethod = nullptr;
@@ -392,7 +392,7 @@ void SendProcessEvent(LaunchMethod launch_method) {
   }
   body.try_emplace("startMethod", startMethod);
   event.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(event)));
+  g_dap.SendJSON(llvm::json::Value(std::move(event)));
 }
 
 // Grab any STDOUT and STDERR from the process and send it up to VS Code
@@ -401,22 +401,22 @@ void SendStdOutStdErr(lldb::SBProcess &process) {
   char buffer[1024];
   size_t count;
   while ((count = process.GetSTDOUT(buffer, sizeof(buffer))) > 0)
-    g_vsc.SendOutput(OutputType::Stdout, llvm::StringRef(buffer, count));
+    g_dap.SendOutput(OutputType::Stdout, llvm::StringRef(buffer, count));
   while ((count = process.GetSTDERR(buffer, sizeof(buffer))) > 0)
-    g_vsc.SendOutput(OutputType::Stderr, llvm::StringRef(buffer, count));
+    g_dap.SendOutput(OutputType::Stderr, llvm::StringRef(buffer, count));
 }
 
 void ProgressEventThreadFunction() {
-  lldb::SBListener listener("lldb-vscode.progress.listener");
-  g_vsc.debugger.GetBroadcaster().AddListener(
+  lldb::SBListener listener("lldb-dap.progress.listener");
+  g_dap.debugger.GetBroadcaster().AddListener(
       listener, lldb::SBDebugger::eBroadcastBitProgress);
-  g_vsc.broadcaster.AddListener(listener, eBroadcastBitStopProgressThread);
+  g_dap.broadcaster.AddListener(listener, eBroadcastBitStopProgressThread);
   lldb::SBEvent event;
   bool done = false;
   while (!done) {
     if (listener.WaitForEvent(1, event)) {
       const auto event_mask = event.GetType();
-      if (event.BroadcasterMatchesRef(g_vsc.broadcaster)) {
+      if (event.BroadcasterMatchesRef(g_dap.broadcaster)) {
         if (event_mask & eBroadcastBitStopProgressThread) {
           done = true;
         }
@@ -428,7 +428,7 @@ void ProgressEventThreadFunction() {
         const char *message = lldb::SBDebugger::GetProgressFromEvent(
             event, progress_id, completed, total, is_debugger_specific);
         if (message)
-          g_vsc.SendProgressEvent(progress_id, message, completed, total);
+          g_dap.SendProgressEvent(progress_id, message, completed, total);
       }
     }
   }
@@ -441,7 +441,7 @@ void ProgressEventThreadFunction() {
 // is required.
 void EventThreadFunction() {
   lldb::SBEvent event;
-  lldb::SBListener listener = g_vsc.debugger.GetListener();
+  lldb::SBListener listener = g_dap.debugger.GetListener();
   bool done = false;
   while (!done) {
     if (listener.WaitForEvent(1, event)) {
@@ -477,7 +477,7 @@ void EventThreadFunction() {
             // stop events which we do not want to send an event for. We will
             // manually send a stopped event in request_configurationDone(...)
             // so don't send any before then.
-            if (g_vsc.configuration_done_sent) {
+            if (g_dap.configuration_done_sent) {
               // Only report a stopped event if the process was not
               // automatically restarted.
               if (!lldb::SBProcess::GetRestartedFromEvent(event)) {
@@ -487,7 +487,7 @@ void EventThreadFunction() {
             }
             break;
           case lldb::eStateRunning:
-            g_vsc.WillContinue();
+            g_dap.WillContinue();
             SendContinuedEvent();
             break;
           case lldb::eStateExited:
@@ -495,12 +495,12 @@ void EventThreadFunction() {
             // just killed with the old PID, or even with no PID. In that case
             // we don't have to terminate the session.
             if (process.GetProcessID() == LLDB_INVALID_PROCESS_ID ||
-                process.GetProcessID() == g_vsc.restarting_process_id) {
-              g_vsc.restarting_process_id = LLDB_INVALID_PROCESS_ID;
+                process.GetProcessID() == g_dap.restarting_process_id) {
+              g_dap.restarting_process_id = LLDB_INVALID_PROCESS_ID;
             } else {
               // Run any exit LLDB commands the user specified in the
               // launch.json
-              g_vsc.RunExitCommands();
+              g_dap.RunExitCommands();
               SendProcessExitedEvent(process);
               SendTerminatedEvent();
               done = true;
@@ -537,10 +537,10 @@ void EventThreadFunction() {
             body.try_emplace("breakpoint", source_bp);
             body.try_emplace("reason", "changed");
             bp_event.try_emplace("body", std::move(body));
-            g_vsc.SendJSON(llvm::json::Value(std::move(bp_event)));
+            g_dap.SendJSON(llvm::json::Value(std::move(bp_event)));
           }
         }
-      } else if (event.BroadcasterMatchesRef(g_vsc.broadcaster)) {
+      } else if (event.BroadcasterMatchesRef(g_dap.broadcaster)) {
         if (event_mask & eBroadcastBitStopEventThread) {
           done = true;
         }
@@ -569,7 +569,7 @@ void SetSourceMapFromArguments(const llvm::json::Object &arguments) {
       if (mapping == nullptr || mapping->size() != 2 ||
           (*mapping)[0].kind() != llvm::json::Value::String ||
           (*mapping)[1].kind() != llvm::json::Value::String) {
-        g_vsc.SendOutput(OutputType::Console, llvm::StringRef(sourceMapHelp));
+        g_dap.SendOutput(OutputType::Console, llvm::StringRef(sourceMapHelp));
         return;
       }
       auto mapFrom = GetAsString((*mapping)[0]);
@@ -578,7 +578,7 @@ void SetSourceMapFromArguments(const llvm::json::Object &arguments) {
     }
   } else {
     if (ObjectContainsKey(arguments, "sourceMap")) {
-      g_vsc.SendOutput(OutputType::Console, llvm::StringRef(sourceMapHelp));
+      g_dap.SendOutput(OutputType::Console, llvm::StringRef(sourceMapHelp));
       return;
     }
     if (sourcePath.empty())
@@ -588,7 +588,7 @@ void SetSourceMapFromArguments(const llvm::json::Object &arguments) {
   }
   strm.flush();
   if (!sourceMapCommand.empty()) {
-    g_vsc.RunLLDBCommands("Setting source map:", {sourceMapCommand});
+    g_dap.RunLLDBCommands("Setting source map:", {sourceMapCommand});
   }
 }
 
@@ -621,8 +621,8 @@ void SetSourceMapFromArguments(const llvm::json::Object &arguments) {
 //   }]
 // }
 void request_attach(const llvm::json::Object &request) {
-  g_vsc.is_attach = true;
-  g_vsc.last_launch_or_attach_request = request;
+  g_dap.is_attach = true;
+  g_dap.last_launch_or_attach_request = request;
   llvm::json::Object response;
   lldb::SBError error;
   FillResponse(request, response);
@@ -634,83 +634,83 @@ void request_attach(const llvm::json::Object &request) {
     attach_info.SetProcessID(pid);
   const auto wait_for = GetBoolean(arguments, "waitFor", false);
   attach_info.SetWaitForLaunch(wait_for, false /*async*/);
-  g_vsc.init_commands = GetStrings(arguments, "initCommands");
-  g_vsc.pre_run_commands = GetStrings(arguments, "preRunCommands");
-  g_vsc.stop_commands = GetStrings(arguments, "stopCommands");
-  g_vsc.exit_commands = GetStrings(arguments, "exitCommands");
-  g_vsc.terminate_commands = GetStrings(arguments, "terminateCommands");
+  g_dap.init_commands = GetStrings(arguments, "initCommands");
+  g_dap.pre_run_commands = GetStrings(arguments, "preRunCommands");
+  g_dap.stop_commands = GetStrings(arguments, "stopCommands");
+  g_dap.exit_commands = GetStrings(arguments, "exitCommands");
+  g_dap.terminate_commands = GetStrings(arguments, "terminateCommands");
   auto attachCommands = GetStrings(arguments, "attachCommands");
   llvm::StringRef core_file = GetString(arguments, "coreFile");
   const uint64_t timeout_seconds = GetUnsigned(arguments, "timeout", 30);
-  g_vsc.stop_at_entry =
+  g_dap.stop_at_entry =
       core_file.empty() ? GetBoolean(arguments, "stopOnEntry", false) : true;
   std::vector<std::string> postRunCommands =
       GetStrings(arguments, "postRunCommands");
   const llvm::StringRef debuggerRoot = GetString(arguments, "debuggerRoot");
-  g_vsc.enable_auto_variable_summaries =
+  g_dap.enable_auto_variable_summaries =
       GetBoolean(arguments, "enableAutoVariableSummaries", false);
-  g_vsc.enable_synthetic_child_debugging =
+  g_dap.enable_synthetic_child_debugging =
       GetBoolean(arguments, "enableSyntheticChildDebugging", false);
 
   // This is a hack for loading DWARF in .o files on Mac where the .o files
   // in the debug map of the main executable have relative paths which require
-  // the lldb-vscode binary to have its working directory set to that relative
+  // the lldb-dap binary to have its working directory set to that relative
   // root for the .o files in order to be able to load debug info.
   if (!debuggerRoot.empty())
     llvm::sys::fs::set_current_path(debuggerRoot);
 
   // Run any initialize LLDB commands the user specified in the launch.json
-  g_vsc.RunInitCommands();
+  g_dap.RunInitCommands();
 
   SetSourceMapFromArguments(*arguments);
 
   lldb::SBError status;
-  g_vsc.SetTarget(g_vsc.CreateTargetFromArguments(*arguments, status));
+  g_dap.SetTarget(g_dap.CreateTargetFromArguments(*arguments, status));
   if (status.Fail()) {
     response["success"] = llvm::json::Value(false);
     EmplaceSafeString(response, "message", status.GetCString());
-    g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+    g_dap.SendJSON(llvm::json::Value(std::move(response)));
     return;
   }
 
   // Run any pre run LLDB commands the user specified in the launch.json
-  g_vsc.RunPreRunCommands();
+  g_dap.RunPreRunCommands();
 
   if (pid == LLDB_INVALID_PROCESS_ID && wait_for) {
     char attach_msg[256];
     auto attach_msg_len = snprintf(attach_msg, sizeof(attach_msg),
                                    "Waiting to attach to \"%s\"...",
-                                   g_vsc.target.GetExecutable().GetFilename());
-    g_vsc.SendOutput(OutputType::Console,
+                                   g_dap.target.GetExecutable().GetFilename());
+    g_dap.SendOutput(OutputType::Console,
                      llvm::StringRef(attach_msg, attach_msg_len));
   }
   if (attachCommands.empty()) {
     // No "attachCommands", just attach normally.
     // Disable async events so the attach will be successful when we return from
     // the launch call and the launch will happen synchronously
-    g_vsc.debugger.SetAsync(false);
+    g_dap.debugger.SetAsync(false);
     if (core_file.empty())
-      g_vsc.target.Attach(attach_info, error);
+      g_dap.target.Attach(attach_info, error);
     else
-      g_vsc.target.LoadCore(core_file.data(), error);
+      g_dap.target.LoadCore(core_file.data(), error);
     // Reenable async events
-    g_vsc.debugger.SetAsync(true);
+    g_dap.debugger.SetAsync(true);
   } else {
     // We have "attachCommands" that are a set of commands that are expected
     // to execute the commands after which a process should be created. If there
     // is no valid process after running these commands, we have failed.
-    g_vsc.RunLLDBCommands("Running attachCommands:", attachCommands);
+    g_dap.RunLLDBCommands("Running attachCommands:", attachCommands);
     // The custom commands might have created a new target so we should use the
     // selected target after these commands are run.
-    g_vsc.target = g_vsc.debugger.GetSelectedTarget();
+    g_dap.target = g_dap.debugger.GetSelectedTarget();
 
     // Make sure the process is attached and stopped before proceeding as the
     // the launch commands are not run using the synchronous mode.
-    error = g_vsc.WaitForProcessToStop(timeout_seconds);
+    error = g_dap.WaitForProcessToStop(timeout_seconds);
   }
 
   if (error.Success() && core_file.empty()) {
-    auto attached_pid = g_vsc.target.GetProcess().GetProcessID();
+    auto attached_pid = g_dap.target.GetProcess().GetProcessID();
     if (attached_pid == LLDB_INVALID_PROCESS_ID) {
       if (attachCommands.empty())
         error.SetErrorString("failed to attach to a process");
@@ -723,13 +723,13 @@ void request_attach(const llvm::json::Object &request) {
     response["success"] = llvm::json::Value(false);
     EmplaceSafeString(response, "message", std::string(error.GetCString()));
   } else {
-    g_vsc.RunLLDBCommands("Running postRunCommands:", postRunCommands);
+    g_dap.RunLLDBCommands("Running postRunCommands:", postRunCommands);
   }
 
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
   if (error.Success()) {
     SendProcessEvent(Attach);
-    g_vsc.SendJSON(CreateEventObject("initialized"));
+    g_dap.SendJSON(CreateEventObject("initialized"));
   }
 }
 
@@ -790,12 +790,12 @@ void request_attach(const llvm::json::Object &request) {
 void request_continue(const llvm::json::Object &request) {
   llvm::json::Object response;
   FillResponse(request, response);
-  lldb::SBProcess process = g_vsc.target.GetProcess();
+  lldb::SBProcess process = g_dap.target.GetProcess();
   lldb::SBError error = process.Continue();
   llvm::json::Object body;
   body.try_emplace("allThreadsContinued", true);
   response.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 // "ConfigurationDoneRequest": {
@@ -832,12 +832,12 @@ void request_continue(const llvm::json::Object &request) {
 void request_configurationDone(const llvm::json::Object &request) {
   llvm::json::Object response;
   FillResponse(request, response);
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
-  g_vsc.configuration_done_sent = true;
-  if (g_vsc.stop_at_entry)
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.configuration_done_sent = true;
+  if (g_dap.stop_at_entry)
     SendThreadStoppedEvent();
   else
-    g_vsc.target.GetProcess().Continue();
+    g_dap.target.GetProcess().Continue();
 }
 
 // "DisconnectRequest": {
@@ -889,10 +889,10 @@ void request_disconnect(const llvm::json::Object &request) {
   FillResponse(request, response);
   auto arguments = request.getObject("arguments");
 
-  bool defaultTerminateDebuggee = g_vsc.is_attach ? false : true;
+  bool defaultTerminateDebuggee = g_dap.is_attach ? false : true;
   bool terminateDebuggee =
       GetBoolean(arguments, "terminateDebuggee", defaultTerminateDebuggee);
-  lldb::SBProcess process = g_vsc.target.GetProcess();
+  lldb::SBProcess process = g_dap.target.GetProcess();
   auto state = process.GetState();
   switch (state) {
   case lldb::eStateInvalid:
@@ -908,22 +908,22 @@ void request_disconnect(const llvm::json::Object &request) {
   case lldb::eStateSuspended:
   case lldb::eStateStopped:
   case lldb::eStateRunning:
-    g_vsc.debugger.SetAsync(false);
+    g_dap.debugger.SetAsync(false);
     lldb::SBError error = terminateDebuggee ? process.Kill() : process.Detach();
     if (!error.Success())
       response.try_emplace("error", error.GetCString());
-    g_vsc.debugger.SetAsync(true);
+    g_dap.debugger.SetAsync(true);
     break;
   }
   SendTerminatedEvent();
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
-  if (g_vsc.event_thread.joinable()) {
-    g_vsc.broadcaster.BroadcastEventByType(eBroadcastBitStopEventThread);
-    g_vsc.event_thread.join();
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
+  if (g_dap.event_thread.joinable()) {
+    g_dap.broadcaster.BroadcastEventByType(eBroadcastBitStopEventThread);
+    g_dap.event_thread.join();
   }
-  if (g_vsc.progress_event_thread.joinable()) {
-    g_vsc.broadcaster.BroadcastEventByType(eBroadcastBitStopProgressThread);
-    g_vsc.progress_event_thread.join();
+  if (g_dap.progress_event_thread.joinable()) {
+    g_dap.broadcaster.BroadcastEventByType(eBroadcastBitStopProgressThread);
+    g_dap.progress_event_thread.join();
   }
 }
 
@@ -932,13 +932,13 @@ void request_exceptionInfo(const llvm::json::Object &request) {
   FillResponse(request, response);
   auto arguments = request.getObject("arguments");
   llvm::json::Object body;
-  lldb::SBThread thread = g_vsc.GetLLDBThread(*arguments);
+  lldb::SBThread thread = g_dap.GetLLDBThread(*arguments);
   if (thread.IsValid()) {
     auto stopReason = thread.GetStopReason();
     if (stopReason == lldb::eStopReasonSignal)
       body.try_emplace("exceptionId", "signal");
     else if (stopReason == lldb::eStopReasonBreakpoint) {
-      ExceptionBreakpoint *exc_bp = g_vsc.GetExceptionBPFromStopReason(thread);
+      ExceptionBreakpoint *exc_bp = g_dap.GetExceptionBPFromStopReason(thread);
       if (exc_bp) {
         EmplaceSafeString(body, "exceptionId", exc_bp->filter);
         EmplaceSafeString(body, "description", exc_bp->label);
@@ -963,7 +963,7 @@ void request_exceptionInfo(const llvm::json::Object &request) {
     response["success"] = llvm::json::Value(false);
   }
   response.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 // "CompletionsRequest": {
@@ -1088,7 +1088,7 @@ void request_completions(const llvm::json::Object &request) {
   auto arguments = request.getObject("arguments");
 
   // If we have a frame, try to set the context for variable completions.
-  lldb::SBFrame frame = g_vsc.GetLLDBFrame(*arguments);
+  lldb::SBFrame frame = g_dap.GetLLDBFrame(*arguments);
   if (frame.IsValid()) {
     frame.GetThread().GetProcess().SetSelectedThread(frame.GetThread());
     frame.GetThread().SetSelectedFrame(frame.GetFrameID());
@@ -1107,7 +1107,7 @@ void request_completions(const llvm::json::Object &request) {
   }
   llvm::json::Array targets;
 
-  if (g_vsc.DetectExpressionContext(frame, text) ==
+  if (g_dap.DetectExpressionContext(frame, text) ==
       ExpressionContext::Variable) {
     char command[] = "expression -- ";
     text = command + text;
@@ -1116,7 +1116,7 @@ void request_completions(const llvm::json::Object &request) {
   lldb::SBStringList matches;
   lldb::SBStringList descriptions;
 
-  if (g_vsc.debugger.GetCommandInterpreter().HandleCompletionWithDescriptions(
+  if (g_dap.debugger.GetCommandInterpreter().HandleCompletionWithDescriptions(
           text.c_str(), offset, 0, 100, matches, descriptions)) {
     // The first element is the common substring after the cursor position for
     // all the matches. The rest of the elements are the matches so ignore the
@@ -1146,7 +1146,7 @@ void request_completions(const llvm::json::Object &request) {
 
   body.try_emplace("targets", std::move(targets));
   response.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 //  "EvaluateRequest": {
@@ -1255,16 +1255,16 @@ void request_evaluate(const llvm::json::Object &request) {
   FillResponse(request, response);
   llvm::json::Object body;
   auto arguments = request.getObject("arguments");
-  lldb::SBFrame frame = g_vsc.GetLLDBFrame(*arguments);
+  lldb::SBFrame frame = g_dap.GetLLDBFrame(*arguments);
   std::string expression = GetString(arguments, "expression").str();
   llvm::StringRef context = GetString(arguments, "context");
 
-  if (context == "repl" && g_vsc.DetectExpressionContext(frame, expression) ==
+  if (context == "repl" && g_dap.DetectExpressionContext(frame, expression) ==
                                ExpressionContext::Command) {
     // If we're evaluating a command relative to the current frame, set the
     // focus_tid to the current frame for any thread related events.
     if (frame.IsValid()) {
-      g_vsc.focus_tid = frame.GetThread().GetThreadID();
+      g_dap.focus_tid = frame.GetThread().GetThreadID();
     }
     auto result = RunLLDBCommands(llvm::StringRef(), {std::string(expression)});
     EmplaceSafeString(body, "result", result);
@@ -1302,7 +1302,7 @@ void request_evaluate(const llvm::json::Object &request) {
       EmplaceSafeString(body, "type",
                         value_typename ? value_typename : NO_TYPENAME);
       if (value.MightHaveChildren()) {
-        auto variableReference = g_vsc.variables.InsertExpandableVariable(
+        auto variableReference = g_dap.variables.InsertExpandableVariable(
             value, /*is_permanent=*/context == "repl");
         body.try_emplace("variablesReference", variableReference);
       } else {
@@ -1311,7 +1311,7 @@ void request_evaluate(const llvm::json::Object &request) {
     }
   }
   response.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 // "compileUnitsRequest": {
@@ -1361,9 +1361,9 @@ void request_compileUnits(const llvm::json::Object &request) {
   llvm::json::Array units;
   auto arguments = request.getObject("arguments");
   std::string module_id = std::string(GetString(arguments, "moduleId"));
-  int num_modules = g_vsc.target.GetNumModules();
+  int num_modules = g_dap.target.GetNumModules();
   for (int i = 0; i < num_modules; i++) {
-    auto curr_module = g_vsc.target.GetModuleAtIndex(i);
+    auto curr_module = g_dap.target.GetModuleAtIndex(i);
     if (module_id == curr_module.GetUUIDString()) {
       int num_units = curr_module.GetNumCompileUnits();
       for (int j = 0; j < num_units; j++) {
@@ -1375,7 +1375,7 @@ void request_compileUnits(const llvm::json::Object &request) {
     }
   }
   response.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 // "modulesRequest": {
@@ -1409,15 +1409,15 @@ void request_modules(const llvm::json::Object &request) {
   FillResponse(request, response);
 
   llvm::json::Array modules;
-  for (size_t i = 0; i < g_vsc.target.GetNumModules(); i++) {
-    lldb::SBModule module = g_vsc.target.GetModuleAtIndex(i);
+  for (size_t i = 0; i < g_dap.target.GetNumModules(); i++) {
+    lldb::SBModule module = g_dap.target.GetModuleAtIndex(i);
     modules.emplace_back(CreateModule(module));
   }
 
   llvm::json::Object body;
   body.try_emplace("modules", std::move(modules));
   response.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 // "InitializeRequest": {
@@ -1498,7 +1498,7 @@ void request_modules(const llvm::json::Object &request) {
 // }
 void request_initialize(const llvm::json::Object &request) {
   auto log_cb = [](const char *buf, void *baton) -> void {
-    g_vsc.SendOutput(OutputType::Console, llvm::StringRef{buf});
+    g_dap.SendOutput(OutputType::Console, llvm::StringRef{buf});
   };
 
   auto arguments = request.getObject("arguments");
@@ -1507,25 +1507,24 @@ void request_initialize(const llvm::json::Object &request) {
   // which may affect the outcome of tests.
   bool source_init_file = GetBoolean(arguments, "sourceInitFile", true);
 
-  g_vsc.debugger =
-      lldb::SBDebugger::Create(source_init_file, log_cb, nullptr);
-  auto cmd = g_vsc.debugger.GetCommandInterpreter().AddMultiwordCommand(
-      "lldb-vscode", "Commands for managing lldb-vscode.");
+  g_dap.debugger = lldb::SBDebugger::Create(source_init_file, log_cb, nullptr);
+  auto cmd = g_dap.debugger.GetCommandInterpreter().AddMultiwordCommand(
+      "lldb-dap", "Commands for managing lldb-dap.");
   if (GetBoolean(arguments, "supportsStartDebuggingRequest", false)) {
     cmd.AddCommand(
-        "startDebugging", &g_vsc.start_debugging_request_handler,
+        "startDebugging", &g_dap.start_debugging_request_handler,
         "Sends a startDebugging request from the debug adapter to the client "
         "to start a child debug session of the same type as the caller.");
   }
   cmd.AddCommand(
-      "repl-mode", &g_vsc.repl_mode_request_handler,
-      "Get or set the repl behavior of vscode-lldb evaluation requests.");
+      "repl-mode", &g_dap.repl_mode_request_handler,
+      "Get or set the repl behavior of lldb-dap evaluation requests.");
 
-  g_vsc.progress_event_thread = std::thread(ProgressEventThreadFunction);
+  g_dap.progress_event_thread = std::thread(ProgressEventThreadFunction);
 
   // Start our event thread so we can receive events from the debugger, target,
   // process and more.
-  g_vsc.event_thread = std::thread(EventThreadFunction);
+  g_dap.event_thread = std::thread(EventThreadFunction);
 
   llvm::json::Object response;
   FillResponse(request, response);
@@ -1544,7 +1543,7 @@ void request_initialize(const llvm::json::Object &request) {
   body.try_emplace("supportsEvaluateForHovers", true);
   // Available filters or options for the setExceptionBreakpoints request.
   llvm::json::Array filters;
-  for (const auto &exc_bp : g_vsc.exception_breakpoints) {
+  for (const auto &exc_bp : g_dap.exception_breakpoints) {
     filters.emplace_back(CreateExceptionBreakpointFilter(exc_bp));
   }
   body.try_emplace("exceptionBreakpointFilters", std::move(filters));
@@ -1607,12 +1606,12 @@ void request_initialize(const llvm::json::Object &request) {
   body.try_emplace("supportsLogPoints", true);
 
   response.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 llvm::Error request_runInTerminal(const llvm::json::Object &launch_request,
                                   const uint64_t timeout_seconds) {
-  g_vsc.is_attach = true;
+  g_dap.is_attach = true;
   lldb::SBAttachInfo attach_info;
 
   llvm::Expected<std::shared_ptr<FifoFile>> comm_file_or_err =
@@ -1628,8 +1627,8 @@ llvm::Error request_runInTerminal(const llvm::json::Object &launch_request,
   debugger_pid = getpid();
 #endif
   llvm::json::Object reverse_request = CreateRunInTerminalReverseRequest(
-      launch_request, g_vsc.debug_adaptor_path, comm_file.m_path, debugger_pid);
-  g_vsc.SendReverseRequest("runInTerminal", std::move(reverse_request),
+      launch_request, g_dap.debug_adaptor_path, comm_file.m_path, debugger_pid);
+  g_dap.SendReverseRequest("runInTerminal", std::move(reverse_request),
                            [](llvm::Expected<llvm::json::Value> value) {
                              if (!value) {
                                llvm::Error err = value.takeError();
@@ -1644,9 +1643,9 @@ llvm::Error request_runInTerminal(const llvm::json::Object &launch_request,
   else
     return pid.takeError();
 
-  g_vsc.debugger.SetAsync(false);
+  g_dap.debugger.SetAsync(false);
   lldb::SBError error;
-  g_vsc.target.Attach(attach_info, error);
+  g_dap.target.Attach(attach_info, error);
 
   if (error.Fail())
     return llvm::createStringError(llvm::inconvertibleErrorCode(),
@@ -1664,11 +1663,11 @@ llvm::Error request_runInTerminal(const llvm::json::Object &launch_request,
   // process right in the middle of the exec. To the user, what we are doing is
   // transparent, as they will only be able to see the process since the exec,
   // completely unaware of the preparatory work.
-  g_vsc.target.GetProcess().Continue();
+  g_dap.target.GetProcess().Continue();
 
   // Now that the actual target is just starting (i.e. exec was just invoked),
   // we return the debugger to its async state.
-  g_vsc.debugger.SetAsync(true);
+  g_dap.debugger.SetAsync(true);
 
   // If sending the notification failed, the launcher should be dead by now and
   // the async didAttach notification should have an error message, so we
@@ -1691,7 +1690,7 @@ lldb::SBError LaunchProcess(const llvm::json::Object &request) {
   auto launchCommands = GetStrings(arguments, "launchCommands");
 
   // Instantiate a launch info instance for the target.
-  auto launch_info = g_vsc.target.GetLaunchInfo();
+  auto launch_info = g_dap.target.GetLaunchInfo();
 
   // Grab the current working directory if there is one and set it in the
   // launch info.
@@ -1730,21 +1729,21 @@ lldb::SBError LaunchProcess(const llvm::json::Object &request) {
   } else if (launchCommands.empty()) {
     // Disable async events so the launch will be successful when we return from
     // the launch call and the launch will happen synchronously
-    g_vsc.debugger.SetAsync(false);
-    g_vsc.target.Launch(launch_info, error);
-    g_vsc.debugger.SetAsync(true);
+    g_dap.debugger.SetAsync(false);
+    g_dap.target.Launch(launch_info, error);
+    g_dap.debugger.SetAsync(true);
   } else {
     // Set the launch info so that run commands can access the configured
     // launch details.
-    g_vsc.target.SetLaunchInfo(launch_info);
-    g_vsc.RunLLDBCommands("Running launchCommands:", launchCommands);
+    g_dap.target.SetLaunchInfo(launch_info);
+    g_dap.RunLLDBCommands("Running launchCommands:", launchCommands);
     // The custom commands might have created a new target so we should use the
     // selected target after these commands are run.
-    g_vsc.target = g_vsc.debugger.GetSelectedTarget();
+    g_dap.target = g_dap.debugger.GetSelectedTarget();
     // Make sure the process is launched and stopped at the entry point before
     // proceeding as the launch commands are not run using the synchronous
     // mode.
-    error = g_vsc.WaitForProcessToStop(timeout_seconds);
+    error = g_dap.WaitForProcessToStop(timeout_seconds);
   }
   return error;
 }
@@ -1784,28 +1783,28 @@ lldb::SBError LaunchProcess(const llvm::json::Object &request) {
 //   }]
 // }
 void request_launch(const llvm::json::Object &request) {
-  g_vsc.is_attach = false;
-  g_vsc.last_launch_or_attach_request = request;
+  g_dap.is_attach = false;
+  g_dap.last_launch_or_attach_request = request;
   llvm::json::Object response;
   FillResponse(request, response);
   auto arguments = request.getObject("arguments");
-  g_vsc.init_commands = GetStrings(arguments, "initCommands");
-  g_vsc.pre_run_commands = GetStrings(arguments, "preRunCommands");
-  g_vsc.stop_commands = GetStrings(arguments, "stopCommands");
-  g_vsc.exit_commands = GetStrings(arguments, "exitCommands");
-  g_vsc.terminate_commands = GetStrings(arguments, "terminateCommands");
+  g_dap.init_commands = GetStrings(arguments, "initCommands");
+  g_dap.pre_run_commands = GetStrings(arguments, "preRunCommands");
+  g_dap.stop_commands = GetStrings(arguments, "stopCommands");
+  g_dap.exit_commands = GetStrings(arguments, "exitCommands");
+  g_dap.terminate_commands = GetStrings(arguments, "terminateCommands");
   std::vector<std::string> postRunCommands =
       GetStrings(arguments, "postRunCommands");
-  g_vsc.stop_at_entry = GetBoolean(arguments, "stopOnEntry", false);
+  g_dap.stop_at_entry = GetBoolean(arguments, "stopOnEntry", false);
   const llvm::StringRef debuggerRoot = GetString(arguments, "debuggerRoot");
-  g_vsc.enable_auto_variable_summaries =
+  g_dap.enable_auto_variable_summaries =
       GetBoolean(arguments, "enableAutoVariableSummaries", false);
-  g_vsc.enable_synthetic_child_debugging =
+  g_dap.enable_synthetic_child_debugging =
       GetBoolean(arguments, "enableSyntheticChildDebugging", false);
 
   // This is a hack for loading DWARF in .o files on Mac where the .o files
   // in the debug map of the main executable have relative paths which require
-  // the lldb-vscode binary to have its working directory set to that relative
+  // the lldb-dap binary to have its working directory set to that relative
   // root for the .o files in order to be able to load debug info.
   if (!debuggerRoot.empty())
     llvm::sys::fs::set_current_path(debuggerRoot);
@@ -1813,21 +1812,21 @@ void request_launch(const llvm::json::Object &request) {
   // Run any initialize LLDB commands the user specified in the launch.json.
   // This is run before target is created, so commands can't do anything with
   // the targets - preRunCommands are run with the target.
-  g_vsc.RunInitCommands();
+  g_dap.RunInitCommands();
 
   SetSourceMapFromArguments(*arguments);
 
   lldb::SBError status;
-  g_vsc.SetTarget(g_vsc.CreateTargetFromArguments(*arguments, status));
+  g_dap.SetTarget(g_dap.CreateTargetFromArguments(*arguments, status));
   if (status.Fail()) {
     response["success"] = llvm::json::Value(false);
     EmplaceSafeString(response, "message", status.GetCString());
-    g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+    g_dap.SendJSON(llvm::json::Value(std::move(response)));
     return;
   }
 
   // Run any pre run LLDB commands the user specified in the launch.json
-  g_vsc.RunPreRunCommands();
+  g_dap.RunPreRunCommands();
 
   status = LaunchProcess(request);
 
@@ -1835,18 +1834,18 @@ void request_launch(const llvm::json::Object &request) {
     response["success"] = llvm::json::Value(false);
     EmplaceSafeString(response, "message", std::string(status.GetCString()));
   } else {
-    g_vsc.RunLLDBCommands("Running postRunCommands:", postRunCommands);
+    g_dap.RunLLDBCommands("Running postRunCommands:", postRunCommands);
   }
 
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 
   if (!status.Fail()) {
-    if (g_vsc.is_attach)
-      SendProcessEvent(Attach);  // this happens when doing runInTerminal
+    if (g_dap.is_attach)
+      SendProcessEvent(Attach); // this happens when doing runInTerminal
     else
       SendProcessEvent(Launch);
   }
-  g_vsc.SendJSON(CreateEventObject("initialized"));
+  g_dap.SendJSON(CreateEventObject("initialized"));
 }
 
 // "NextRequest": {
@@ -1891,16 +1890,16 @@ void request_next(const llvm::json::Object &request) {
   llvm::json::Object response;
   FillResponse(request, response);
   auto arguments = request.getObject("arguments");
-  lldb::SBThread thread = g_vsc.GetLLDBThread(*arguments);
+  lldb::SBThread thread = g_dap.GetLLDBThread(*arguments);
   if (thread.IsValid()) {
     // Remember the thread ID that caused the resume so we can set the
     // "threadCausedFocus" boolean value in the "stopped" events.
-    g_vsc.focus_tid = thread.GetThreadID();
+    g_dap.focus_tid = thread.GetThreadID();
     thread.StepOver();
   } else {
     response["success"] = llvm::json::Value(false);
   }
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 // "PauseRequest": {
@@ -1942,12 +1941,11 @@ void request_next(const llvm::json::Object &request) {
 void request_pause(const llvm::json::Object &request) {
   llvm::json::Object response;
   FillResponse(request, response);
-  lldb::SBProcess process = g_vsc.target.GetProcess();
+  lldb::SBProcess process = g_dap.target.GetProcess();
   lldb::SBError error = process.Stop();
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
-
 // "RestartRequest": {
 //   "allOf": [ { "$ref": "#/definitions/Request" }, {
 //     "type": "object",
@@ -1992,11 +1990,11 @@ void request_pause(const llvm::json::Object &request) {
 void request_restart(const llvm::json::Object &request) {
   llvm::json::Object response;
   FillResponse(request, response);
-  if (!g_vsc.last_launch_or_attach_request) {
+  if (!g_dap.last_launch_or_attach_request) {
     response["success"] = llvm::json::Value(false);
     EmplaceSafeString(response, "message",
                       "Restart request received but no process was launched.");
-    g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+    g_dap.SendJSON(llvm::json::Value(std::move(response)));
     return;
   }
   // Check if we were in a "launch" session or an "attach" session.
@@ -2008,12 +2006,12 @@ void request_restart(const llvm::json::Object &request) {
   // Note that when using runInTerminal we're technically attached, but it's an
   // implementation detail. The adapter *did* launch the process in response to
   // a "launch" command, so we can still stop it and re-run it. This is why we
-  // don't just check `g_vsc.is_attach`.
-  if (GetString(*g_vsc.last_launch_or_attach_request, "command") == "attach") {
+  // don't just check `g_dap.is_attach`.
+  if (GetString(*g_dap.last_launch_or_attach_request, "command") == "attach") {
     response["success"] = llvm::json::Value(false);
     EmplaceSafeString(response, "message",
                       "Restarting an \"attach\" session is not supported.");
-    g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+    g_dap.SendJSON(llvm::json::Value(std::move(response)));
     return;
   }
 
@@ -2023,20 +2021,20 @@ void request_restart(const llvm::json::Object &request) {
   if (restart_arguments) {
     auto launch_request_arguments = restart_arguments->getObject("arguments");
     if (launch_request_arguments) {
-      (*g_vsc.last_launch_or_attach_request)["arguments"] =
+      (*g_dap.last_launch_or_attach_request)["arguments"] =
           llvm::json::Value(llvm::json::Object(*launch_request_arguments));
     }
   }
 
   // Keep track of the old PID so when we get a "process exited" event from the
   // killed process we can detect it and not shut down the whole session.
-  lldb::SBProcess process = g_vsc.target.GetProcess();
-  g_vsc.restarting_process_id = process.GetProcessID();
+  lldb::SBProcess process = g_dap.target.GetProcess();
+  g_dap.restarting_process_id = process.GetProcessID();
 
   // Stop the current process if necessary. The logic here is similar to
   // CommandObjectProcessLaunchOrAttach::StopProcessIfNecessary, except that
   // we don't ask the user for confirmation.
-  g_vsc.debugger.SetAsync(false);
+  g_dap.debugger.SetAsync(false);
   if (process.IsValid()) {
     lldb::StateType state = process.GetState();
     if (state != lldb::eStateConnected) {
@@ -2044,21 +2042,21 @@ void request_restart(const llvm::json::Object &request) {
     }
     // Clear the list of thread ids to avoid sending "thread exited" events
     // for threads of the process we are terminating.
-    g_vsc.thread_ids.clear();
+    g_dap.thread_ids.clear();
   }
-  g_vsc.debugger.SetAsync(true);
-  LaunchProcess(*g_vsc.last_launch_or_attach_request);
+  g_dap.debugger.SetAsync(true);
+  LaunchProcess(*g_dap.last_launch_or_attach_request);
 
   // This is normally done after receiving a "configuration done" request.
   // Because we're restarting, configuration has already happened so we can
   // continue the process right away.
-  if (g_vsc.stop_at_entry) {
+  if (g_dap.stop_at_entry) {
     SendThreadStoppedEvent();
   } else {
-    g_vsc.target.GetProcess().Continue();
+    g_dap.target.GetProcess().Continue();
   }
 
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 // "ScopesRequest": {
@@ -2117,7 +2115,7 @@ void request_scopes(const llvm::json::Object &request) {
   FillResponse(request, response);
   llvm::json::Object body;
   auto arguments = request.getObject("arguments");
-  lldb::SBFrame frame = g_vsc.GetLLDBFrame(*arguments);
+  lldb::SBFrame frame = g_dap.GetLLDBFrame(*arguments);
   // As the user selects different stack frames in the GUI, a "scopes" request
   // will be sent to the DAP. This is the only way we know that the user has
   // selected a frame in a thread. There are no other notifications that are
@@ -2137,18 +2135,18 @@ void request_scopes(const llvm::json::Object &request) {
     frame.GetThread().SetSelectedFrame(frame.GetFrameID());
   }
 
-  g_vsc.variables.locals = frame.GetVariables(/*arguments=*/true,
+  g_dap.variables.locals = frame.GetVariables(/*arguments=*/true,
                                               /*locals=*/true,
                                               /*statics=*/false,
                                               /*in_scope_only=*/true);
-  g_vsc.variables.globals = frame.GetVariables(/*arguments=*/false,
+  g_dap.variables.globals = frame.GetVariables(/*arguments=*/false,
                                                /*locals=*/false,
                                                /*statics=*/true,
                                                /*in_scope_only=*/true);
-  g_vsc.variables.registers = frame.GetRegisters();
-  body.try_emplace("scopes", g_vsc.CreateTopLevelScopes());
+  g_dap.variables.registers = frame.GetRegisters();
+  body.try_emplace("scopes", g_dap.CreateTopLevelScopes());
   response.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 // "SetBreakpointsRequest": {
@@ -2283,8 +2281,8 @@ void request_setBreakpoints(const llvm::json::Object &request) {
         request_bps[src_bp.line] = src_bp;
 
         // We check if this breakpoint already exists to update it
-        auto existing_source_bps = g_vsc.source_breakpoints.find(path);
-        if (existing_source_bps != g_vsc.source_breakpoints.end()) {
+        auto existing_source_bps = g_dap.source_breakpoints.find(path);
+        if (existing_source_bps != g_dap.source_breakpoints.end()) {
           const auto &existing_bp =
               existing_source_bps->second.find(src_bp.line);
           if (existing_bp != existing_source_bps->second.end()) {
@@ -2295,8 +2293,8 @@ void request_setBreakpoints(const llvm::json::Object &request) {
           }
         }
         // At this point the breakpoint is new
-        g_vsc.source_breakpoints[path][src_bp.line] = src_bp;
-        SourceBreakpoint &new_bp = g_vsc.source_breakpoints[path][src_bp.line];
+        g_dap.source_breakpoints[path][src_bp.line] = src_bp;
+        SourceBreakpoint &new_bp = g_dap.source_breakpoints[path][src_bp.line];
         new_bp.SetBreakpoint(path.data());
         AppendBreakpoint(new_bp.bp, response_breakpoints, path, new_bp.line);
       }
@@ -2306,13 +2304,13 @@ void request_setBreakpoints(const llvm::json::Object &request) {
   // Delete any breakpoints in this source file that aren't in the
   // request_bps set. There is no call to remove breakpoints other than
   // calling this function with a smaller or empty "breakpoints" list.
-  auto old_src_bp_pos = g_vsc.source_breakpoints.find(path);
-  if (old_src_bp_pos != g_vsc.source_breakpoints.end()) {
+  auto old_src_bp_pos = g_dap.source_breakpoints.find(path);
+  if (old_src_bp_pos != g_dap.source_breakpoints.end()) {
     for (auto &old_bp : old_src_bp_pos->second) {
       auto request_pos = request_bps.find(old_bp.first);
       if (request_pos == request_bps.end()) {
         // This breakpoint no longer exists in this source file, delete it
-        g_vsc.target.BreakpointDelete(old_bp.second.bp.GetID());
+        g_dap.target.BreakpointDelete(old_bp.second.bp.GetID());
         old_src_bp_pos->second.erase(old_bp.first);
       }
     }
@@ -2321,7 +2319,7 @@ void request_setBreakpoints(const llvm::json::Object &request) {
   llvm::json::Object body;
   body.try_emplace("breakpoints", std::move(response_breakpoints));
   response.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 // "SetExceptionBreakpointsRequest": {
@@ -2380,23 +2378,23 @@ void request_setExceptionBreakpoints(const llvm::json::Object &request) {
   // Keep a list of any exception breakpoint filter names that weren't set
   // so we can clear any exception breakpoints if needed.
   std::set<std::string> unset_filters;
-  for (const auto &bp : g_vsc.exception_breakpoints)
+  for (const auto &bp : g_dap.exception_breakpoints)
     unset_filters.insert(bp.filter);
 
   for (const auto &value : *filters) {
     const auto filter = GetAsString(value);
-    auto exc_bp = g_vsc.GetExceptionBreakpoint(std::string(filter));
+    auto exc_bp = g_dap.GetExceptionBreakpoint(std::string(filter));
     if (exc_bp) {
       exc_bp->SetBreakpoint();
       unset_filters.erase(std::string(filter));
     }
   }
   for (const auto &filter : unset_filters) {
-    auto exc_bp = g_vsc.GetExceptionBreakpoint(filter);
+    auto exc_bp = g_dap.GetExceptionBreakpoint(filter);
     if (exc_bp)
       exc_bp->ClearBreakpoint();
   }
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 // "SetFunctionBreakpointsRequest": {
@@ -2497,11 +2495,11 @@ void request_setFunctionBreakpoints(const llvm::json::Object &request) {
   // Disable any function breakpoints that aren't in the request_bps.
   // There is no call to remove function breakpoints other than calling this
   // function with a smaller or empty "breakpoints" list.
-  for (auto &pair : g_vsc.function_breakpoints) {
+  for (auto &pair : g_dap.function_breakpoints) {
     auto request_pos = request_bps.find(pair.first());
     if (request_pos == request_bps.end()) {
       // This function breakpoint no longer exists delete it from LLDB
-      g_vsc.target.BreakpointDelete(pair.second.bp.GetID());
+      g_dap.target.BreakpointDelete(pair.second.bp.GetID());
       remove_names.push_back(pair.first());
     } else {
       // Update the existing breakpoint as any setting withing the function
@@ -2516,14 +2514,14 @@ void request_setFunctionBreakpoints(const llvm::json::Object &request) {
   }
   // Remove any breakpoints that are no longer in our list
   for (const auto &name : remove_names)
-    g_vsc.function_breakpoints.erase(name);
+    g_dap.function_breakpoints.erase(name);
 
   // Any breakpoints that are left in "request_bps" are breakpoints that
   // need to be set.
   for (auto &pair : request_bps) {
     // Add this breakpoint info to the response
-    g_vsc.function_breakpoints[pair.first()] = std::move(pair.second);
-    FunctionBreakpoint &new_bp = g_vsc.function_breakpoints[pair.first()];
+    g_dap.function_breakpoints[pair.first()] = std::move(pair.second);
+    FunctionBreakpoint &new_bp = g_dap.function_breakpoints[pair.first()];
     new_bp.SetBreakpoint();
     AppendBreakpoint(new_bp.bp, response_breakpoints);
   }
@@ -2531,7 +2529,7 @@ void request_setFunctionBreakpoints(const llvm::json::Object &request) {
   llvm::json::Object body;
   body.try_emplace("breakpoints", std::move(response_breakpoints));
   response.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 // "SourceRequest": {
@@ -2597,7 +2595,7 @@ void request_source(const llvm::json::Object &request) {
   FillResponse(request, response);
   llvm::json::Object body{{"content", ""}};
   response.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 // "StackTraceRequest": {
@@ -2675,7 +2673,7 @@ void request_stackTrace(const llvm::json::Object &request) {
   FillResponse(request, response);
   lldb::SBError error;
   auto arguments = request.getObject("arguments");
-  lldb::SBThread thread = g_vsc.GetLLDBThread(*arguments);
+  lldb::SBThread thread = g_dap.GetLLDBThread(*arguments);
   llvm::json::Array stackFrames;
   llvm::json::Object body;
 
@@ -2753,7 +2751,7 @@ void request_stackTrace(const llvm::json::Object &request) {
   }
   body.try_emplace("stackFrames", std::move(stackFrames));
   response.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 // "StepInRequest": {
@@ -2805,16 +2803,16 @@ void request_stepIn(const llvm::json::Object &request) {
   llvm::json::Object response;
   FillResponse(request, response);
   auto arguments = request.getObject("arguments");
-  lldb::SBThread thread = g_vsc.GetLLDBThread(*arguments);
+  lldb::SBThread thread = g_dap.GetLLDBThread(*arguments);
   if (thread.IsValid()) {
     // Remember the thread ID that caused the resume so we can set the
     // "threadCausedFocus" boolean value in the "stopped" events.
-    g_vsc.focus_tid = thread.GetThreadID();
+    g_dap.focus_tid = thread.GetThreadID();
     thread.StepInto();
   } else {
     response["success"] = llvm::json::Value(false);
   }
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 // "StepOutRequest": {
@@ -2857,16 +2855,16 @@ void request_stepOut(const llvm::json::Object &request) {
   llvm::json::Object response;
   FillResponse(request, response);
   auto arguments = request.getObject("arguments");
-  lldb::SBThread thread = g_vsc.GetLLDBThread(*arguments);
+  lldb::SBThread thread = g_dap.GetLLDBThread(*arguments);
   if (thread.IsValid()) {
     // Remember the thread ID that caused the resume so we can set the
     // "threadCausedFocus" boolean value in the "stopped" events.
-    g_vsc.focus_tid = thread.GetThreadID();
+    g_dap.focus_tid = thread.GetThreadID();
     thread.StepOut();
   } else {
     response["success"] = llvm::json::Value(false);
   }
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 // "ThreadsRequest": {
@@ -2906,7 +2904,7 @@ void request_stepOut(const llvm::json::Object &request) {
 // }
 void request_threads(const llvm::json::Object &request) {
 
-  lldb::SBProcess process = g_vsc.target.GetProcess();
+  lldb::SBProcess process = g_dap.target.GetProcess();
   llvm::json::Object response;
   FillResponse(request, response);
 
@@ -2922,7 +2920,7 @@ void request_threads(const llvm::json::Object &request) {
   llvm::json::Object body;
   body.try_emplace("threads", std::move(threads));
   response.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 // "SetVariableRequest": {
@@ -3034,10 +3032,10 @@ void request_setVariable(const llvm::json::Object &request) {
   // only specifies the variable reference of the enclosing scope/variable, and
   // the name of the variable. We could have two shadowed variables with the
   // same name in "Locals" or "Globals". In our case the "id" absolute index
-  // of the variable within the g_vsc.variables list.
+  // of the variable within the g_dap.variables list.
   const auto id_value = GetUnsigned(arguments, "id", UINT64_MAX);
   if (id_value != UINT64_MAX) {
-    variable = g_vsc.variables.GetVariable(id_value);
+    variable = g_dap.variables.GetVariable(id_value);
   } else if (lldb::SBValueList *top_scope =
                  GetTopLevelScope(variablesReference)) {
     // variablesReference is one of our scopes, not an actual variable it is
@@ -3060,7 +3058,7 @@ void request_setVariable(const llvm::json::Object &request) {
 
     // We have a named item within an actual variable so we need to find it
     // withing the container variable by name.
-    lldb::SBValue container = g_vsc.variables.GetVariable(variablesReference);
+    lldb::SBValue container = g_dap.variables.GetVariable(variablesReference);
     variable = container.GetChildMemberWithName(name.data());
     if (!variable.IsValid()) {
       if (name.startswith("[")) {
@@ -3081,12 +3079,12 @@ void request_setVariable(const llvm::json::Object &request) {
       SetValueForKey(variable, body, "value");
       EmplaceSafeString(body, "type", variable.GetType().GetDisplayTypeName());
 
-      // We don't know the index of the variable in our g_vsc.variables
+      // We don't know the index of the variable in our g_dap.variables
       // so always insert a new one to get its variablesReference.
       // is_permanent is false because debug console does not support
       // setVariable request.
       if (variable.MightHaveChildren())
-        newVariablesReference = g_vsc.variables.InsertExpandableVariable(
+        newVariablesReference = g_dap.variables.InsertExpandableVariable(
             variable, /*is_permanent=*/false);
 
       body.try_emplace("variablesReference", newVariablesReference);
@@ -3099,7 +3097,7 @@ void request_setVariable(const llvm::json::Object &request) {
   }
 
   response.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 // "VariablesRequest": {
@@ -3201,8 +3199,8 @@ void request_variables(const llvm::json::Object &request) {
       // and resolve what the pointer resolves to. Only change the format if the
       // format was set to the default format or if it was hex as some registers
       // have formats set for them.
-      const uint32_t addr_size = g_vsc.target.GetProcess().GetAddressByteSize();
-      lldb::SBValue reg_set = g_vsc.variables.registers.GetValueAtIndex(0);
+      const uint32_t addr_size = g_dap.target.GetProcess().GetAddressByteSize();
+      lldb::SBValue reg_set = g_dap.variables.registers.GetValueAtIndex(0);
       const uint32_t num_regs = reg_set.GetNumChildren();
       for (uint32_t reg_idx = 0; reg_idx < num_regs; ++reg_idx) {
         lldb::SBValue reg = reg_set.GetChildAtIndex(reg_idx);
@@ -3260,7 +3258,7 @@ void request_variables(const llvm::json::Object &request) {
 
       int64_t var_ref = 0;
       if (variable.MightHaveChildren() || variable.IsSynthetic()) {
-        var_ref = g_vsc.variables.InsertExpandableVariable(
+        var_ref = g_dap.variables.InsertExpandableVariable(
             variable, /*is_permanent=*/false);
       }
       variables.emplace_back(CreateVariable(
@@ -3270,7 +3268,7 @@ void request_variables(const llvm::json::Object &request) {
   } else {
     // We are expanding a variable that has children, so we will return its
     // children.
-    lldb::SBValue variable = g_vsc.variables.GetVariable(variablesReference);
+    lldb::SBValue variable = g_dap.variables.GetVariable(variablesReference);
     if (variable.IsValid()) {
       auto addChild = [&](lldb::SBValue child,
                           std::optional<std::string> custom_name = {}) {
@@ -3278,9 +3276,9 @@ void request_variables(const llvm::json::Object &request) {
           return;
         if (child.MightHaveChildren()) {
           auto is_permanent =
-              g_vsc.variables.IsPermanentVariableReference(variablesReference);
+              g_dap.variables.IsPermanentVariableReference(variablesReference);
           auto childVariablesReferences =
-              g_vsc.variables.InsertExpandableVariable(child, is_permanent);
+              g_dap.variables.InsertExpandableVariable(child, is_permanent);
           variables.emplace_back(CreateVariable(
               child, childVariablesReferences, childVariablesReferences, hex,
               /*is_name_duplicated=*/false, custom_name));
@@ -3300,7 +3298,7 @@ void request_variables(const llvm::json::Object &request) {
       // "[raw]" child that can be used to inspect the raw version of a
       // synthetic member. That eliminates the need for the user to go to the
       // debug console and type `frame var <variable> to get these values.
-      if (g_vsc.enable_synthetic_child_debugging && variable.IsSynthetic() &&
+      if (g_dap.enable_synthetic_child_debugging && variable.IsSynthetic() &&
           i == num_children)
         addChild(variable.GetNonSyntheticValue(), "[raw]");
     }
@@ -3308,7 +3306,7 @@ void request_variables(const llvm::json::Object &request) {
   llvm::json::Object body;
   body.try_emplace("variables", std::move(variables));
   response.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 // "DisassembleRequest": {
@@ -3394,27 +3392,27 @@ void request_disassemble(const llvm::json::Object &request) {
     response["success"] = false;
     response["message"] =
         "Malformed memory reference: " + memoryReference.str();
-    g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+    g_dap.SendJSON(llvm::json::Value(std::move(response)));
     return;
   }
 
   addr_ptr += GetSigned(arguments, "instructionOffset", 0);
-  lldb::SBAddress addr(addr_ptr, g_vsc.target);
+  lldb::SBAddress addr(addr_ptr, g_dap.target);
   if (!addr.IsValid()) {
     response["success"] = false;
     response["message"] = "Memory reference not found in the current binary.";
-    g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+    g_dap.SendJSON(llvm::json::Value(std::move(response)));
     return;
   }
 
   const auto inst_count = GetUnsigned(arguments, "instructionCount", 0);
   lldb::SBInstructionList insts =
-      g_vsc.target.ReadInstructions(addr, inst_count);
+      g_dap.target.ReadInstructions(addr, inst_count);
 
   if (!insts.IsValid()) {
     response["success"] = false;
     response["message"] = "Failed to find instructions for memory address.";
-    g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+    g_dap.SendJSON(llvm::json::Value(std::move(response)));
     return;
   }
 
@@ -3424,11 +3422,11 @@ void request_disassemble(const llvm::json::Object &request) {
   for (size_t i = 0; i < num_insts; ++i) {
     lldb::SBInstruction inst = insts.GetInstructionAtIndex(i);
     auto addr = inst.GetAddress();
-    const auto inst_addr = addr.GetLoadAddress(g_vsc.target);
-    const char *m = inst.GetMnemonic(g_vsc.target);
-    const char *o = inst.GetOperands(g_vsc.target);
-    const char *c = inst.GetComment(g_vsc.target);
-    auto d = inst.GetData(g_vsc.target);
+    const auto inst_addr = addr.GetLoadAddress(g_dap.target);
+    const char *m = inst.GetMnemonic(g_dap.target);
+    const char *o = inst.GetOperands(g_dap.target);
+    const char *c = inst.GetComment(g_dap.target);
+    auto d = inst.GetData(g_dap.target);
 
     std::string bytes;
     llvm::raw_string_ostream sb(bytes);
@@ -3514,7 +3512,7 @@ void request_disassemble(const llvm::json::Object &request) {
   llvm::json::Object body;
   body.try_emplace("instructions", std::move(instructions));
   response.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 // A request used in testing to get the details on all breakpoints that are
 // currently set in the target. This helps us to test "setBreakpoints" and
@@ -3524,48 +3522,48 @@ void request__testGetTargetBreakpoints(const llvm::json::Object &request) {
   llvm::json::Object response;
   FillResponse(request, response);
   llvm::json::Array response_breakpoints;
-  for (uint32_t i = 0; g_vsc.target.GetBreakpointAtIndex(i).IsValid(); ++i) {
-    auto bp = g_vsc.target.GetBreakpointAtIndex(i);
+  for (uint32_t i = 0; g_dap.target.GetBreakpointAtIndex(i).IsValid(); ++i) {
+    auto bp = g_dap.target.GetBreakpointAtIndex(i);
     AppendBreakpoint(bp, response_breakpoints);
   }
   llvm::json::Object body;
   body.try_emplace("breakpoints", std::move(response_breakpoints));
   response.try_emplace("body", std::move(body));
-  g_vsc.SendJSON(llvm::json::Value(std::move(response)));
+  g_dap.SendJSON(llvm::json::Value(std::move(response)));
 }
 
 void RegisterRequestCallbacks() {
-  g_vsc.RegisterRequestCallback("attach", request_attach);
-  g_vsc.RegisterRequestCallback("completions", request_completions);
-  g_vsc.RegisterRequestCallback("continue", request_continue);
-  g_vsc.RegisterRequestCallback("configurationDone", request_configurationDone);
-  g_vsc.RegisterRequestCallback("disconnect", request_disconnect);
-  g_vsc.RegisterRequestCallback("evaluate", request_evaluate);
-  g_vsc.RegisterRequestCallback("exceptionInfo", request_exceptionInfo);
-  g_vsc.RegisterRequestCallback("initialize", request_initialize);
-  g_vsc.RegisterRequestCallback("launch", request_launch);
-  g_vsc.RegisterRequestCallback("next", request_next);
-  g_vsc.RegisterRequestCallback("pause", request_pause);
-  g_vsc.RegisterRequestCallback("restart", request_restart);
-  g_vsc.RegisterRequestCallback("scopes", request_scopes);
-  g_vsc.RegisterRequestCallback("setBreakpoints", request_setBreakpoints);
-  g_vsc.RegisterRequestCallback("setExceptionBreakpoints",
+  g_dap.RegisterRequestCallback("attach", request_attach);
+  g_dap.RegisterRequestCallback("completions", request_completions);
+  g_dap.RegisterRequestCallback("continue", request_continue);
+  g_dap.RegisterRequestCallback("configurationDone", request_configurationDone);
+  g_dap.RegisterRequestCallback("disconnect", request_disconnect);
+  g_dap.RegisterRequestCallback("evaluate", request_evaluate);
+  g_dap.RegisterRequestCallback("exceptionInfo", request_exceptionInfo);
+  g_dap.RegisterRequestCallback("initialize", request_initialize);
+  g_dap.RegisterRequestCallback("launch", request_launch);
+  g_dap.RegisterRequestCallback("next", request_next);
+  g_dap.RegisterRequestCallback("pause", request_pause);
+  g_dap.RegisterRequestCallback("restart", request_restart);
+  g_dap.RegisterRequestCallback("scopes", request_scopes);
+  g_dap.RegisterRequestCallback("setBreakpoints", request_setBreakpoints);
+  g_dap.RegisterRequestCallback("setExceptionBreakpoints",
                                 request_setExceptionBreakpoints);
-  g_vsc.RegisterRequestCallback("setFunctionBreakpoints",
+  g_dap.RegisterRequestCallback("setFunctionBreakpoints",
                                 request_setFunctionBreakpoints);
-  g_vsc.RegisterRequestCallback("setVariable", request_setVariable);
-  g_vsc.RegisterRequestCallback("source", request_source);
-  g_vsc.RegisterRequestCallback("stackTrace", request_stackTrace);
-  g_vsc.RegisterRequestCallback("stepIn", request_stepIn);
-  g_vsc.RegisterRequestCallback("stepOut", request_stepOut);
-  g_vsc.RegisterRequestCallback("threads", request_threads);
-  g_vsc.RegisterRequestCallback("variables", request_variables);
-  g_vsc.RegisterRequestCallback("disassemble", request_disassemble);
+  g_dap.RegisterRequestCallback("setVariable", request_setVariable);
+  g_dap.RegisterRequestCallback("source", request_source);
+  g_dap.RegisterRequestCallback("stackTrace", request_stackTrace);
+  g_dap.RegisterRequestCallback("stepIn", request_stepIn);
+  g_dap.RegisterRequestCallback("stepOut", request_stepOut);
+  g_dap.RegisterRequestCallback("threads", request_threads);
+  g_dap.RegisterRequestCallback("variables", request_variables);
+  g_dap.RegisterRequestCallback("disassemble", request_disassemble);
   // Custom requests
-  g_vsc.RegisterRequestCallback("compileUnits", request_compileUnits);
-  g_vsc.RegisterRequestCallback("modules", request_modules);
+  g_dap.RegisterRequestCallback("compileUnits", request_compileUnits);
+  g_dap.RegisterRequestCallback("modules", request_modules);
   // Testing requests
-  g_vsc.RegisterRequestCallback("_testGetTargetBreakpoints",
+  g_dap.RegisterRequestCallback("_testGetTargetBreakpoints",
                                 request__testGetTargetBreakpoints);
 }
 
@@ -3579,21 +3577,21 @@ static void printHelp(LLDBVSCodeOptTable &table, llvm::StringRef tool_name) {
 EXAMPLES:
   The debug adapter can be started in two modes.
 
-  Running lldb-vscode without any arguments will start communicating with the
-  parent over stdio. Passing a port number causes lldb-vscode to start listening
+  Running lldb-dap without any arguments will start communicating with the
+  parent over stdio. Passing a port number causes lldb-dap to start listening
   for connections on that port.
 
-    lldb-vscode -p <port>
+    lldb-dap -p <port>
 
   Passing --wait-for-debugger will pause the process at startup and wait for a
   debugger to attach to the process.
 
-    lldb-vscode -g
+    lldb-dap -g
   )___";
   llvm::outs() << examples;
 }
 
-// If --launch-target is provided, this instance of lldb-vscode becomes a
+// If --launch-target is provided, this instance of lldb-dap becomes a
 // runInTerminal launcher. It will ultimately launch the program specified in
 // the --launch-target argument, which is the original program the user wanted
 // to debug. This is done in such a way that the actual debug adaptor can
@@ -3638,7 +3636,7 @@ void LaunchRunInTerminalTarget(llvm::opt::Arg &target_arg,
   // using a signal to prevent being paused forever.
 
   // This env var should be used only for tests.
-  const char *timeout_env_var = getenv("LLDB_VSCODE_RIT_TIMEOUT_IN_MS");
+  const char *timeout_env_var = getenv("LLDB_DAP_RIT_TIMEOUT_IN_MS");
   int timeout_in_ms =
       timeout_env_var != nullptr ? atoi(timeout_env_var) : 20000;
   if (llvm::Error err = comm_channel.WaitUntilDebugAdaptorAttaches(
@@ -3676,26 +3674,26 @@ int SetupStdoutStderrRedirection() {
   int stdoutfd = fileno(stdout);
   int new_stdout_fd = dup(stdoutfd);
   auto output_callback_stderr = [](llvm::StringRef data) {
-    g_vsc.SendOutput(OutputType::Stderr, data);
+    g_dap.SendOutput(OutputType::Stderr, data);
   };
   auto output_callback_stdout = [](llvm::StringRef data) {
-    g_vsc.SendOutput(OutputType::Stdout, data);
+    g_dap.SendOutput(OutputType::Stdout, data);
   };
   if (llvm::Error err = RedirectFd(stdoutfd, output_callback_stdout)) {
     std::string error_message = llvm::toString(std::move(err));
-    if (g_vsc.log)
-      *g_vsc.log << error_message << std::endl;
+    if (g_dap.log)
+      *g_dap.log << error_message << std::endl;
     output_callback_stderr(error_message);
   }
   if (llvm::Error err = RedirectFd(fileno(stderr), output_callback_stderr)) {
     std::string error_message = llvm::toString(std::move(err));
-    if (g_vsc.log)
-      *g_vsc.log << error_message << std::endl;
+    if (g_dap.log)
+      *g_dap.log << error_message << std::endl;
     output_callback_stderr(error_message);
   }
 
   /// used only by TestVSCode_redirection_to_console.py
-  if (getenv("LLDB_VSCODE_TEST_STDOUT_STDERR_REDIRECTION") != nullptr)
+  if (getenv("LLDB_DAP_TEST_STDOUT_STDERR_REDIRECTION") != nullptr)
     redirection_test();
   return new_stdout_fd;
 }
@@ -3706,7 +3704,7 @@ int main(int argc, char *argv[]) {
 
   llvm::SmallString<256> program_path(argv[0]);
   llvm::sys::fs::make_absolute(program_path);
-  g_vsc.debug_adaptor_path = program_path.str().str();
+  g_dap.debug_adaptor_path = program_path.str().str();
 
   LLDBVSCodeOptTable T;
   unsigned MAI, MAC;
@@ -3722,11 +3720,11 @@ int main(int argc, char *argv[]) {
     llvm::opt::Arg *repl_mode = input_args.getLastArg(OPT_repl_mode);
     llvm::StringRef repl_mode_value = repl_mode->getValue();
     if (repl_mode_value == "auto") {
-      g_vsc.repl_mode = ReplMode::Auto;
+      g_dap.repl_mode = ReplMode::Auto;
     } else if (repl_mode_value == "variable") {
-      g_vsc.repl_mode = ReplMode::Variable;
+      g_dap.repl_mode = ReplMode::Variable;
     } else if (repl_mode_value == "command") {
-      g_vsc.repl_mode = ReplMode::Command;
+      g_dap.repl_mode = ReplMode::Command;
     } else {
       llvm::errs()
           << "'" << repl_mode_value
@@ -3742,7 +3740,8 @@ int main(int argc, char *argv[]) {
       if (debugger_pid) {
         llvm::StringRef debugger_pid_value = debugger_pid->getValue();
         if (debugger_pid_value.getAsInteger(10, pid)) {
-          llvm::errs() << "'" << debugger_pid_value << "' is not a valid "
+          llvm::errs() << "'" << debugger_pid_value
+                       << "' is not a valid "
                           "PID\n";
           return EXIT_FAILURE;
         }
@@ -3796,20 +3795,20 @@ int main(int argc, char *argv[]) {
     printf("Listening on port %i...\n", portno);
     SOCKET socket_fd = AcceptConnection(portno);
     if (socket_fd >= 0) {
-      g_vsc.input.descriptor = StreamDescriptor::from_socket(socket_fd, true);
-      g_vsc.output.descriptor = StreamDescriptor::from_socket(socket_fd, false);
+      g_dap.input.descriptor = StreamDescriptor::from_socket(socket_fd, true);
+      g_dap.output.descriptor = StreamDescriptor::from_socket(socket_fd, false);
     } else {
       return EXIT_FAILURE;
     }
   } else {
-    g_vsc.input.descriptor = StreamDescriptor::from_file(fileno(stdin), false);
-    g_vsc.output.descriptor = StreamDescriptor::from_file(new_stdout_fd, false);
+    g_dap.input.descriptor = StreamDescriptor::from_file(fileno(stdin), false);
+    g_dap.output.descriptor = StreamDescriptor::from_file(new_stdout_fd, false);
   }
 
   bool CleanExit = true;
-  if (auto Err = g_vsc.Loop()) {
-    if (g_vsc.log)
-      *g_vsc.log << "Transport Error: " << llvm::toString(std::move(Err))
+  if (auto Err = g_dap.Loop()) {
+    if (g_dap.log)
+      *g_dap.log << "Transport Error: " << llvm::toString(std::move(Err))
                  << "\n";
     CleanExit = false;
   }
diff --git a/lldb/tools/lldb-vscode/package.json b/lldb/tools/lldb-dap/package.json
similarity index 97%
rename from lldb/tools/lldb-vscode/package.json
rename to lldb/tools/lldb-dap/package.json
index 1b3d452f92ab86a..79954cde2bbc8f3 100644
--- a/lldb/tools/lldb-vscode/package.json
+++ b/lldb/tools/lldb-dap/package.json
@@ -1,5 +1,5 @@
 {
-	"name": "lldb-vscode",
+	"name": "lldb-dap",
 	"displayName": "LLDB VSCode",
 	"version": "0.1.0",
 	"publisher": "llvm",
@@ -103,7 +103,7 @@
 		],
 		"debuggers": [
 			{
-				"type": "lldb-vscode",
+				"type": "lldb-dap",
 				"label": "Native LLDB Debugger",
 				"enableBreakpointsFor": {
 					"languageIds": [
@@ -124,9 +124,9 @@
 						"swift"
 					]
 				},
-				"program": "./bin/lldb-vscode",
+				"program": "./bin/lldb-dap",
 				"windows": {
-					"program": "./bin/lldb-vscode.exe"
+					"program": "./bin/lldb-dap.exe"
 				},
 				"configurationAttributes": {
 					"launch": {
@@ -219,7 +219,7 @@
 							},
 							"launchCommands": {
 								"type": "array",
-								"description": "Custom commands that are executed instead of launching a process. A target will be created with the launch arguments prior to executing these commands. The commands may optionally create a new target and must perform a launch. A valid process must exist after these commands complete or the \"launch\" will fail. Launch the process with \"process launch -s\" to make the process to at the entry point since lldb-vscode will auto resume if necessary.",
+								"description": "Custom commands that are executed instead of launching a process. A target will be created with the launch arguments prior to executing these commands. The commands may optionally create a new target and must perform a launch. A valid process must exist after these commands complete or the \"launch\" will fail. Launch the process with \"process launch -s\" to make the process to at the entry point since lldb-dap will auto resume if necessary.",
 								"default": []
 							},
 							"stopCommands": {
@@ -345,7 +345,7 @@
 				},
 				"initialConfigurations": [
 					{
-						"type": "lldb-vscode",
+						"type": "lldb-dap",
 						"request": "launch",
 						"name": "Debug",
 						"program": "${workspaceRoot}/<your program>",
@@ -359,7 +359,7 @@
 						"label": "LLDB: Launch",
 						"description": "",
 						"body": {
-							"type": "lldb-vscode",
+							"type": "lldb-dap",
 							"request": "launch",
 							"name": "${2:Launch}",
 							"program": "^\"\\${workspaceRoot}/${1:<your program>}\"",
diff --git a/lldb/tools/lldb-vscode/syntaxes/arm.disasm b/lldb/tools/lldb-dap/syntaxes/arm.disasm
similarity index 100%
rename from lldb/tools/lldb-vscode/syntaxes/arm.disasm
rename to lldb/tools/lldb-dap/syntaxes/arm.disasm
diff --git a/lldb/tools/lldb-vscode/syntaxes/arm64.disasm b/lldb/tools/lldb-dap/syntaxes/arm64.disasm
similarity index 100%
rename from lldb/tools/lldb-vscode/syntaxes/arm64.disasm
rename to lldb/tools/lldb-dap/syntaxes/arm64.disasm
diff --git a/lldb/tools/lldb-vscode/syntaxes/disassembly.json b/lldb/tools/lldb-dap/syntaxes/disassembly.json
similarity index 100%
rename from lldb/tools/lldb-vscode/syntaxes/disassembly.json
rename to lldb/tools/lldb-dap/syntaxes/disassembly.json
diff --git a/lldb/tools/lldb-vscode/syntaxes/x86.disasm b/lldb/tools/lldb-dap/syntaxes/x86.disasm
similarity index 100%
rename from lldb/tools/lldb-vscode/syntaxes/x86.disasm
rename to lldb/tools/lldb-dap/syntaxes/x86.disasm
diff --git a/lldb/tools/lldb-vscode b/lldb/tools/lldb-vscode
new file mode 120000
index 000000000000000..46b40044086c9b6
--- /dev/null
+++ b/lldb/tools/lldb-vscode
@@ -0,0 +1 @@
+lldb-dap
\ No newline at end of file
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 922a4a45b537777..6830285483e28d9 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -205,6 +205,9 @@ Changes to LLDB
   for formatters to quickly find directly nested type when it's known
   where to search for it, avoiding more expensive global search via
   ``SBTarget::FindFirstType``.
+* ``lldb-vscode`` was renamed to ``lldb-dap`` and its installation
+  instructions have been updated to reflect this. The underlying functionality
+  remains unchanged.
 
 Changes to Sanitizers
 ---------------------

>From fdfe0b095f629a59a361813231fb5a6de4ef7ef9 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Thu, 19 Oct 2023 12:55:41 -0400
Subject: [PATCH 23/76] Fix test clang/test/Driver/cl-offload.cu

Regression caused by e880e8aedbc17ba04c969c9426d1f2567af72e7b

Due to aux-target mismatch. Add -target option to fix aux-target.

https://lab.llvm.org/buildbot/#/builders/230/builds/20138
---
 clang/test/Driver/cl-offload.cu | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Driver/cl-offload.cu b/clang/test/Driver/cl-offload.cu
index aa5c096338110b0..c3bc5a2c08275e0 100644
--- a/clang/test/Driver/cl-offload.cu
+++ b/clang/test/Driver/cl-offload.cu
@@ -1,9 +1,9 @@
-// RUN: %clang_cl -### --offload-arch=sm_35 -fgpu-rdc \
+// RUN: %clang_cl -### -target x86_64-pc-windows-msvc --offload-arch=sm_35 -fgpu-rdc \
 // RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
 // RUN:   /Wall -x cuda %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=CUDA
 
-// RUN: %clang_cl -### --offload-arch=gfx1010 -fgpu-rdc --hip-link \
+// RUN: %clang_cl -### -target x86_64-pc-windows-msvc --offload-arch=gfx1010 -fgpu-rdc --hip-link \
 // RUN:   --rocm-path=%S/Inputs/rocm /Wall -x hip %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=HIP
 

>From 2ae37be4b433632e46209aa04fcc857675783f81 Mon Sep 17 00:00:00 2001
From: bjacob <jacob.benoit.1 at gmail.com>
Date: Thu, 19 Oct 2023 13:13:27 -0400
Subject: [PATCH 24/76] Allow empty dimension arrays in
 `linalg::inferContractionDims` (#69496)

This function was returning failure when any of the intersection sets
was empty, but this is actually legitimate in "matrix times vector"
cases, where some of the operands have lower dimensionality, implying
unit-dimension semantics for the "missing" dimensions.

Example:

```mlir
func.func @transpose_extend_batch_matmul(
    %vec: tensor<32x128xi16>,
    %mat: tensor<11008x32x128xi4>) -> tensor<11008x32xi32> {
  %c0_i32 = arith.constant 0 : i32
  %cst_0 = arith.constant 0.000000e+00 : f32
  %0 = tensor.empty() : tensor<11008x32xi32>
  %1 = linalg.fill ins(%c0_i32 : i32) outs(%0 : tensor<11008x32xi32>) -> tensor<11008x32xi32>
  %2 = tensor.empty() : tensor<11008xf32>
  %3 = linalg.fill ins(%cst_0 : f32) outs(%2 : tensor<11008xf32>) -> tensor<11008xf32>
  %batch_matmul_result = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d2)>,
                                                          affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
                                                          affine_map<(d0, d1, d2) -> (d0, d1)>],
                                         iterator_types = ["parallel", "parallel", "reduction"]}
                                         ins(%vec, %mat : tensor<32x128xi16>, tensor<11008x32x128xi4>)
                                         outs(%1 : tensor<11008x32xi32>) {
  ^bb0(%in: i16, %in_3: i4, %out: i32):
      %19 = arith.extsi %in : i16 to i32
      %20 = arith.extui %in_3 : i4 to i32
      %21 = arith.muli %19, %20 : i32
      %22 = arith.addi %21, %out : i32
      linalg.yield %22 : i32
  } -> tensor<11008x32xi32>
  return %batch_matmul_result : tensor<11008x32xi32>
}
```

Here, we were returning failure because `ac` is empty. With this PR, we
return this useful information:

```
batch: [ 1 ]
m: [ ]
n: [ 0 ]
k: [ 2 ]
```
---
 mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp     |  3 ---
 mlir/test/Dialect/Linalg/match-ops-interpreter.mlir | 13 +++++++++++++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
index ea50e1232a4c74a..5fde8d71cac3e75 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp
@@ -227,9 +227,6 @@ mlir::linalg::inferContractionDims(LinalgOp linalgOp) {
       linalgOp, linalgOp.getDpsInputOperand(1), red);
   llvm::set_intersect(ra, rb);
 
-  if (ac.empty() || bc.empty() || ra.empty())
-    return failure();
-
   // Return each set in sorted order.
   ContractionDimensions dimensions{
       SmallVector<unsigned, 2>(batches.begin(), batches.end()),
diff --git a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
index bad6893eaa99e1e..1da092ab42ad779 100644
--- a/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
+++ b/mlir/test/Dialect/Linalg/match-ops-interpreter.mlir
@@ -910,6 +910,19 @@ module attributes { transform.target_tag = "start_here" } {
     return %result : tensor<10x15xf64>
   }
 
+  func.func @vecmat_simple(%lhs: tensor<20xf32>, %rhs: tensor<20x15xf32>) -> tensor<15xf64> {
+    %cst = arith.constant 0.0 : f64
+    %empty = tensor.empty() : tensor<15xf64>
+    %fill = linalg.fill ins(%cst : f64) outs(%empty : tensor<15xf64>) -> tensor<15xf64>
+    // expected-remark @below {{contraction}}
+    // expected-remark @below {{batch dims}}
+    // expected-remark @below {{m dims}}
+    // expected-remark @below {{n dims 0}}
+    // expected-remark @below {{k dims 1}}
+    %result = linalg.vecmat ins(%lhs, %rhs: tensor<20xf32>, tensor<20x15xf32>) outs(%fill: tensor<15xf64>) -> tensor<15xf64>
+    return %result : tensor<15xf64>
+  }
+
   func.func @double_batch(%lhs: tensor<40x10x50x20xf32>, %rhs: tensor<40x20x50x15xf32>) -> tensor<40x10x50x15xf32> {
     %cst = arith.constant 0.0 : f32
     %empty = tensor.empty() : tensor<40x10x50x15xf32>

>From 60622511bb123deb42dae05c3ea5121010db49b4 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot at gmail.com>
Date: Thu, 19 Oct 2023 17:14:26 +0000
Subject: [PATCH 25/76] [gn build] Port

---
 llvm/utils/gn/secondary/lldb/tools/lldb-vscode/BUILD.gn | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/utils/gn/secondary/lldb/tools/lldb-vscode/BUILD.gn b/llvm/utils/gn/secondary/lldb/tools/lldb-vscode/BUILD.gn
index f6a22670fbaa4d4..a480fcefbc17de5 100644
--- a/llvm/utils/gn/secondary/lldb/tools/lldb-vscode/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/tools/lldb-vscode/BUILD.gn
@@ -39,6 +39,7 @@ executable("lldb-vscode") {
 
   sources = [
     "BreakpointBase.cpp",
+    "DAP.cpp",
     "ExceptionBreakpoint.cpp",
     "FifoFiles.cpp",
     "FunctionBreakpoint.cpp",
@@ -50,6 +51,7 @@ executable("lldb-vscode") {
     "RunInTerminal.cpp",
     "SourceBreakpoint.cpp",
     "VSCode.cpp",
+    "lldb-dap.cpp",
     "lldb-vscode.cpp",
   ]
 }

>From 30520cdff606ea211caed1ee8cc76da14dbc4739 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot at gmail.com>
Date: Thu, 19 Oct 2023 17:14:27 +0000
Subject: [PATCH 26/76] [gn build] Port 01263c6c6fb4

---
 llvm/utils/gn/secondary/lldb/tools/lldb-vscode/BUILD.gn | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/utils/gn/secondary/lldb/tools/lldb-vscode/BUILD.gn b/llvm/utils/gn/secondary/lldb/tools/lldb-vscode/BUILD.gn
index a480fcefbc17de5..36da1ce8db2f027 100644
--- a/llvm/utils/gn/secondary/lldb/tools/lldb-vscode/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/tools/lldb-vscode/BUILD.gn
@@ -50,8 +50,6 @@ executable("lldb-vscode") {
     "ProgressEvent.cpp",
     "RunInTerminal.cpp",
     "SourceBreakpoint.cpp",
-    "VSCode.cpp",
     "lldb-dap.cpp",
-    "lldb-vscode.cpp",
   ]
 }

>From 6b8a1425eafce38f2c2ba2269c473a12cb825cb2 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks at google.com>
Date: Thu, 19 Oct 2023 10:21:22 -0700
Subject: [PATCH 27/76] [docs][NewPM] Add comment about declaring analysis
 managers in the correct order

Otherwise you will likely get crashes.
---
 llvm/docs/NewPassManager.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/docs/NewPassManager.rst b/llvm/docs/NewPassManager.rst
index 7464110daa092ff..4554d800437548b 100644
--- a/llvm/docs/NewPassManager.rst
+++ b/llvm/docs/NewPassManager.rst
@@ -17,6 +17,8 @@ Just Tell Me How To Run The Default Optimization Pipeline With The New Pass Mana
 .. code-block:: c++
 
   // Create the analysis managers.
+  // These must be declared in this order so that they are destroyed in the
+  // correct order due to inter-analysis-manager references.
   LoopAnalysisManager LAM;
   FunctionAnalysisManager FAM;
   CGSCCAnalysisManager CGAM;

>From bd21efe24c7670bc8d6f5c3bb92f12b0d1983e0b Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Thu, 19 Oct 2023 10:30:12 -0700
Subject: [PATCH 28/76] [lldb] Fix ASCII art in CommandObjectSource.h (NFC)

---
 lldb/source/Commands/CommandObjectSource.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/lldb/source/Commands/CommandObjectSource.h b/lldb/source/Commands/CommandObjectSource.h
index f2117bd6ca3f794..d552508f448ec4d 100644
--- a/lldb/source/Commands/CommandObjectSource.h
+++ b/lldb/source/Commands/CommandObjectSource.h
@@ -1,5 +1,4 @@
-//===-- CommandObjectSource.h.h -----------------------------------*- C++
-//-*-===//
+//===-- CommandObjectSource.h -----------------------------------*- C++-*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,8 +13,6 @@
 
 namespace lldb_private {
 
-// CommandObjectMultiwordSource
-
 class CommandObjectMultiwordSource : public CommandObjectMultiword {
 public:
   CommandObjectMultiwordSource(CommandInterpreter &interpreter);

>From 44d4b30ca30b11f29fdb7819e94dcc54ac8a979a Mon Sep 17 00:00:00 2001
From: Haowei <haowei at google.com>
Date: Thu, 19 Oct 2023 10:34:19 -0700
Subject: [PATCH 29/76] [unittest] Refactoring the gtest sharding option.
 (#69537)

This patch addresses the missed review comment from PR #67063. It
renames LIT flag "--disable-gtest-sharding" to "--no-gtest-sharding"
and corrects the code style issue.
---
 llvm/utils/lit/lit/LitConfig.py                |  4 ++--
 llvm/utils/lit/lit/cl_arguments.py             | 12 +++++++++---
 llvm/utils/lit/lit/formats/googletest.py       |  4 ++--
 llvm/utils/lit/lit/main.py                     |  2 +-
 llvm/utils/lit/tests/googletest-no-sharding.py |  2 +-
 5 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/llvm/utils/lit/lit/LitConfig.py b/llvm/utils/lit/lit/LitConfig.py
index c7703f15f9e3f71..5dc712ae28370cc 100644
--- a/llvm/utils/lit/lit/LitConfig.py
+++ b/llvm/utils/lit/lit/LitConfig.py
@@ -37,7 +37,7 @@ def __init__(
         maxIndividualTestTime=0,
         parallelism_groups={},
         per_test_coverage=False,
-        disableGTestSharding=False,
+        gtest_sharding=True,
     ):
         # The name of the test runner.
         self.progname = progname
@@ -88,7 +88,7 @@ def __init__(
         self.maxIndividualTestTime = maxIndividualTestTime
         self.parallelism_groups = parallelism_groups
         self.per_test_coverage = per_test_coverage
-        self.disableGTestSharding = bool(disableGTestSharding)
+        self.gtest_sharding = bool(gtest_sharding)
 
     @property
     def maxIndividualTestTime(self):
diff --git a/llvm/utils/lit/lit/cl_arguments.py b/llvm/utils/lit/lit/cl_arguments.py
index 7f12f833afe5995..ba3706659550b67 100644
--- a/llvm/utils/lit/lit/cl_arguments.py
+++ b/llvm/utils/lit/lit/cl_arguments.py
@@ -119,10 +119,16 @@ def parse_args():
 
     execution_group = parser.add_argument_group("Test Execution")
     execution_group.add_argument(
-        "--disable-gtest-sharding",
-        dest="disableGTestSharding",
-        help="Disable sharding for GoogleTest format",
+        "--gtest-sharding",
+        help="Enable sharding for GoogleTest format",
         action="store_true",
+        default=True,
+    )
+    execution_group.add_argument(
+        "--no-gtest-sharding",
+        dest="gtest_sharding",
+        help="Disable sharding for GoogleTest format",
+        action="store_false",
     )
     execution_group.add_argument(
         "--path",
diff --git a/llvm/utils/lit/lit/formats/googletest.py b/llvm/utils/lit/lit/formats/googletest.py
index 16f411b25607a99..8037094a910678a 100644
--- a/llvm/utils/lit/lit/formats/googletest.py
+++ b/llvm/utils/lit/lit/formats/googletest.py
@@ -68,7 +68,7 @@ def getTestsInDirectory(self, testSuite, path_in_suite, litConfig, localConfig):
                     self.seen_executables.add(execpath)
                 num_tests = self.get_num_tests(execpath, litConfig, localConfig)
                 if num_tests is not None:
-                    if not litConfig.disableGTestSharding:
+                    if litConfig.gtest_sharding:
                         # Compute the number of shards.
                         shard_size = init_shard_size
                         nshard = int(math.ceil(num_tests / shard_size))
@@ -151,7 +151,7 @@ def execute(self, test, litConfig):
             "GTEST_OUTPUT": "json:" + test.gtest_json_file,
             "GTEST_SHUFFLE": "1" if use_shuffle else "0",
         }
-        if not litConfig.disableGTestSharding:
+        if litConfig.gtest_sharding:
             testPath, testName = os.path.split(test.getSourcePath())
             while not os.path.exists(testPath):
                 # Handle GTest parameterized and typed tests, whose name includes
diff --git a/llvm/utils/lit/lit/main.py b/llvm/utils/lit/lit/main.py
index 4580dbc96667948..1d0d6bb2682993d 100755
--- a/llvm/utils/lit/lit/main.py
+++ b/llvm/utils/lit/lit/main.py
@@ -41,7 +41,7 @@ def main(builtin_params={}):
         params=params,
         config_prefix=opts.configPrefix,
         per_test_coverage=opts.per_test_coverage,
-        disableGTestSharding=opts.disableGTestSharding,
+        gtest_sharding=opts.gtest_sharding,
     )
 
     discovered_tests = lit.discovery.find_tests_for_inputs(
diff --git a/llvm/utils/lit/tests/googletest-no-sharding.py b/llvm/utils/lit/tests/googletest-no-sharding.py
index ccf2fe0d9d31d32..bb008effb831533 100644
--- a/llvm/utils/lit/tests/googletest-no-sharding.py
+++ b/llvm/utils/lit/tests/googletest-no-sharding.py
@@ -1,6 +1,6 @@
 # Check the various features of the GoogleTest format.
 
-# RUN: not %{lit} -v --disable-gtest-sharding --order=random %{inputs}/googletest-no-sharding > %t.out
+# RUN: not %{lit} -v --no-gtest-sharding --order=random %{inputs}/googletest-no-sharding > %t.out
 # FIXME: Temporarily dump test output so we can debug failing tests on
 # buildbots.
 # RUN: cat %t.out

>From 0af5c0668a1b93c7b3b34a1885b494c4ebb0b46f Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 19 Oct 2023 18:35:27 +0100
Subject: [PATCH 30/76] [InstCombine] Don't consider aligned_alloc removable if
 icmp uses result (#69474)

At the moment, all alloc-like functions are assumed to return non-null
pointers, if their return value is only used in a compare. This is based
on being allowed to substitute the allocation function with one that
doesn't fail to allocate the required memory.

aligned_alloc however must also return null if the required alignment
cannot be satisfied, so I don't think the same reasoning as above can be
applied to it.

This patch adds a bail-out for aligned_alloc calls to
isAllocSiteRemovable.
---
 .../InstCombine/InstructionCombining.cpp      | 20 +++++++++++++++++++
 .../Transforms/InstCombine/malloc-free.ll     | 16 ++++++++++-----
 2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 8a6f66e36bd80e9..559eb2ef4795eb1 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2430,6 +2430,26 @@ static bool isAllocSiteRemovable(Instruction *AI,
         unsigned OtherIndex = (ICI->getOperand(0) == PI) ? 1 : 0;
         if (!isNeverEqualToUnescapedAlloc(ICI->getOperand(OtherIndex), TLI, AI))
           return false;
+
+        // Do not fold compares to aligned_alloc calls, as they may have to
+        // return null in case the required alignment cannot be satisfied,
+        // unless we can prove that both alignment and size are valid.
+        auto AlignmentAndSizeKnownValid = [](CallBase *CB) {
+          // Check if alignment and size of a call to aligned_alloc is valid,
+          // that is alignment is a power-of-2 and the size is a multiple of the
+          // alignment.
+          const APInt *Alignment;
+          const APInt *Size;
+          return match(CB->getArgOperand(0), m_APInt(Alignment)) &&
+                 match(CB->getArgOperand(1), m_APInt(Size)) &&
+                 Alignment->isPowerOf2() && Size->urem(*Alignment).isZero();
+        };
+        auto *CB = dyn_cast<CallBase>(AI);
+        LibFunc TheLibFunc;
+        if (CB && TLI.getLibFunc(*CB->getCalledFunction(), TheLibFunc) &&
+            TLI.has(TheLibFunc) && TheLibFunc == LibFunc_aligned_alloc &&
+            !AlignmentAndSizeKnownValid(CB))
+          return false;
         Users.emplace_back(I);
         continue;
       }
diff --git a/llvm/test/Transforms/InstCombine/malloc-free.ll b/llvm/test/Transforms/InstCombine/malloc-free.ll
index b77f70f239921e8..10725950a1c7373 100644
--- a/llvm/test/Transforms/InstCombine/malloc-free.ll
+++ b/llvm/test/Transforms/InstCombine/malloc-free.ll
@@ -26,9 +26,11 @@ define i32 @dead_aligned_alloc(i32 %size, i32 %alignment, i8 %value) {
   ret i32 0
 }
 
-define i1 @aligned_alloc_pointer_only_used_by_cmp(i32 %size, i32 %alignment, i8 %value) {
-; CHECK-LABEL: @aligned_alloc_pointer_only_used_by_cmp(
-; CHECK-NEXT:    ret i1 true
+define i1 @aligned_alloc_only_pointe(i32 %size, i32 %alignment, i8 %value) {
+; CHECK-LABEL: @aligned_alloc_only_pointe(
+; CHECK-NEXT:    [[ALIGNED_ALLOCATION:%.*]] = tail call ptr @aligned_alloc(i32 [[ALIGNMENT:%.*]], i32 [[SIZE:%.*]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr [[ALIGNED_ALLOCATION]], null
+; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %aligned_allocation = tail call ptr @aligned_alloc(i32 %alignment, i32 %size)
   %cmp = icmp ne ptr %aligned_allocation, null
@@ -46,7 +48,9 @@ define i1 @aligned_alloc_pointer_only_used_by_cmp_alignment_and_value_known_ok(i
 
 define i1 @aligned_alloc_pointer_only_used_by_cmp_alignment_no_power_of_2(i32 %size, i32 %alignment, i8 %value) {
 ; CHECK-LABEL: @aligned_alloc_pointer_only_used_by_cmp_alignment_no_power_of_2(
-; CHECK-NEXT:    ret i1 true
+; CHECK-NEXT:    [[ALIGNED_ALLOCATION:%.*]] = tail call dereferenceable_or_null(32) ptr @aligned_alloc(i32 3, i32 32)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr [[ALIGNED_ALLOCATION]], null
+; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %aligned_allocation = tail call ptr @aligned_alloc(i32 3, i32 32)
   %cmp = icmp ne ptr %aligned_allocation, null
@@ -55,7 +59,9 @@ define i1 @aligned_alloc_pointer_only_used_by_cmp_alignment_no_power_of_2(i32 %s
 
 define i1 @aligned_alloc_pointer_only_used_by_cmp_size_not_multiple_of_alignment(i32 %size, i32 %alignment, i8 %value) {
 ; CHECK-LABEL: @aligned_alloc_pointer_only_used_by_cmp_size_not_multiple_of_alignment(
-; CHECK-NEXT:    ret i1 true
+; CHECK-NEXT:    [[ALIGNED_ALLOCATION:%.*]] = tail call dereferenceable_or_null(31) ptr @aligned_alloc(i32 8, i32 31)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr [[ALIGNED_ALLOCATION]], null
+; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %aligned_allocation = tail call ptr @aligned_alloc(i32 8, i32 31)
   %cmp = icmp ne ptr %aligned_allocation, null

>From f35053209990ec4a58c270dcfe60a9ee76113593 Mon Sep 17 00:00:00 2001
From: alfredfo <98554039+alfredfo at users.noreply.github.com>
Date: Thu, 19 Oct 2023 19:39:02 +0200
Subject: [PATCH 31/76] [libc] Fix accidental LIBC_NAMESPACE_clock_freq
 (#69620)

See-also: https://github.com/llvm/llvm-project/pull/69548
---
 libc/src/time/gpu/time_utils.cpp        | 2 +-
 libc/src/time/gpu/time_utils.h          | 4 ++--
 libc/startup/gpu/amdgpu/start.cpp       | 2 +-
 libc/test/UnitTest/LibcTest.cpp         | 4 ++--
 libc/utils/gpu/loader/amdgpu/Loader.cpp | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/libc/src/time/gpu/time_utils.cpp b/libc/src/time/gpu/time_utils.cpp
index 935a5394e5fdbe0..300a72b102360ae 100644
--- a/libc/src/time/gpu/time_utils.cpp
+++ b/libc/src/time/gpu/time_utils.cpp
@@ -16,7 +16,7 @@ namespace LIBC_NAMESPACE {
 // TODO: Once we have another use-case for this we should put it in a common
 // device environment struct.
 extern "C" [[gnu::visibility("protected")]] uint64_t
-    [[clang::address_space(4)]] LIBC_NAMESPACE_clock_freq = clock_freq;
+    [[clang::address_space(4)]] __llvm_libc_clock_freq = clock_freq;
 #endif
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/time/gpu/time_utils.h b/libc/src/time/gpu/time_utils.h
index 075fcb61768ec7e..53548181e17e25c 100644
--- a/libc/src/time/gpu/time_utils.h
+++ b/libc/src/time/gpu/time_utils.h
@@ -39,8 +39,8 @@ constexpr uint64_t clock_freq = 0;
 // We provide an externally visible symbol such that the runtime can set this to
 // the correct value. If it is not set we try to default to the known values.
 extern "C" [[gnu::visibility("protected")]] uint64_t
-    [[clang::address_space(4)]] LIBC_NAMESPACE_clock_freq;
-#define GPU_CLOCKS_PER_SEC static_cast<clock_t>(LIBC_NAMESPACE_clock_freq)
+    [[clang::address_space(4)]] __llvm_libc_clock_freq;
+#define GPU_CLOCKS_PER_SEC static_cast<clock_t>(__llvm_libc_clock_freq)
 
 #elif defined(LIBC_TARGET_ARCH_IS_NVPTX)
 // NPVTX uses a single 1 GHz fixed frequency clock for all target architectures.
diff --git a/libc/startup/gpu/amdgpu/start.cpp b/libc/startup/gpu/amdgpu/start.cpp
index 9e642dbe3fc7d30..89b0be208d2e7fd 100644
--- a/libc/startup/gpu/amdgpu/start.cpp
+++ b/libc/startup/gpu/amdgpu/start.cpp
@@ -19,7 +19,7 @@ namespace LIBC_NAMESPACE {
 // real time. However, the frequency of this clock varies between cards and can
 // only be obtained via the driver. The loader will set this so we can use it.
 extern "C" [[gnu::visibility("protected")]] uint64_t
-    [[clang::address_space(4)]] LIBC_NAMESPACE_clock_freq = 0;
+    [[clang::address_space(4)]] __llvm_libc_clock_freq = 0;
 
 extern "C" uintptr_t __init_array_start[];
 extern "C" uintptr_t __init_array_end[];
diff --git a/libc/test/UnitTest/LibcTest.cpp b/libc/test/UnitTest/LibcTest.cpp
index b01f40f0526461f..5c27e2e69bead95 100644
--- a/libc/test/UnitTest/LibcTest.cpp
+++ b/libc/test/UnitTest/LibcTest.cpp
@@ -23,8 +23,8 @@ static long clock() { return LIBC_NAMESPACE::gpu::fixed_frequency_clock(); }
 #else
 // The AMDGPU loader needs to initialize this at runtime by querying the driver.
 extern "C" [[gnu::visibility("protected")]] uint64_t
-    [[clang::address_space(4)]] LIBC_NAMESPACE_clock_freq;
-#define CLOCKS_PER_SEC LIBC_NAMESPACE_clock_freq
+    [[clang::address_space(4)]] __llvm_libc_clock_freq;
+#define CLOCKS_PER_SEC __llvm_libc_clock_freq
 #endif
 #else
 static long clock() { return 0; }
diff --git a/libc/utils/gpu/loader/amdgpu/Loader.cpp b/libc/utils/gpu/loader/amdgpu/Loader.cpp
index 1fb67e841cfec2e..1d0247a6dc5dca0 100644
--- a/libc/utils/gpu/loader/amdgpu/Loader.cpp
+++ b/libc/utils/gpu/loader/amdgpu/Loader.cpp
@@ -477,7 +477,7 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
   // If the clock_freq symbol is missing, no work to do.
   hsa_executable_symbol_t freq_sym;
   if (HSA_STATUS_SUCCESS ==
-      hsa_executable_get_symbol_by_name(executable, "LIBC_NAMESPACE_clock_freq",
+      hsa_executable_get_symbol_by_name(executable, "__llvm_libc_clock_freq",
                                         &dev_agent, &freq_sym)) {
 
     void *host_clock_freq;

>From f7ab79f33ef5609a2d8519cbfc676842d617eeb3 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <rampitec at users.noreply.github.com>
Date: Thu, 19 Oct 2023 10:41:44 -0700
Subject: [PATCH 32/76] [AMDGPU] Allow lit() on operands which do not accept
 modifiers (#69527)

---
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      | 38 +++++++++++++++----
 llvm/test/MC/AMDGPU/literals.s                | 10 +++++
 2 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 9e143c77b606c34..fe2729f9790aa7c 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1577,9 +1577,11 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
   bool parseSP3NegModifier();
-  ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
+  ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
+                       bool HasLit = false);
   ParseStatus parseReg(OperandVector &Operands);
-  ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
+  ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
+                            bool HasLit = false);
   ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                            bool AllowImm = true);
   ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
@@ -2904,13 +2906,26 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
 }
 
 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
-                                      bool HasSP3AbsModifier) {
+                                      bool HasSP3AbsModifier, bool HasLit) {
   // TODO: add syntactic sugar for 1/(2*PI)
 
   if (isRegister())
     return ParseStatus::NoMatch;
   assert(!isModifier());
 
+  if (!HasLit) {
+    HasLit = trySkipId("lit");
+    if (HasLit) {
+      if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
+        return ParseStatus::Failure;
+      ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
+      if (S.isSuccess() &&
+          !skipToken(AsmToken::RParen, "expected closing parentheses"))
+        return ParseStatus::Failure;
+      return S;
+    }
+  }
+
   const auto& Tok = getToken();
   const auto& NextTok = peekToken();
   bool IsReal = Tok.is(AsmToken::Real);
@@ -2923,6 +2938,9 @@ ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
     Negate = true;
   }
 
+  AMDGPUOperand::Modifiers Mods;
+  Mods.Lit = HasLit;
+
   if (IsReal) {
     // Floating-point expressions are not supported.
     // Can only allow floating-point literals with an
@@ -2941,6 +2959,8 @@ ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
     Operands.push_back(
       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                                AMDGPUOperand::ImmTyNone, true));
+    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
+    Op.setModifiers(Mods);
 
     return ParseStatus::Success;
 
@@ -2967,7 +2987,11 @@ ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
 
     if (Expr->evaluateAsAbsolute(IntVal)) {
       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
+      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
+      Op.setModifiers(Mods);
     } else {
+      if (HasLit)
+        return ParseStatus::NoMatch;
       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
     }
 
@@ -2990,20 +3014,20 @@ ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
 }
 
 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
-                                           bool HasSP3AbsMod) {
+                                           bool HasSP3AbsMod, bool HasLit) {
   ParseStatus Res = parseReg(Operands);
   if (!Res.isNoMatch())
     return Res;
   if (isModifier())
     return ParseStatus::NoMatch;
-  return parseImm(Operands, HasSP3AbsMod);
+  return parseImm(Operands, HasSP3AbsMod, HasLit);
 }
 
 bool
 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
     const auto &str = Token.getString();
-    return str == "abs" || str == "neg" || str == "sext" || str == "lit";
+    return str == "abs" || str == "neg" || str == "sext";
   }
   return false;
 }
@@ -3123,7 +3147,7 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
 
   ParseStatus Res;
   if (AllowImm) {
-    Res = parseRegOrImm(Operands, SP3Abs);
+    Res = parseRegOrImm(Operands, SP3Abs, Lit);
   } else {
     Res = parseReg(Operands);
   }
diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s
index 910e3d82b2fc849..00575619c49f658 100644
--- a/llvm/test/MC/AMDGPU/literals.s
+++ b/llvm/test/MC/AMDGPU/literals.s
@@ -925,3 +925,13 @@ v_sqrt_f32 v2, lit(123.0
 // NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: expected immediate with lit modifier
 // NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: expected immediate with lit modifier
 v_sqrt_f32 v2, lit(v1)
+
+// Make sure lit() is accepted on operands without modifiers.
+
+// SICI: v_madak_f32 v4, 0x7e8, v8, 0x7e8        ; encoding: [0xff,0x10,0x08,0x42,0xe8,0x07,0x00,0x00]
+// GFX89: v_madak_f32 v4, 0x7e8, v8, 0x7e8        ; encoding: [0xff,0x10,0x08,0x30,0xe8,0x07,0x00,0x00]
+v_madak_f32 v4, lit(0x7e8), v8, lit(0x7e8)
+
+// NOSICI: :[[@LINE+2]]:{{[0-9]+}}: error: not a valid operand.
+// NOGFX89: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand.
+v_madak_f32 v4, lit(lit(0x7e8)), v8, lit(0x7e8)

>From b1a6b2cc409e5d54080638ee2df5d8fea698dee2 Mon Sep 17 00:00:00 2001
From: Tobias Stadler <mail at stadler-tobias.de>
Date: Thu, 19 Oct 2023 19:50:46 +0200
Subject: [PATCH 33/76] [AArch64][GlobalISel] Fix miscompile on carry-in
 selection (#68840)

Eliding the vReg to NZCV conversion instruction for G_UADDE/... is illegal if
it causes the carry generating instruction to become dead because ISel
will just remove the dead instruction.
I accidentally introduced this here: https://reviews.llvm.org/D153164.
As far as I can tell, this is not exposed on the default clang settings,
because on O0 there is always a G_AND between boolean defs and uses, so
the optimization doesn't apply. Thus, when I tried to commit
https://reviews.llvm.org/D159140, which removes these G_ANDs on O0, I
broke some UBSan tests.
We fix this by recursively selecting the previous (NZCV-setting) instruction before continuing selection for the current instruction.
---
 .../CodeGen/GlobalISel/MachineIRBuilder.h     |  3 ++
 .../GISel/AArch64InstructionSelector.cpp      | 20 +++++++++++-
 .../AArch64/GlobalISel/select-sadde.mir       | 31 +++++++++++++++++++
 .../AArch64/GlobalISel/select-ssube.mir       | 31 +++++++++++++++++++
 .../AArch64/GlobalISel/select-uadde.mir       | 31 +++++++++++++++++++
 .../AArch64/GlobalISel/select-usube.mir       | 31 +++++++++++++++++++
 6 files changed, 146 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index bf41c19cd6cc726..3409d441704ca17 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -298,6 +298,9 @@ class MachineIRBuilder {
   /// Getter for the State
   MachineIRBuilderState &getState() { return State; }
 
+  /// Setter for the State
+  void setState(const MachineIRBuilderState &NewState) { State = NewState; }
+
   /// Getter for the basic block we currently build.
   const MachineBasicBlock &getMBB() const {
     assert(State.MBB && "MachineBasicBlock is not set");
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 1c7a09696e853e2..f4ef6cffdfa4193 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -102,6 +102,11 @@ class AArch64InstructionSelector : public InstructionSelector {
   // An early selection function that runs before the selectImpl() call.
   bool earlySelect(MachineInstr &I);
 
+  /// Save state that is shared between select calls, call select on \p I and
+  /// then restore the saved state. This can be used to recursively call select
+  /// within a select call.
+  bool selectAndRestoreState(MachineInstr &I);
+
   // Do some preprocessing of G_PHIs before we begin selection.
   void processPHIs(MachineFunction &MF);
 
@@ -3552,6 +3557,13 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
   return false;
 }
 
+bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
+  MachineIRBuilderState OldMIBState = MIB.getState();
+  bool Success = select(I);
+  MIB.setState(OldMIBState);
+  return Success;
+}
+
 bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
                                                  MachineRegisterInfo &MRI) {
   Register VecReg = I.getOperand(1).getReg();
@@ -4749,11 +4761,17 @@ MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
   // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
   // generated during legalization of wide add/sub. This optimization depends on
   // these sequences not being interrupted by other instructions.
+  // We have to select the previous instruction before the carry-using
+  // instruction is deleted by the calling function, otherwise the previous
+  // instruction might become dead and would get deleted.
   MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
   if (SrcMI == I.getPrevNode()) {
     if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
       bool ProducesNegatedCarry = CarrySrcMI->isSub();
-      if (NeedsNegatedCarry == ProducesNegatedCarry && CarrySrcMI->isUnsigned())
+      if (NeedsNegatedCarry == ProducesNegatedCarry &&
+          CarrySrcMI->isUnsigned() &&
+          CarrySrcMI->getCarryOutReg() == CarryReg &&
+          selectAndRestoreState(*SrcMI))
         return nullptr;
     }
   }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-sadde.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-sadde.mir
index 85625ced4ba6922..e98ab4af570978d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-sadde.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-sadde.mir
@@ -175,3 +175,34 @@ body:             |
     $x2 = COPY %9(s64)
     RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
 ...
+...
+---
+name:            sadde_opt_prev_dead
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+    ; CHECK-LABEL: name: sadde_opt_prev_dead
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+    ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = COPY $x2
+    %3:gpr(s64) = COPY $x3
+    %4:gpr(s64), %5:gpr(s32) = G_UADDO %0, %2
+    %6:gpr(s64), %7:gpr(s32) = G_SADDE %1, %3, %5
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-ssube.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-ssube.mir
index 00bd26cc0220d2b..a9da51781efbe0e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-ssube.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-ssube.mir
@@ -175,3 +175,34 @@ body:             |
     $x2 = COPY %9(s64)
     RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
 ...
+...
+---
+name:            ssube_opt_prev_dead
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+    ; CHECK-LABEL: name: ssube_opt_prev_dead
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+    ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = COPY $x2
+    %3:gpr(s64) = COPY $x3
+    %4:gpr(s64), %5:gpr(s32) = G_USUBO %0, %2
+    %6:gpr(s64), %7:gpr(s32) = G_SSUBE %1, %3, %5
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-uadde.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-uadde.mir
index dc80d0c9abc252e..f6f9964e6babd34 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-uadde.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-uadde.mir
@@ -175,3 +175,34 @@ body:             |
     $x2 = COPY %9(s64)
     RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
 ...
+...
+---
+name:            uadde_opt_prev_dead
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+    ; CHECK-LABEL: name: uadde_opt_prev_dead
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+    ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = COPY $x2
+    %3:gpr(s64) = COPY $x3
+    %4:gpr(s64), %5:gpr(s32) = G_UADDO %0, %2
+    %6:gpr(s64), %7:gpr(s32) = G_UADDE %1, %3, %5
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-usube.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-usube.mir
index c532474fc67b4f7..fa8799653a570cc 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-usube.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-usube.mir
@@ -175,3 +175,34 @@ body:             |
     $x2 = COPY %9(s64)
     RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
 ...
+...
+---
+name:            usube_opt_prev_dead
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+    ; CHECK-LABEL: name: usube_opt_prev_dead
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+    ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = COPY $x2
+    %3:gpr(s64) = COPY $x3
+    %4:gpr(s64), %5:gpr(s32) = G_USUBO %0, %2
+    %6:gpr(s64), %7:gpr(s32) = G_USUBE %1, %3, %5
+    $w0 = COPY %7(s32)
+    RET_ReallyLR implicit $w0
+...

>From 460e84398a19b8b662985e872a5e9762ca056a1b Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <137158460+wangpc-pp at users.noreply.github.com>
Date: Fri, 20 Oct 2023 01:52:42 +0800
Subject: [PATCH 34/76] [RISCV] Add getSameRatioLMUL (#69570)

To calculate the LMUL with the same SEW/LMUL ratio when providing
EEW.
---
 .../RISCV/MCTargetDesc/RISCVBaseInfo.cpp      |  9 +++++
 .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h |  2 ++
 llvm/unittests/Target/RISCV/CMakeLists.txt    |  1 +
 .../Target/RISCV/RISCVBaseInfoTest.cpp        | 34 +++++++++++++++++++
 4 files changed, 46 insertions(+)
 create mode 100644 llvm/unittests/Target/RISCV/RISCVBaseInfoTest.cpp

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
index d71efc11e6a9fcf..7919189d198c8d1 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
@@ -206,6 +206,15 @@ unsigned RISCVVType::getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
   return (SEW * 8) / LMul;
 }
 
+RISCVII::VLMUL RISCVVType::getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL,
+                                            unsigned EEW) {
+  unsigned Ratio = RISCVVType::getSEWLMULRatio(SEW, VLMUL);
+  unsigned EMULFixedPoint = (EEW * 8) / Ratio;
+  bool Fractional = EMULFixedPoint < 8;
+  unsigned EMUL = Fractional ? 8 / EMULFixedPoint : EMULFixedPoint / 8;
+  return RISCVVType::encodeLMUL(EMUL, Fractional);
+}
+
 // Include the auto-generated portion of the compress emitter.
 #define GEN_UNCOMPRESS_INSTR
 #define GEN_COMPRESS_INSTR
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 20ff26a39dc3b30..e7181eadd497386 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -535,6 +535,8 @@ void printVType(unsigned VType, raw_ostream &OS);
 
 unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul);
 
+RISCVII::VLMUL getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL,
+                                unsigned EEW);
 } // namespace RISCVVType
 
 namespace RISCVRVC {
diff --git a/llvm/unittests/Target/RISCV/CMakeLists.txt b/llvm/unittests/Target/RISCV/CMakeLists.txt
index 2c757b82e5dce85..9d0bf7244c02210 100644
--- a/llvm/unittests/Target/RISCV/CMakeLists.txt
+++ b/llvm/unittests/Target/RISCV/CMakeLists.txt
@@ -13,6 +13,7 @@ set(LLVM_LINK_COMPONENTS
 
 add_llvm_target_unittest(RISCVTests
   MCInstrAnalysisTest.cpp
+  RISCVBaseInfoTest.cpp
   )
 
 set_property(TARGET RISCVTests PROPERTY FOLDER "Tests/UnitTests/TargetTests")
diff --git a/llvm/unittests/Target/RISCV/RISCVBaseInfoTest.cpp b/llvm/unittests/Target/RISCV/RISCVBaseInfoTest.cpp
new file mode 100644
index 000000000000000..0e4c90caaaefd7d
--- /dev/null
+++ b/llvm/unittests/Target/RISCV/RISCVBaseInfoTest.cpp
@@ -0,0 +1,34 @@
+//===- RISCVBaseInfoTest.cpp - RISCVBaseInfo unit tests ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/RISCVBaseInfo.h"
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+TEST(RISCVBaseInfo, CheckSameRatioLMUL) {
+  // Smaller LMUL.
+  EXPECT_EQ(RISCVII::LMUL_1,
+            RISCVVType::getSameRatioLMUL(16, RISCVII::LMUL_2, 8));
+  EXPECT_EQ(RISCVII::LMUL_F2,
+            RISCVVType::getSameRatioLMUL(16, RISCVII::LMUL_1, 8));
+  // Smaller fractional LMUL.
+  EXPECT_EQ(RISCVII::LMUL_F8,
+            RISCVVType::getSameRatioLMUL(16, RISCVII::LMUL_F4, 8));
+  // Bigger LMUL.
+  EXPECT_EQ(RISCVII::LMUL_2,
+            RISCVVType::getSameRatioLMUL(8, RISCVII::LMUL_1, 16));
+  EXPECT_EQ(RISCVII::LMUL_1,
+            RISCVVType::getSameRatioLMUL(8, RISCVII::LMUL_F2, 16));
+  // Bigger fractional LMUL.
+  EXPECT_EQ(RISCVII::LMUL_F2,
+            RISCVVType::getSameRatioLMUL(8, RISCVII::LMUL_F4, 16));
+}
+} // namespace

>From 3343d000a3511a4ea6fbd73bad86a3bbef8117e4 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Thu, 19 Oct 2023 10:58:52 -0700
Subject: [PATCH 35/76] [ELF][test] Demonstrate --no-allow-shlib-undefined
 behavior with a hidden relocatable object file definition

1981b1b6b92f7579a30c9ed32dbdf3bc749c1b40 improved the check.
---
 lld/test/ELF/allow-shlib-undefined.s | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/lld/test/ELF/allow-shlib-undefined.s b/lld/test/ELF/allow-shlib-undefined.s
index 515fa618e7beec6..03f047b02d75d52 100644
--- a/lld/test/ELF/allow-shlib-undefined.s
+++ b/lld/test/ELF/allow-shlib-undefined.s
@@ -37,6 +37,17 @@
 # RUN: not ld.lld %t.o %t.so %t2.so -o /dev/null 2>&1 | \
 # RUN:   FileCheck %s --check-prefixes=CHECK,CHECK2
 
+## Test some cases where relocatable object files provide a hidden definition.
+# RUN: echo '.globl _unresolved; _unresolved:' | llvm-mc -filetype=obj -triple=x86_64 -o %tdef.o
+# RUN: echo '.globl _unresolved; .hidden _unresolved; _unresolved:' | llvm-mc -filetype=obj -triple=x86_64 -o %tdef-hidden.o
+# RUN: ld.lld %t.o %t.so %tdef-hidden.o -o /dev/null 2>&1 | count 0
+
+## The section containing the definition is discarded, and we report an error.
+# RUN: not ld.lld --gc-sections %t.o %t.so %tdef-hidden.o -o /dev/null 2>&1 | FileCheck %s
+## The definition %tdef.so is ignored.
+# RUN: ld.lld -shared -soname=tdef.so %tdef.o -o %tdef.so
+# RUN: not ld.lld --gc-sections %t.o %t.so %tdef.so %tdef-hidden.o -o /dev/null 2>&1 | FileCheck %s
+
 .globl _start
 _start:
   callq _shared at PLT

>From 67770cbb9841e86357021e03753a1b8f4ce783ea Mon Sep 17 00:00:00 2001
From: Alfred Persson Forsberg <cat at catcream.org>
Date: Thu, 19 Oct 2023 19:50:44 +0200
Subject: [PATCH 36/76] [libc][NFC] Fix features.h.def file header

---
 libc/include/features.h.def | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/include/features.h.def b/libc/include/features.h.def
index a5d2be0a0692fe2..64205f57acb5d9b 100644
--- a/libc/include/features.h.def
+++ b/libc/include/features.h.def
@@ -1,4 +1,4 @@
-//===-- C standard library header features.h -------------------------------===//
+//===-- C standard library header features.h ------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.

>From 5175cd777c57190ab9860c304796d386e4df9b8f Mon Sep 17 00:00:00 2001
From: erichkeane <ekeane at nvidia.com>
Date: Thu, 19 Oct 2023 10:59:34 -0700
Subject: [PATCH 37/76] Disallow _BitInt as an underlying type for an
 enumeration

As mentioned in #69619, C23 6.7.2.2p5 explicitly prohibits using a
_BitInt as an underlying type to an enumeration. While we had this in
the _ExtInt implementation, the justification for that limitation in C
is compelling, so this is being removed to be compatible with the C23
standard.

Fixes: #69619
---
 clang/docs/ReleaseNotes.rst                   |  3 +++
 .../clang/Basic/DiagnosticSemaKinds.td        |  2 +-
 clang/lib/Sema/SemaDecl.cpp                   |  6 ++---
 clang/test/CodeGenCXX/ext-int.cpp             | 13 ----------
 clang/test/SemaCXX/ext-int.cpp                |  4 ++--
 clang/test/SemaCXX/type-traits.cpp            | 24 -------------------
 6 files changed, 8 insertions(+), 44 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index eee48431d716878..fc8caf9221b9d29 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -409,6 +409,9 @@ Bug Fixes in This Version
   operator in C. No longer issuing a confusing diagnostic along the lines of
   "incompatible operand types ('foo' and 'foo')" with extensions such as matrix
   types. Fixes (`#69008 <https://github.com/llvm/llvm-project/issues/69008>`_)
+- Clang no longer permits using the `_BitInt` types as an underlying type for an
+  enumeration as specified in the C23 Standard.
+  Fixes (`#69619 <https://github.com/llvm/llvm-project/issues/69619>`_)
 
 Bug Fixes to Compiler Builtins
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index a6b21f0af1c0642..6b499d112b79f48 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -2613,7 +2613,7 @@ def err_final_function_overridden : Error<
 
 // C++11 scoped enumerations
 def err_enum_invalid_underlying : Error<
-  "non-integral type %0 is an invalid underlying type">;
+  "%select{non-integral type %0|%0}1 is an invalid underlying type">;
 def err_enumerator_too_large : Error<
   "enumerator value is not representable in the underlying type %0">;
 def ext_enumerator_too_large : Extension<
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index e3387b5b669c603..b363b0db79f164d 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -16722,10 +16722,8 @@ bool Sema::CheckEnumUnderlyingType(TypeSourceInfo *TI) {
     if (BT->isInteger())
       return false;
 
-  if (T->isBitIntType())
-    return false;
-
-  return Diag(UnderlyingLoc, diag::err_enum_invalid_underlying) << T;
+  return Diag(UnderlyingLoc, diag::err_enum_invalid_underlying)
+         << T << T->isBitIntType();
 }
 
 /// Check whether this is a valid redeclaration of a previous enumeration.
diff --git a/clang/test/CodeGenCXX/ext-int.cpp b/clang/test/CodeGenCXX/ext-int.cpp
index 7676dec791f3f62..5a4270aef28542e 100644
--- a/clang/test/CodeGenCXX/ext-int.cpp
+++ b/clang/test/CodeGenCXX/ext-int.cpp
@@ -98,19 +98,6 @@ void BitfieldAssignment() {
   // CHECK: %[[SETC:.+]] = or i8 %[[CLEARC]], 64
 }
 
-enum AsEnumUnderlyingType : _BitInt(9) {
-  A,B,C
-};
-
-void UnderlyingTypeUsage(AsEnumUnderlyingType Param) {
-  // LIN: define{{.*}} void @_Z19UnderlyingTypeUsage20AsEnumUnderlyingType(i9 signext %
-  // WIN64: define dso_local void @"?UnderlyingTypeUsage@@YAXW4AsEnumUnderlyingType@@@Z"(i9 %
-  // WIN32: define dso_local void @"?UnderlyingTypeUsage@@YAXW4AsEnumUnderlyingType@@@Z"(i9 signext %
-  AsEnumUnderlyingType Var;
-  // CHECK: alloca i9, align 2
-  // CHECK: store i9 %{{.*}}, align 2
-}
-
 unsigned _BitInt(33) ManglingTestRetParam(unsigned _BitInt(33) Param) {
 // LIN64: define{{.*}} i64 @_Z20ManglingTestRetParamDU33_(i64 %
 // LIN32: define{{.*}} i33 @_Z20ManglingTestRetParamDU33_(i33 %
diff --git a/clang/test/SemaCXX/ext-int.cpp b/clang/test/SemaCXX/ext-int.cpp
index 4dd7cd3b7b2b6e2..000b871ccd3433a 100644
--- a/clang/test/SemaCXX/ext-int.cpp
+++ b/clang/test/SemaCXX/ext-int.cpp
@@ -211,8 +211,8 @@ void ConstexprBitsize() {
   static_assert(is_same<decltype(F), _BitInt(42)>::value, "");
 }
 
-// Useable as an underlying type.
-enum AsEnumUnderlyingType : _BitInt(33) {
+// Not useable as an underlying type.
+enum AsEnumUnderlyingType : _BitInt(33) { // expected-error{{'_BitInt(33)' is an invalid underlying type}}
 };
 
 void overloaded(int);
diff --git a/clang/test/SemaCXX/type-traits.cpp b/clang/test/SemaCXX/type-traits.cpp
index 275ddcbae73930d..c5d196a2590f8d7 100644
--- a/clang/test/SemaCXX/type-traits.cpp
+++ b/clang/test/SemaCXX/type-traits.cpp
@@ -4002,12 +4002,6 @@ enum class UnscopedInt128 : __int128 {};
 enum class ScopedInt128 : __int128 {};
 enum class UnscopedUInt128 : unsigned __int128 {};
 enum class ScopedUInt128 : unsigned __int128 {};
-enum UnscopedBit : unsigned _BitInt(1) {};
-enum ScopedBit : unsigned _BitInt(1) {};
-enum UnscopedIrregular : _BitInt(21) {};
-enum UnscopedUIrregular : unsigned _BitInt(21) {};
-enum class ScopedIrregular : _BitInt(21) {};
-enum class ScopedUIrregular : unsigned _BitInt(21) {};
 
 void make_signed() {
   check_make_signed<char, signed char>();
@@ -4050,11 +4044,6 @@ void make_signed() {
   check_make_signed<UnscopedUInt128, __int128>();
   check_make_signed<ScopedUInt128, __int128>();
 
-  check_make_signed<UnscopedIrregular, _BitInt(21)>();
-  check_make_signed<UnscopedUIrregular, _BitInt(21)>();
-  check_make_signed<ScopedIrregular, _BitInt(21)>();
-  check_make_signed<ScopedUIrregular, _BitInt(21)>();
-
   { using ExpectedError = __make_signed(bool); }
   // expected-error@*:*{{'make_signed' is only compatible with non-bool integers and enum types, but was given 'bool'}}
   { using ExpectedError = __make_signed(UnscopedBool); }
@@ -4063,10 +4052,6 @@ void make_signed() {
   // expected-error@*:*{{'make_signed' is only compatible with non-bool integers and enum types, but was given 'ScopedBool' whose underlying type is 'bool'}}
   { using ExpectedError = __make_signed(unsigned _BitInt(1)); }
   // expected-error@*:*{{'make_signed' is only compatible with non-_BitInt(1) integers and enum types, but was given 'unsigned _BitInt(1)'}}
-  { using ExpectedError = __make_signed(UnscopedBit); }
-  // expected-error@*:*{{'make_signed' is only compatible with non-_BitInt(1) integers and enum types, but was given 'UnscopedBit' whose underlying type is 'unsigned _BitInt(1)'}}
-  { using ExpectedError = __make_signed(ScopedBit); }
-  // expected-error@*:*{{'make_signed' is only compatible with non-_BitInt(1) integers and enum types, but was given 'ScopedBit' whose underlying type is 'unsigned _BitInt(1)'}}
   { using ExpectedError = __make_signed(int[]); }
   // expected-error@*:*{{'make_signed' is only compatible with non-bool integers and enum types, but was given 'int[]'}}
   { using ExpectedError = __make_signed(int[5]); }
@@ -4147,11 +4132,6 @@ void make_unsigned() {
   check_make_unsigned<UnscopedUInt128, unsigned __int128>();
   check_make_unsigned<ScopedUInt128, unsigned __int128>();
 
-  check_make_unsigned<UnscopedIrregular, unsigned _BitInt(21)>();
-  check_make_unsigned<UnscopedUIrregular, unsigned _BitInt(21)>();
-  check_make_unsigned<ScopedIrregular, unsigned _BitInt(21)>();
-  check_make_unsigned<ScopedUIrregular, unsigned _BitInt(21)>();
-
   { using ExpectedError = __make_unsigned(bool); }
   // expected-error@*:*{{'make_unsigned' is only compatible with non-bool integers and enum types, but was given 'bool'}}
   { using ExpectedError = __make_unsigned(UnscopedBool); }
@@ -4160,10 +4140,6 @@ void make_unsigned() {
   // expected-error@*:*{{'make_unsigned' is only compatible with non-bool integers and enum types, but was given 'ScopedBool' whose underlying type is 'bool'}}
   { using ExpectedError = __make_unsigned(unsigned _BitInt(1)); }
   // expected-error@*:*{{'make_unsigned' is only compatible with non-_BitInt(1) integers and enum types, but was given 'unsigned _BitInt(1)'}}
-  { using ExpectedError = __make_unsigned(UnscopedBit); }
-  // expected-error@*:*{{'make_unsigned' is only compatible with non-_BitInt(1) integers and enum types, but was given 'UnscopedBit'}}
-  { using ExpectedError = __make_unsigned(ScopedBit); }
-  // expected-error@*:*{{'make_unsigned' is only compatible with non-_BitInt(1) integers and enum types, but was given 'ScopedBit'}}
   { using ExpectedError = __make_unsigned(int[]); }
   // expected-error@*:*{{'make_unsigned' is only compatible with non-bool integers and enum types, but was given 'int[]'}}
   { using ExpectedError = __make_unsigned(int[5]); }

>From 5bae3a0b0ccf8f4f2bcffc86453197f3cc5a9829 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Thu, 19 Oct 2023 11:09:26 -0700
Subject: [PATCH 38/76] [lldb] Remove CompileUnit::SetSupportFiles overload
 (NFC)

CompileUnit::SetSupportFiles had two overloads, one that takes an lvalue
reference and one that takes an rvalue reference. This removes both and
replaces it with an overload that takes the FileSpecList by value and
moves it into the member variable.

Because we're storing the value as a member, this covers both cases. If
the new FileSpecList was passed by lvalue reference, we'd copy it into
the member anyway. If it was passed as an rvalue reference, we'll have
created a new instance using its move and then immediately move it again
into our member. In either case the number of copies remains unchanged.
---
 lldb/include/lldb/Symbol/CompileUnit.h | 3 +--
 lldb/source/Symbol/CompileUnit.cpp     | 6 +-----
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/lldb/include/lldb/Symbol/CompileUnit.h b/lldb/include/lldb/Symbol/CompileUnit.h
index 229ee2d27a703fd..93f191b49985847 100644
--- a/lldb/include/lldb/Symbol/CompileUnit.h
+++ b/lldb/include/lldb/Symbol/CompileUnit.h
@@ -331,8 +331,7 @@ class CompileUnit : public std::enable_shared_from_this<CompileUnit>,
   ///     A line table object pointer that this object now owns.
   void SetLineTable(LineTable *line_table);
 
-  void SetSupportFiles(const FileSpecList &support_files);
-  void SetSupportFiles(FileSpecList &&support_files);
+  void SetSupportFiles(FileSpecList support_files);
 
   void SetDebugMacros(const DebugMacrosSP &debug_macros);
 
diff --git a/lldb/source/Symbol/CompileUnit.cpp b/lldb/source/Symbol/CompileUnit.cpp
index 280425d5874b494..c9796973940a248 100644
--- a/lldb/source/Symbol/CompileUnit.cpp
+++ b/lldb/source/Symbol/CompileUnit.cpp
@@ -178,11 +178,7 @@ void CompileUnit::SetLineTable(LineTable *line_table) {
   m_line_table_up.reset(line_table);
 }
 
-void CompileUnit::SetSupportFiles(const FileSpecList &support_files) {
-  m_support_files = support_files;
-}
-
-void CompileUnit::SetSupportFiles(FileSpecList &&support_files) {
+void CompileUnit::SetSupportFiles(FileSpecList support_files) {
   m_support_files = std::move(support_files);
 }
 

>From 94123d164b910e0e9cca59438a00e619615e5776 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot at gmail.com>
Date: Thu, 19 Oct 2023 18:16:07 +0000
Subject: [PATCH 39/76] [gn build] Port 460e84398a19

---
 llvm/utils/gn/secondary/llvm/unittests/Target/RISCV/BUILD.gn | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/llvm/unittests/Target/RISCV/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Target/RISCV/BUILD.gn
index 707704fb9dcb4c8..6d19d22a1aa1c85 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/Target/RISCV/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/Target/RISCV/BUILD.gn
@@ -9,5 +9,8 @@ unittest("RISCVTests") {
     "//llvm/lib/TargetParser",
   ]
   include_dirs = [ "//llvm/lib/Target/RISCV" ]
-  sources = [ "MCInstrAnalysisTest.cpp" ]
+  sources = [
+    "MCInstrAnalysisTest.cpp",
+    "RISCVBaseInfoTest.cpp",
+  ]
 }

>From a91a66483974ec4b2501b1b1f3ded63c1667344a Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis at chromium.org>
Date: Thu, 19 Oct 2023 14:51:28 -0400
Subject: [PATCH 40/76] [gn] port 01263c6c6fb495 (lldb-vscode -> lldb-dap)

---
 llvm/utils/gn/secondary/lldb/test/BUILD.gn                | 2 +-
 .../lldb/tools/{lldb-vscode => lldb-dap}/BUILD.gn         | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)
 rename llvm/utils/gn/secondary/lldb/tools/{lldb-vscode => lldb-dap}/BUILD.gn (88%)

diff --git a/llvm/utils/gn/secondary/lldb/test/BUILD.gn b/llvm/utils/gn/secondary/lldb/test/BUILD.gn
index 2a5100d755019d4..06ef7383ad3b55f 100644
--- a/llvm/utils/gn/secondary/lldb/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/test/BUILD.gn
@@ -163,12 +163,12 @@ group("test") {
     ":lit_unit_site_cfg",
     "//clang/tools/driver:symlinks",
     "//lld/tools/lld:symlinks",
+    "//lldb/tools/lldb-dap",
     "//lldb/tools/driver:lldb",
 
     # XXX lldb-instr, darwin-debug, etc
     "//lldb/tools/lldb-server",
     "//lldb/tools/lldb-test",
-    "//lldb/tools/lldb-vscode",
     "//lldb/utils/lit-cpuid",
 
     #"//lldb/unittests",
diff --git a/llvm/utils/gn/secondary/lldb/tools/lldb-vscode/BUILD.gn b/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn
similarity index 88%
rename from llvm/utils/gn/secondary/lldb/tools/lldb-vscode/BUILD.gn
rename to llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn
index 36da1ce8db2f027..d8292df8c0e74f2 100644
--- a/llvm/utils/gn/secondary/lldb/tools/lldb-vscode/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/tools/lldb-dap/BUILD.gn
@@ -3,19 +3,19 @@ import("//llvm/utils/gn/build/write_cmake_config.gni")
 import("//llvm/version.gni")
 
 tablegen("Options") {
-  visibility = [ ":lldb-vscode" ]
+  visibility = [ ":lldb-dap" ]
   args = [ "-gen-opt-parser-defs" ]
 }
 
 if (host_os == "mac") {
   write_cmake_config("write_info_plist") {
-    input = "lldb-vscode-Info.plist.in"
-    output = "$target_gen_dir/lldb-vscode-Info.plist"
+    input = "lldb-dap-Info.plist.in"
+    output = "$target_gen_dir/lldb-dap-Info.plist"
     values = [ "LLDB_VERSION=$llvm_version" ]
   }
 }
 
-executable("lldb-vscode") {
+executable("lldb-dap") {
   configs += [ "//llvm/utils/gn/build:lldb_code" ]
   deps = [
     ":Options",

>From c73ad025b13d35cf997e78eef89b24122ebc2fa9 Mon Sep 17 00:00:00 2001
From: Anton Rydahl <44206479+AntonRydahl at users.noreply.github.com>
Date: Thu, 19 Oct 2023 12:24:50 -0700
Subject: [PATCH 41/76] [libc][libm][GPU] Add missing vendor entrypoints to the
 GPU version of `libm` (#66034)

This patch populates the GPU version of `libm` with missing vendor entrypoints. The vendor math entrypoints are disabled by default but can be enabled with the CMake option `LIBC_GPU_VENDOR_MATH=ON`.
---
 libc/config/gpu/entrypoints.txt               |  37 ++-
 libc/src/math/CMakeLists.txt                  |  14 +
 libc/src/math/acos.h                          |  18 ++
 libc/src/math/acosh.h                         |  18 ++
 libc/src/math/asin.h                          |  18 ++
 libc/src/math/asinh.h                         |  18 ++
 libc/src/math/atan.h                          |  18 ++
 libc/src/math/atan2.h                         |  18 ++
 libc/src/math/atan2f.h                        |  18 ++
 libc/src/math/atanh.h                         |  18 ++
 libc/src/math/erf.h                           |  18 ++
 libc/src/math/gpu/CMakeLists.txt              |  70 +++--
 libc/src/math/gpu/{vendor => }/llround.cpp    |   6 +-
 libc/src/math/gpu/{vendor => }/llroundf.cpp   |   6 +-
 libc/src/math/gpu/{sinhf.cpp => lround.cpp}   |   6 +-
 libc/src/math/gpu/{tanf.cpp => lroundf.cpp}   |   6 +-
 libc/src/math/gpu/vendor/CMakeLists.txt       | 296 ++++++++++++++++--
 libc/src/math/gpu/vendor/acos.cpp             |  18 ++
 libc/src/math/gpu/vendor/acosh.cpp            |  18 ++
 libc/src/math/gpu/vendor/amdgpu/amdgpu.h      |  34 +-
 .../src/math/gpu/vendor/amdgpu/declarations.h |  31 +-
 libc/src/math/gpu/vendor/asin.cpp             |  18 ++
 libc/src/math/gpu/vendor/asinh.cpp            |  18 ++
 libc/src/math/gpu/vendor/atan.cpp             |  18 ++
 libc/src/math/gpu/vendor/atan2.cpp            |  20 ++
 libc/src/math/gpu/vendor/atan2f.cpp           |  20 ++
 libc/src/math/gpu/vendor/atanh.cpp            |  18 ++
 .../math/gpu/{tanhf.cpp => vendor/erf.cpp}    |   8 +-
 libc/src/math/gpu/vendor/erff.cpp             |  18 ++
 libc/src/math/gpu/vendor/exp.cpp              |  18 ++
 libc/src/math/gpu/vendor/exp10.cpp            |  18 ++
 libc/src/math/gpu/vendor/exp2.cpp             |  18 ++
 libc/src/math/gpu/vendor/expm1.cpp            |  18 ++
 libc/src/math/gpu/vendor/log.cpp              |  18 ++
 libc/src/math/gpu/vendor/log10.cpp            |  18 ++
 libc/src/math/gpu/vendor/log10f.cpp           |  18 ++
 libc/src/math/gpu/vendor/log1p.cpp            |  18 ++
 libc/src/math/gpu/vendor/log1pf.cpp           |  18 ++
 libc/src/math/gpu/vendor/log2.cpp             |  18 ++
 libc/src/math/gpu/vendor/log2f.cpp            |  18 ++
 libc/src/math/gpu/vendor/logb.cpp             |  18 ++
 libc/src/math/gpu/vendor/logbf.cpp            |  18 ++
 libc/src/math/gpu/vendor/logf.cpp             |  18 ++
 libc/src/math/gpu/vendor/lrint.cpp            |  18 ++
 libc/src/math/gpu/vendor/lrintf.cpp           |  18 ++
 libc/src/math/gpu/vendor/nvptx/declarations.h |  28 +-
 libc/src/math/gpu/vendor/nvptx/nvptx.h        |  28 +-
 libc/src/math/gpu/vendor/tgamma.cpp           |  18 ++
 libc/src/math/gpu/vendor/tgammaf.cpp          |  18 ++
 libc/src/math/sincos.h                        |  18 ++
 libc/src/math/tgamma.h                        |  18 ++
 libc/src/math/tgammaf.h                       |  18 ++
 52 files changed, 1190 insertions(+), 86 deletions(-)
 create mode 100644 libc/src/math/acos.h
 create mode 100644 libc/src/math/acosh.h
 create mode 100644 libc/src/math/asin.h
 create mode 100644 libc/src/math/asinh.h
 create mode 100644 libc/src/math/atan.h
 create mode 100644 libc/src/math/atan2.h
 create mode 100644 libc/src/math/atan2f.h
 create mode 100644 libc/src/math/atanh.h
 create mode 100644 libc/src/math/erf.h
 rename libc/src/math/gpu/{vendor => }/llround.cpp (80%)
 rename libc/src/math/gpu/{vendor => }/llroundf.cpp (80%)
 rename libc/src/math/gpu/{sinhf.cpp => lround.cpp} (70%)
 rename libc/src/math/gpu/{tanf.cpp => lroundf.cpp} (69%)
 create mode 100644 libc/src/math/gpu/vendor/acos.cpp
 create mode 100644 libc/src/math/gpu/vendor/acosh.cpp
 create mode 100644 libc/src/math/gpu/vendor/asin.cpp
 create mode 100644 libc/src/math/gpu/vendor/asinh.cpp
 create mode 100644 libc/src/math/gpu/vendor/atan.cpp
 create mode 100644 libc/src/math/gpu/vendor/atan2.cpp
 create mode 100644 libc/src/math/gpu/vendor/atan2f.cpp
 create mode 100644 libc/src/math/gpu/vendor/atanh.cpp
 rename libc/src/math/gpu/{tanhf.cpp => vendor/erf.cpp} (68%)
 create mode 100644 libc/src/math/gpu/vendor/erff.cpp
 create mode 100644 libc/src/math/gpu/vendor/exp.cpp
 create mode 100644 libc/src/math/gpu/vendor/exp10.cpp
 create mode 100644 libc/src/math/gpu/vendor/exp2.cpp
 create mode 100644 libc/src/math/gpu/vendor/expm1.cpp
 create mode 100644 libc/src/math/gpu/vendor/log.cpp
 create mode 100644 libc/src/math/gpu/vendor/log10.cpp
 create mode 100644 libc/src/math/gpu/vendor/log10f.cpp
 create mode 100644 libc/src/math/gpu/vendor/log1p.cpp
 create mode 100644 libc/src/math/gpu/vendor/log1pf.cpp
 create mode 100644 libc/src/math/gpu/vendor/log2.cpp
 create mode 100644 libc/src/math/gpu/vendor/log2f.cpp
 create mode 100644 libc/src/math/gpu/vendor/logb.cpp
 create mode 100644 libc/src/math/gpu/vendor/logbf.cpp
 create mode 100644 libc/src/math/gpu/vendor/logf.cpp
 create mode 100644 libc/src/math/gpu/vendor/lrint.cpp
 create mode 100644 libc/src/math/gpu/vendor/lrintf.cpp
 create mode 100644 libc/src/math/gpu/vendor/tgamma.cpp
 create mode 100644 libc/src/math/gpu/vendor/tgammaf.cpp
 create mode 100644 libc/src/math/sincos.h
 create mode 100644 libc/src/math/tgamma.h
 create mode 100644 libc/src/math/tgammaf.h

diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index 731508088cb6f8b..b20008e2784c412 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -125,11 +125,19 @@ set(TARGET_LIBC_ENTRYPOINTS
 
 set(TARGET_LIBM_ENTRYPOINTS
     # math.h entrypoints
+    libc.src.math.acos
     libc.src.math.acosf
+    libc.src.math.acosh
     libc.src.math.acoshf
+    libc.src.math.asin
     libc.src.math.asinf
+    libc.src.math.asinh
     libc.src.math.asinhf
+    libc.src.math.atan
     libc.src.math.atanf
+    libc.src.math.atan2
+    libc.src.math.atan2f
+    libc.src.math.atanh
     libc.src.math.atanhf
     libc.src.math.ceil
     libc.src.math.ceilf
@@ -139,9 +147,15 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.cosf
     libc.src.math.cosh
     libc.src.math.coshf
+    libc.src.math.erf
+    libc.src.math.erff
+    libc.src.math.exp10
     libc.src.math.exp10f
+    libc.src.math.exp2
     libc.src.math.exp2f
+    libc.src.math.exp
     libc.src.math.expf
+    libc.src.math.expm1
     libc.src.math.expm1f
     libc.src.math.fabs
     libc.src.math.fabsf
@@ -169,15 +183,26 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.llrintf
     libc.src.math.llround
     libc.src.math.llroundf
-    libc.src.math.pow
-    libc.src.math.powf
-    libc.src.math.sin
+    libc.src.math.log10
+    libc.src.math.log10f
+    libc.src.math.log1p
+    libc.src.math.log1pf
+    libc.src.math.log2
+    libc.src.math.log2f
+    libc.src.math.log
+    libc.src.math.logf
+    libc.src.math.lrint
+    libc.src.math.lrintf
+    libc.src.math.lround
+    libc.src.math.lroundf
     libc.src.math.modf
     libc.src.math.modff
     libc.src.math.nearbyint
     libc.src.math.nearbyintf
     libc.src.math.nextafter
     libc.src.math.nextafterf
+    libc.src.math.pow
+    libc.src.math.powf
     libc.src.math.remainder
     libc.src.math.remainderf
     libc.src.math.remquo
@@ -188,6 +213,10 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.roundf
     libc.src.math.scalbn
     libc.src.math.scalbnf
+    libc.src.math.sin
+    libc.src.math.sinf
+    libc.src.math.sincos
+    libc.src.math.sincosf
     libc.src.math.sinh
     libc.src.math.sinhf
     libc.src.math.sqrt
@@ -196,6 +225,8 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.tanf
     libc.src.math.tanh
     libc.src.math.tanhf
+    libc.src.math.tgamma
+    libc.src.math.tgammaf
     libc.src.math.trunc
     libc.src.math.truncf
 )
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 00e0e4e6a583987..f1f72714981a9e5 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -54,14 +54,23 @@ function(add_math_entrypoint_object name)
   )
 endfunction()
 
+add_math_entrypoint_object(acos)
 add_math_entrypoint_object(acosf)
+add_math_entrypoint_object(acosh)
 add_math_entrypoint_object(acoshf)
 
+add_math_entrypoint_object(asin)
 add_math_entrypoint_object(asinf)
+add_math_entrypoint_object(asinh)
 add_math_entrypoint_object(asinhf)
 
+add_math_entrypoint_object(atan)
 add_math_entrypoint_object(atanf)
 
+add_math_entrypoint_object(atan2)
+add_math_entrypoint_object(atan2f)
+
+add_math_entrypoint_object(atanh)
 add_math_entrypoint_object(atanhf)
 
 add_math_entrypoint_object(ceil)
@@ -77,6 +86,7 @@ add_math_entrypoint_object(cosf)
 add_math_entrypoint_object(cosh)
 add_math_entrypoint_object(coshf)
 
+add_math_entrypoint_object(erf)
 add_math_entrypoint_object(erff)
 
 add_math_entrypoint_object(exp)
@@ -199,6 +209,7 @@ add_math_entrypoint_object(scalbn)
 add_math_entrypoint_object(scalbnf)
 add_math_entrypoint_object(scalbnl)
 
+add_math_entrypoint_object(sincos)
 add_math_entrypoint_object(sincosf)
 
 add_math_entrypoint_object(sin)
@@ -217,6 +228,9 @@ add_math_entrypoint_object(tanf)
 add_math_entrypoint_object(tanh)
 add_math_entrypoint_object(tanhf)
 
+add_math_entrypoint_object(tgamma)
+add_math_entrypoint_object(tgammaf)
+
 add_math_entrypoint_object(trunc)
 add_math_entrypoint_object(truncf)
 add_math_entrypoint_object(truncl)
diff --git a/libc/src/math/acos.h b/libc/src/math/acos.h
new file mode 100644
index 000000000000000..da89a9dd3d9d484
--- /dev/null
+++ b/libc/src/math/acos.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for acos --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_ACOS_H
+#define LLVM_LIBC_SRC_MATH_ACOS_H
+
+namespace LIBC_NAMESPACE {
+
+double acos(double x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_ACOS_H
diff --git a/libc/src/math/acosh.h b/libc/src/math/acosh.h
new file mode 100644
index 000000000000000..a5bbd82c120b8c4
--- /dev/null
+++ b/libc/src/math/acosh.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for acosh -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_ACOSH_H
+#define LLVM_LIBC_SRC_MATH_ACOSH_H
+
+namespace LIBC_NAMESPACE {
+
+double acosh(double x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_ACOSH_H
diff --git a/libc/src/math/asin.h b/libc/src/math/asin.h
new file mode 100644
index 000000000000000..e443776e507d963
--- /dev/null
+++ b/libc/src/math/asin.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for asin --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_ASIN_H
+#define LLVM_LIBC_SRC_MATH_ASIN_H
+
+namespace LIBC_NAMESPACE {
+
+double asin(double x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_ASIN_H
diff --git a/libc/src/math/asinh.h b/libc/src/math/asinh.h
new file mode 100644
index 000000000000000..418bf9617984113
--- /dev/null
+++ b/libc/src/math/asinh.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for asinh -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_ASINH_H
+#define LLVM_LIBC_SRC_MATH_ASINH_H
+
+namespace LIBC_NAMESPACE {
+
+double asinh(double x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_ASINH_H
diff --git a/libc/src/math/atan.h b/libc/src/math/atan.h
new file mode 100644
index 000000000000000..bcbc97a52ee6d85
--- /dev/null
+++ b/libc/src/math/atan.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for atan --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_ATAN_H
+#define LLVM_LIBC_SRC_MATH_ATAN_H
+
+namespace LIBC_NAMESPACE {
+
+double atan(double x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_ATAN_H
diff --git a/libc/src/math/atan2.h b/libc/src/math/atan2.h
new file mode 100644
index 000000000000000..024bbfb5fe9cb84
--- /dev/null
+++ b/libc/src/math/atan2.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for atan2 -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_ATAN2_H
+#define LLVM_LIBC_SRC_MATH_ATAN2_H
+
+namespace LIBC_NAMESPACE {
+
+double atan2(double x, double y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_ATAN2_H
diff --git a/libc/src/math/atan2f.h b/libc/src/math/atan2f.h
new file mode 100644
index 000000000000000..25d2de09342fa59
--- /dev/null
+++ b/libc/src/math/atan2f.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for atan2f ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_ATAN2F_H
+#define LLVM_LIBC_SRC_MATH_ATAN2F_H
+
+namespace LIBC_NAMESPACE {
+
+float atan2f(float x, float y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_ATAN2F_H
diff --git a/libc/src/math/atanh.h b/libc/src/math/atanh.h
new file mode 100644
index 000000000000000..dc84d07c02a0ab2
--- /dev/null
+++ b/libc/src/math/atanh.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for atanh -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_ATANH_H
+#define LLVM_LIBC_SRC_MATH_ATANH_H
+
+namespace LIBC_NAMESPACE {
+
+double atanh(double x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_ATANH_H
diff --git a/libc/src/math/erf.h b/libc/src/math/erf.h
new file mode 100644
index 000000000000000..a38c92410de1358
--- /dev/null
+++ b/libc/src/math/erf.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for erf ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_ERF_H
+#define LLVM_LIBC_SRC_MATH_ERF_H
+
+namespace LIBC_NAMESPACE {
+
+double erf(double x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_ERF_H
diff --git a/libc/src/math/gpu/CMakeLists.txt b/libc/src/math/gpu/CMakeLists.txt
index cee7b7d9db476f2..75a916e2a0112af 100644
--- a/libc/src/math/gpu/CMakeLists.txt
+++ b/libc/src/math/gpu/CMakeLists.txt
@@ -183,6 +183,46 @@ add_math_entrypoint_gpu_object(
     -O2
 )
 
+add_math_entrypoint_gpu_object(
+  lround
+  SRCS
+    lround.cpp
+  HDRS
+    ../lround.h
+  COMPILE_OPTIONS
+    -O2
+)
+
+add_math_entrypoint_gpu_object(
+  lroundf
+  SRCS
+    lroundf.cpp
+  HDRS
+    ../lroundf.h
+  COMPILE_OPTIONS
+    -O2
+)
+
+add_math_entrypoint_gpu_object(
+  llround
+  SRCS
+    llround.cpp
+  HDRS
+    ../llround.h
+  COMPILE_OPTIONS
+    -O2
+)
+
+add_math_entrypoint_gpu_object(
+  llroundf
+  SRCS
+    llroundf.cpp
+  HDRS
+    ../llroundf.h
+  COMPILE_OPTIONS
+    -O2
+)
+
 add_math_entrypoint_gpu_object(
   modf
   SRCS
@@ -283,16 +323,6 @@ add_math_entrypoint_gpu_object(
     -O2
 )
 
-add_math_entrypoint_gpu_object(
-  sinhf
-  SRCS
-    sinhf.cpp
-  HDRS
-    ../sinhf.h
-  COMPILE_OPTIONS
-    -O2
-)
-
 add_math_entrypoint_gpu_object(
   sqrt
   SRCS
@@ -323,16 +353,6 @@ add_math_entrypoint_gpu_object(
     -O2
 )
 
-add_math_entrypoint_gpu_object(
-  tanf
-  SRCS
-    tanf.cpp
-  HDRS
-    ../tanf.h
-  COMPILE_OPTIONS
-    -O2
-)
-
 add_math_entrypoint_gpu_object(
   tanh
   SRCS
@@ -343,16 +363,6 @@ add_math_entrypoint_gpu_object(
     -O2
 )
 
-add_math_entrypoint_gpu_object(
-  tanhf
-  SRCS
-    tanhf.cpp
-  HDRS
-    ../tanhf.h
-  COMPILE_OPTIONS
-    -O2
-)
-
 add_math_entrypoint_gpu_object(
   trunc
   SRCS
diff --git a/libc/src/math/gpu/vendor/llround.cpp b/libc/src/math/gpu/llround.cpp
similarity index 80%
rename from libc/src/math/gpu/vendor/llround.cpp
rename to libc/src/math/gpu/llround.cpp
index e53701bda75cf45..afd98308730a620 100644
--- a/libc/src/math/gpu/vendor/llround.cpp
+++ b/libc/src/math/gpu/llround.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of the llround function for GPU --------------------===//
+//===-- Implementation of the GPU llround function ------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -9,12 +9,10 @@
 #include "src/math/llround.h"
 #include "src/__support/common.h"
 
-#include "common.h"
-
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(long long, llround, (double x)) {
-  return internal::llround(x);
+  return __builtin_llround(x);
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/llroundf.cpp b/libc/src/math/gpu/llroundf.cpp
similarity index 80%
rename from libc/src/math/gpu/vendor/llroundf.cpp
rename to libc/src/math/gpu/llroundf.cpp
index ddb5ec00a036f55..897ed15b692848e 100644
--- a/libc/src/math/gpu/vendor/llroundf.cpp
+++ b/libc/src/math/gpu/llroundf.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of the llroundf function for GPU -------------------===//
+//===-- Implementation of the GPU llroundf function -----------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -9,12 +9,10 @@
 #include "src/math/llroundf.h"
 #include "src/__support/common.h"
 
-#include "common.h"
-
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(long long, llroundf, (float x)) {
-  return internal::llroundf(x);
+  return __builtin_lroundf(x);
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/sinhf.cpp b/libc/src/math/gpu/lround.cpp
similarity index 70%
rename from libc/src/math/gpu/sinhf.cpp
rename to libc/src/math/gpu/lround.cpp
index ed69dffe3ea831d..51e8f2245af8ea3 100644
--- a/libc/src/math/gpu/sinhf.cpp
+++ b/libc/src/math/gpu/lround.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of the GPU sinhf function --------------------------===//
+//===-- Implementation of the GPU lround function -------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,11 +6,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "src/math/sinhf.h"
+#include "src/math/lround.h"
 #include "src/__support/common.h"
 
 namespace LIBC_NAMESPACE {
 
-LLVM_LIBC_FUNCTION(float, sinhf, (float x)) { return __builtin_sinhf(x); }
+LLVM_LIBC_FUNCTION(long, lround, (double x)) { return __builtin_lround(x); }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/tanf.cpp b/libc/src/math/gpu/lroundf.cpp
similarity index 69%
rename from libc/src/math/gpu/tanf.cpp
rename to libc/src/math/gpu/lroundf.cpp
index da7bd54ab08ae30..2a6fe7200d8cbad 100644
--- a/libc/src/math/gpu/tanf.cpp
+++ b/libc/src/math/gpu/lroundf.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of the GPU tanf function ---------------------------===//
+//===-- Implementation of the GPU lroundf function ------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,11 +6,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "src/math/tanf.h"
+#include "src/math/lroundf.h"
 #include "src/__support/common.h"
 
 namespace LIBC_NAMESPACE {
 
-LLVM_LIBC_FUNCTION(float, tanf, (float x)) { return __builtin_tanf(x); }
+LLVM_LIBC_FUNCTION(long, lroundf, (float x)) { return __builtin_lroundf(x); }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/CMakeLists.txt b/libc/src/math/gpu/vendor/CMakeLists.txt
index 2ee74a06a02d461..a154c94a7009ada 100644
--- a/libc/src/math/gpu/vendor/CMakeLists.txt
+++ b/libc/src/math/gpu/vendor/CMakeLists.txt
@@ -29,6 +29,17 @@ endif()
 # will link in identity metadata from both libraries. This silences the warning.
 list(APPEND bitcode_link_flags "-Wno-linker-warnings")
 
+add_entrypoint_object(
+  acos
+  SRCS
+    acos.cpp
+  HDRS
+    ../../acos.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
 add_entrypoint_object(
   acosf
   SRCS
@@ -40,6 +51,17 @@ add_entrypoint_object(
     -O2
 )
 
+add_entrypoint_object(
+  acosh
+  SRCS
+    acosh.cpp
+  HDRS
+    ../../acosh.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
 add_entrypoint_object(
   acoshf
   SRCS
@@ -51,6 +73,17 @@ add_entrypoint_object(
     -O2
 )
 
+add_entrypoint_object(
+  asin
+  SRCS
+    asin.cpp
+  HDRS
+    ../../asin.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
 add_entrypoint_object(
   asinf
   SRCS
@@ -63,11 +96,22 @@ add_entrypoint_object(
 )
 
 add_entrypoint_object(
-  asinhf
+  asinh
   SRCS
-    asinhf.cpp
+    asinh.cpp
   HDRS
-    ../../asinhf.h
+    ../../asinh.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
+add_entrypoint_object(
+  atan
+  SRCS
+    atan.cpp
+  HDRS
+    ../../atan.h
   COMPILE_OPTIONS
     ${bitcode_link_flags}
     -O2
@@ -84,6 +128,39 @@ add_entrypoint_object(
     -O2
 )
 
+add_entrypoint_object(
+  atan2
+  SRCS
+    atan2.cpp
+  HDRS
+    ../../atan2.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
+add_entrypoint_object(
+  atan2f
+  SRCS
+    atan2f.cpp
+  HDRS
+    ../../atan2f.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
+add_entrypoint_object(
+  atanh
+  SRCS
+    atanh.cpp
+  HDRS
+    ../../atanh.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
 add_entrypoint_object(
   atanhf
   SRCS
@@ -139,6 +216,50 @@ add_entrypoint_object(
     -O2
 )
 
+add_entrypoint_object(
+  erf
+  SRCS
+    erf.cpp
+  HDRS
+    ../../erf.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
+add_entrypoint_object(
+  erff
+  SRCS
+    erff.cpp
+  HDRS
+    ../../erff.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
+add_entrypoint_object(
+  exp
+  SRCS
+    exp.cpp
+  HDRS
+    ../../exp.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
+add_entrypoint_object(
+  exp10
+  SRCS
+    exp10.cpp
+  HDRS
+    ../../exp10.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
 add_entrypoint_object(
   exp10f
   SRCS
@@ -150,6 +271,17 @@ add_entrypoint_object(
     -O2
 )
 
+add_entrypoint_object(
+  exp2
+  SRCS
+    exp2.cpp
+  HDRS
+    ../../exp2.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
 add_entrypoint_object(
   exp2f
   SRCS
@@ -172,6 +304,17 @@ add_entrypoint_object(
     -O2
 )
 
+add_entrypoint_object(
+  expm1
+  SRCS
+    expm1.cpp
+  HDRS
+    ../../expm1.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
 add_entrypoint_object(
   expm1f
   SRCS
@@ -249,6 +392,94 @@ add_entrypoint_object(
     -O2
 )
 
+add_entrypoint_object(
+  log10
+  SRCS
+    log10.cpp
+  HDRS
+    ../../log10.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
+add_entrypoint_object(
+  log10f
+  SRCS
+    log10f.cpp
+  HDRS
+    ../../log10f.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
+add_entrypoint_object(
+  log2
+  SRCS
+    log2.cpp
+  HDRS
+    ../../log2.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
+add_entrypoint_object(
+  log2f
+  SRCS
+    log2f.cpp
+  HDRS
+    ../../log2f.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
+add_entrypoint_object(
+  log
+  SRCS
+    log.cpp
+  HDRS
+    ../../log.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
+add_entrypoint_object(
+  logf
+  SRCS
+    logf.cpp
+  HDRS
+    ../../logf.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
+add_entrypoint_object(
+  lrint
+  SRCS
+    lrint.cpp
+  HDRS
+    ../../lrint.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
+add_entrypoint_object(
+  lrintf
+  SRCS
+    lrintf.cpp
+  HDRS
+    ../../lrintf.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
 add_entrypoint_object(
   ldexp
   SRCS
@@ -272,67 +503,66 @@ add_entrypoint_object(
 )
 
 add_entrypoint_object(
-  llrint
+  log1p
   SRCS
-    llrint.cpp
+    log1p.cpp
   HDRS
-    ../../llrint.h
+    ../../log1p.h
   COMPILE_OPTIONS
     ${bitcode_link_flags}
     -O2
 )
 
 add_entrypoint_object(
-  llrintf
+  log1pf
   SRCS
-    llrintf.cpp
+    log1pf.cpp
   HDRS
-    ../../llrintf.h
+    ../../log1pf.h
   COMPILE_OPTIONS
     ${bitcode_link_flags}
     -O2
 )
 
 add_entrypoint_object(
-  remquo
+  llrint
   SRCS
-    remquo.cpp
+    llrint.cpp
   HDRS
-    ../../remquo.h
+    ../../llrint.h
   COMPILE_OPTIONS
     ${bitcode_link_flags}
     -O2
 )
 
 add_entrypoint_object(
-  remquof
+  llrintf
   SRCS
-    remquof.cpp
+    llrintf.cpp
   HDRS
-    ../../remquof.h
+    ../../llrintf.h
   COMPILE_OPTIONS
     ${bitcode_link_flags}
     -O2
 )
 
-
 add_entrypoint_object(
-  llround
+  remquo
   SRCS
-    llround.cpp
+    remquo.cpp
   HDRS
-    ../../llround.h
+    ../../remquo.h
   COMPILE_OPTIONS
     ${bitcode_link_flags}
     -O2
 )
 
 add_entrypoint_object(
-  llroundf
+  remquof
   SRCS
-    llroundf.cpp
+    remquof.cpp
   HDRS
-    ../../llroundf.h
+    ../../remquof.h
   COMPILE_OPTIONS
     ${bitcode_link_flags}
     -O2
@@ -515,6 +745,28 @@ add_entrypoint_object(
     -O2
 )
 
+add_entrypoint_object(
+  tgamma
+  SRCS
+    tgamma.cpp
+  HDRS
+    ../../tgamma.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
+add_entrypoint_object(
+  tgammaf
+  SRCS
+    tgammaf.cpp
+  HDRS
+    ../../tgammaf.h
+  COMPILE_OPTIONS
+    ${bitcode_link_flags}
+    -O2
+)
+
 add_entrypoint_object(
   frexp
   SRCS
diff --git a/libc/src/math/gpu/vendor/acos.cpp b/libc/src/math/gpu/vendor/acos.cpp
new file mode 100644
index 000000000000000..83b674fa6ae3b7c
--- /dev/null
+++ b/libc/src/math/gpu/vendor/acos.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU acos function ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/acos.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, acos, (double x)) { return internal::acos(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/acosh.cpp b/libc/src/math/gpu/vendor/acosh.cpp
new file mode 100644
index 000000000000000..cc1b8b572b3db7d
--- /dev/null
+++ b/libc/src/math/gpu/vendor/acosh.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU acosh function --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/acosh.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, acosh, (double x)) { return internal::acosh(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/amdgpu/amdgpu.h b/libc/src/math/gpu/vendor/amdgpu/amdgpu.h
index 280ae49989f9446..43961fc75982abf 100644
--- a/libc/src/math/gpu/vendor/amdgpu/amdgpu.h
+++ b/libc/src/math/gpu/vendor/amdgpu/amdgpu.h
@@ -16,19 +16,33 @@
 
 namespace LIBC_NAMESPACE {
 namespace internal {
+LIBC_INLINE double acos(double x) { return __ocml_acos_f64(x); }
 LIBC_INLINE float acosf(float x) { return __ocml_acos_f32(x); }
+LIBC_INLINE double acosh(double x) { return __ocml_acosh_f64(x); }
 LIBC_INLINE float acoshf(float x) { return __ocml_acosh_f32(x); }
+LIBC_INLINE double asin(double x) { return __ocml_asin_f64(x); }
 LIBC_INLINE float asinf(float x) { return __ocml_asin_f32(x); }
+LIBC_INLINE double asinh(double x) { return __ocml_asinh_f64(x); }
 LIBC_INLINE float asinhf(float x) { return __ocml_asinh_f32(x); }
+LIBC_INLINE double atan(double x) { return __ocml_atan_f64(x); }
 LIBC_INLINE float atanf(float x) { return __ocml_atan_f32(x); }
+LIBC_INLINE double atan2(double x, double y) { return __ocml_atan2_f64(x, y); }
+LIBC_INLINE float atan2f(float x, float y) { return __ocml_atan2_f32(x, y); }
+LIBC_INLINE double atanh(double x) { return __ocml_atanh_f64(x); }
 LIBC_INLINE float atanhf(float x) { return __ocml_atanh_f32(x); }
 LIBC_INLINE double cos(double x) { return __ocml_cos_f64(x); }
 LIBC_INLINE float cosf(float x) { return __ocml_cos_f32(x); }
 LIBC_INLINE double cosh(double x) { return __ocml_cosh_f64(x); }
 LIBC_INLINE float coshf(float x) { return __ocml_cosh_f32(x); }
+LIBC_INLINE double erf(double x) { return __ocml_erf_f64(x); }
+LIBC_INLINE float erff(float x) { return __ocml_erf_f32(x); }
+LIBC_INLINE double exp(double x) { return __builtin_exp(x); }
 LIBC_INLINE float expf(float x) { return __builtin_expf(x); }
-LIBC_INLINE float exp2f(float x) { return __builtin_exp2f(x); }
+LIBC_INLINE double exp2(double x) { return __ocml_exp2_f64(x); }
+LIBC_INLINE float exp2f(float x) { return __ocml_exp2_f32(x); }
+LIBC_INLINE double exp10(double x) { return __ocml_exp10_f64(x); }
 LIBC_INLINE float exp10f(float x) { return __ocml_exp10_f32(x); }
+LIBC_INLINE double expm1(double x) { return __ocml_expm1_f64(x); }
 LIBC_INLINE float expm1f(float x) { return __ocml_expm1_f32(x); }
 LIBC_INLINE double fdim(double x, double y) { return __ocml_fdim_f64(x, y); }
 LIBC_INLINE float fdimf(float x, float y) { return __ocml_fdim_f32(x, y); }
@@ -44,11 +58,19 @@ LIBC_INLINE long long llrint(double x) {
 LIBC_INLINE long long llrintf(float x) {
   return static_cast<long long>(__builtin_rintf(x));
 }
-LIBC_INLINE long long llround(double x) {
-  return static_cast<long long>(__builtin_round(x));
+LIBC_INLINE double log10(double x) { return __ocml_log10_f64(x); }
+LIBC_INLINE float log10f(float x) { return __ocml_log10_f32(x); }
+LIBC_INLINE double log1p(double x) { return __ocml_log1p_f64(x); }
+LIBC_INLINE float log1pf(float x) { return __ocml_log1p_f32(x); }
+LIBC_INLINE double log2(double x) { return __ocml_log2_f64(x); }
+LIBC_INLINE float log2f(float x) { return __ocml_log2_f32(x); }
+LIBC_INLINE double log(double x) { return __ocml_log_f64(x); }
+LIBC_INLINE float logf(float x) { return __ocml_log_f32(x); }
+LIBC_INLINE long lrint(double x) {
+  return static_cast<long>(__builtin_rint(x));
 }
-LIBC_INLINE long long llroundf(float x) {
-  return static_cast<long long>(__builtin_roundf(x));
+LIBC_INLINE long lrintf(float x) {
+  return static_cast<long>(__builtin_rintf(x));
 }
 LIBC_INLINE double nextafter(double x, double y) {
   return __ocml_nextafter_f64(x, y);
@@ -96,6 +118,8 @@ LIBC_INLINE float remquof(float x, float y, int *q) {
   *q = tmp;
   return r;
 }
+LIBC_INLINE double tgamma(double x) { return __ocml_tgamma_f64(x); }
+LIBC_INLINE float tgammaf(float x) { return __ocml_tgamma_f32(x); }
 
 } // namespace internal
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/amdgpu/declarations.h b/libc/src/math/gpu/vendor/amdgpu/declarations.h
index b07c079682580a4..7a01cbc6ab19d00 100644
--- a/libc/src/math/gpu/vendor/amdgpu/declarations.h
+++ b/libc/src/math/gpu/vendor/amdgpu/declarations.h
@@ -15,35 +15,53 @@ namespace LIBC_NAMESPACE {
 
 extern "C" {
 float __ocml_acos_f32(float);
+double __ocml_acos_f64(double);
 float __ocml_acosh_f32(float);
+double __ocml_acosh_f64(double);
 float __ocml_asin_f32(float);
+double __ocml_asin_f64(double);
 float __ocml_asinh_f32(float);
+double __ocml_asinh_f64(double);
 float __ocml_atan_f32(float);
+double __ocml_atan_f64(double);
+float __ocml_atan2_f32(float, float);
+double __ocml_atan2_f64(double, double);
 float __ocml_atanh_f32(float);
+double __ocml_atanh_f64(double);
 float __ocml_cos_f32(float);
 double __ocml_cos_f64(double);
 float __ocml_cosh_f32(float);
 double __ocml_cosh_f64(double);
+float __ocml_erf_f32(float);
+double __ocml_erf_f64(double);
 float __ocml_exp_f32(float);
+double __ocml_exp_f64(double);
 float __ocml_exp2_f32(float);
+double __ocml_exp2_f64(double);
 float __ocml_exp10_f32(float);
+double __ocml_exp10_f64(double);
 float __ocml_expm1_f32(float);
+double __ocml_expm1_f64(double);
 float __ocml_fdim_f32(float, float);
 double __ocml_fdim_f64(double, double);
-double __ocml_hypot_f64(double, double);
 float __ocml_hypot_f32(float, float);
+double __ocml_hypot_f64(double, double);
 int __ocml_ilogb_f64(double);
 int __ocml_ilogb_f32(float);
 float __ocml_ldexp_f32(float, int);
 double __ocml_ldexp_f64(double, int);
+float __ocml_log10_f32(float);
+double __ocml_log10_f64(double);
+float __ocml_log1p_f32(float);
+double __ocml_log1p_f64(double);
+float __ocml_log2_f32(float);
+double __ocml_log2_f64(double);
+float __ocml_log_f32(float);
+double __ocml_log_f64(double);
 float __ocml_nextafter_f32(float, float);
 double __ocml_nextafter_f64(double, double);
 float __ocml_pow_f32(float, float);
 double __ocml_pow_f64(double, double);
-double __ocml_rint_f64(double);
-float __ocml_rint_f32(float);
-double __ocml_round_f64(double);
-float __ocml_round_f32(float);
 float __ocml_sin_f32(float);
 double __ocml_sin_f64(double);
 float __ocml_sincos_f32(float, float *);
@@ -56,6 +75,8 @@ float __ocml_tanh_f32(float);
 double __ocml_tanh_f64(double);
 float __ocml_remquo_f32(float, float, gpu::Private<int> *);
 double __ocml_remquo_f64(double, double, gpu::Private<int> *);
+double __ocml_tgamma_f64(double);
+float __ocml_tgamma_f32(float);
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/asin.cpp b/libc/src/math/gpu/vendor/asin.cpp
new file mode 100644
index 000000000000000..24a8a136e88eb2c
--- /dev/null
+++ b/libc/src/math/gpu/vendor/asin.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU asin function ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/asin.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, asin, (double x)) { return internal::asin(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/asinh.cpp b/libc/src/math/gpu/vendor/asinh.cpp
new file mode 100644
index 000000000000000..f417d9fd8c1f733
--- /dev/null
+++ b/libc/src/math/gpu/vendor/asinh.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU asinh function --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/asinh.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, asinh, (double x)) { return internal::asinh(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/atan.cpp b/libc/src/math/gpu/vendor/atan.cpp
new file mode 100644
index 000000000000000..45d7f02c02cd636
--- /dev/null
+++ b/libc/src/math/gpu/vendor/atan.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU atan function ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/atan.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, atan, (double x)) { return internal::atan(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/atan2.cpp b/libc/src/math/gpu/vendor/atan2.cpp
new file mode 100644
index 000000000000000..94e215e52ca6028
--- /dev/null
+++ b/libc/src/math/gpu/vendor/atan2.cpp
@@ -0,0 +1,20 @@
+//===-- Implementation of the GPU atan2 function --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/atan2.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, atan2, (double x, double y)) {
+  return internal::atan2(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/atan2f.cpp b/libc/src/math/gpu/vendor/atan2f.cpp
new file mode 100644
index 000000000000000..70caa568e32d936
--- /dev/null
+++ b/libc/src/math/gpu/vendor/atan2f.cpp
@@ -0,0 +1,20 @@
+//===-- Implementation of the GPU atan2f function -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/atan2f.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float, atan2f, (float x, float y)) {
+  return internal::atan2f(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/atanh.cpp b/libc/src/math/gpu/vendor/atanh.cpp
new file mode 100644
index 000000000000000..07a75fcbbfc7c6a
--- /dev/null
+++ b/libc/src/math/gpu/vendor/atanh.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU atanh function --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/atanh.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, atanh, (double x)) { return internal::atanh(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/tanhf.cpp b/libc/src/math/gpu/vendor/erf.cpp
similarity index 68%
rename from libc/src/math/gpu/tanhf.cpp
rename to libc/src/math/gpu/vendor/erf.cpp
index be666fd9f8740c3..190321ca25992ed 100644
--- a/libc/src/math/gpu/tanhf.cpp
+++ b/libc/src/math/gpu/vendor/erf.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of the GPU tanhf function --------------------------===//
+//===-- Implementation of the GPU erf function ----------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,11 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "src/math/tanhf.h"
+#include "src/math/erf.h"
 #include "src/__support/common.h"
 
+#include "common.h"
+
 namespace LIBC_NAMESPACE {
 
-LLVM_LIBC_FUNCTION(float, tanhf, (float x)) { return __builtin_tanhf(x); }
+LLVM_LIBC_FUNCTION(double, erf, (double x)) { return internal::erf(x); }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/erff.cpp b/libc/src/math/gpu/vendor/erff.cpp
new file mode 100644
index 000000000000000..a5a08be54b0756b
--- /dev/null
+++ b/libc/src/math/gpu/vendor/erff.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU erff function ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/erff.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float, erff, (float x)) { return internal::erff(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/exp.cpp b/libc/src/math/gpu/vendor/exp.cpp
new file mode 100644
index 000000000000000..ee5a22019f6a54e
--- /dev/null
+++ b/libc/src/math/gpu/vendor/exp.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU exp function ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/exp.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, exp, (double x)) { return internal::exp(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/exp10.cpp b/libc/src/math/gpu/vendor/exp10.cpp
new file mode 100644
index 000000000000000..8557a33f0188449
--- /dev/null
+++ b/libc/src/math/gpu/vendor/exp10.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU exp10 function --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/exp10.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, exp10, (double x)) { return internal::exp10(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/exp2.cpp b/libc/src/math/gpu/vendor/exp2.cpp
new file mode 100644
index 000000000000000..ffa23d810a9b1e3
--- /dev/null
+++ b/libc/src/math/gpu/vendor/exp2.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU exp2 function ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/exp2.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, exp2, (double x)) { return internal::exp2(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/expm1.cpp b/libc/src/math/gpu/vendor/expm1.cpp
new file mode 100644
index 000000000000000..6ac5f753b9e4df1
--- /dev/null
+++ b/libc/src/math/gpu/vendor/expm1.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU expm1 function --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/expm1.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, expm1, (double x)) { return internal::expm1(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/log.cpp b/libc/src/math/gpu/vendor/log.cpp
new file mode 100644
index 000000000000000..a97689abcc21717
--- /dev/null
+++ b/libc/src/math/gpu/vendor/log.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU log function ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/log.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, log, (double x)) { return internal::log(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/log10.cpp b/libc/src/math/gpu/vendor/log10.cpp
new file mode 100644
index 000000000000000..c7a917a75cc969f
--- /dev/null
+++ b/libc/src/math/gpu/vendor/log10.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU log10 function --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/log10.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, log10, (double x)) { return internal::log10(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/log10f.cpp b/libc/src/math/gpu/vendor/log10f.cpp
new file mode 100644
index 000000000000000..489f5f558be1f59
--- /dev/null
+++ b/libc/src/math/gpu/vendor/log10f.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU log10f function -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/log10f.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float, log10f, (float x)) { return internal::log10f(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/log1p.cpp b/libc/src/math/gpu/vendor/log1p.cpp
new file mode 100644
index 000000000000000..720d23e2f952b8f
--- /dev/null
+++ b/libc/src/math/gpu/vendor/log1p.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU log1p function --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/log1p.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, log1p, (double x)) { return internal::log1p(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/log1pf.cpp b/libc/src/math/gpu/vendor/log1pf.cpp
new file mode 100644
index 000000000000000..96ad48b529cf6fb
--- /dev/null
+++ b/libc/src/math/gpu/vendor/log1pf.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU log1pf function -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/log1pf.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float, log1pf, (float x)) { return internal::log1pf(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/log2.cpp b/libc/src/math/gpu/vendor/log2.cpp
new file mode 100644
index 000000000000000..9fc8a81e7e7570d
--- /dev/null
+++ b/libc/src/math/gpu/vendor/log2.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU log2 function ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/log2.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, log2, (double x)) { return internal::log2(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/log2f.cpp b/libc/src/math/gpu/vendor/log2f.cpp
new file mode 100644
index 000000000000000..62df41b69b0bd89
--- /dev/null
+++ b/libc/src/math/gpu/vendor/log2f.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU log2f function --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/log2f.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float, log2f, (float x)) { return internal::log2f(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/logb.cpp b/libc/src/math/gpu/vendor/logb.cpp
new file mode 100644
index 000000000000000..5dea57d41b08b1d
--- /dev/null
+++ b/libc/src/math/gpu/vendor/logb.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU logb function ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/logb.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, logb, (double x)) { return internal::logb(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/logbf.cpp b/libc/src/math/gpu/vendor/logbf.cpp
new file mode 100644
index 000000000000000..1a59df3e09a89b9
--- /dev/null
+++ b/libc/src/math/gpu/vendor/logbf.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU logbf function --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/logbf.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float, logbf, (float x)) { return internal::logbf(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/logf.cpp b/libc/src/math/gpu/vendor/logf.cpp
new file mode 100644
index 000000000000000..527b028e100d5ee
--- /dev/null
+++ b/libc/src/math/gpu/vendor/logf.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU logf function ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/logf.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float, logf, (float x)) { return internal::logf(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/lrint.cpp b/libc/src/math/gpu/vendor/lrint.cpp
new file mode 100644
index 000000000000000..a08996b755b5280
--- /dev/null
+++ b/libc/src/math/gpu/vendor/lrint.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the lrint function for GPU ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/lrint.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(long, lrint, (double x)) { return internal::lrint(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/lrintf.cpp b/libc/src/math/gpu/vendor/lrintf.cpp
new file mode 100644
index 000000000000000..695a9b8202cf4ac
--- /dev/null
+++ b/libc/src/math/gpu/vendor/lrintf.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the lrintf function for GPU ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/lrintf.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(long, lrintf, (float x)) { return internal::lrintf(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/nvptx/declarations.h b/libc/src/math/gpu/vendor/nvptx/declarations.h
index 003a9a8a5dfa55d..9cb2be67b85b1a9 100644
--- a/libc/src/math/gpu/vendor/nvptx/declarations.h
+++ b/libc/src/math/gpu/vendor/nvptx/declarations.h
@@ -12,19 +12,33 @@
 namespace LIBC_NAMESPACE {
 
 extern "C" {
+double __nv_acos(double);
 float __nv_acosf(float);
+double __nv_acosh(double);
 float __nv_acoshf(float);
+double __nv_asin(double);
 float __nv_asinf(float);
+double __nv_asinh(double);
 float __nv_asinhf(float);
+double __nv_atan(double);
 float __nv_atanf(float);
+double __nv_atan2(double, double);
+float __nv_atan2f(float, float);
+double __nv_atanh(double);
 float __nv_atanhf(float);
 double __nv_cos(double);
 float __nv_cosf(float);
 double __nv_cosh(double);
 float __nv_coshf(float);
+double __nv_erf(double);
+float __nv_erff(float);
+double __nv_exp(double);
 float __nv_expf(float);
+double __nv_exp2(double);
 float __nv_exp2f(float);
+double __nv_exp10(double);
 float __nv_exp10f(float);
+double __nv_expm1(double);
 float __nv_expm1f(float);
 double __nv_fdim(double, double);
 float __nv_fdimf(float, float);
@@ -36,8 +50,16 @@ double __nv_ldexp(double, int);
 float __nv_ldexpf(float, int);
 long long __nv_llrint(double);
 long long __nv_llrintf(float);
-long long __nv_llround(double);
-long long __nv_llroundf(float);
+long __nv_lrint(double);
+long __nv_lrintf(float);
+double __nv_log10(double);
+float __nv_log10f(float);
+double __nv_log1p(double);
+float __nv_log1pf(float);
+double __nv_log2(double);
+float __nv_log2f(float);
+double __nv_log(double);
+float __nv_logf(float);
 double __nv_nextafter(double, double);
 float __nv_nextafterf(float, float);
 double __nv_pow(double, double);
@@ -58,6 +80,8 @@ double __nv_scalbn(double, int);
 float __nv_scalbnf(float, int);
 double __nv_remquo(double, double, int *);
 float __nv_remquof(float, float, int *);
+double __nv_tgamma(double);
+float __nv_tgammaf(float);
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/nvptx/nvptx.h b/libc/src/math/gpu/vendor/nvptx/nvptx.h
index 4298d56db6a027c..110d570a84a3930 100644
--- a/libc/src/math/gpu/vendor/nvptx/nvptx.h
+++ b/libc/src/math/gpu/vendor/nvptx/nvptx.h
@@ -15,19 +15,33 @@
 
 namespace LIBC_NAMESPACE {
 namespace internal {
+LIBC_INLINE double acos(double x) { return __nv_acos(x); }
 LIBC_INLINE float acosf(float x) { return __nv_acosf(x); }
+LIBC_INLINE double acosh(double x) { return __nv_acosh(x); }
 LIBC_INLINE float acoshf(float x) { return __nv_acoshf(x); }
+LIBC_INLINE double asin(double x) { return __nv_asin(x); }
 LIBC_INLINE float asinf(float x) { return __nv_asinf(x); }
+LIBC_INLINE double asinh(double x) { return __nv_asinh(x); }
 LIBC_INLINE float asinhf(float x) { return __nv_asinhf(x); }
+LIBC_INLINE double atan2(double x, double y) { return __nv_atan2(x, y); }
+LIBC_INLINE float atan2f(float x, float y) { return __nv_atan2f(x, y); }
+LIBC_INLINE double atan(double x) { return __nv_atan(x); }
 LIBC_INLINE float atanf(float x) { return __nv_atanf(x); }
+LIBC_INLINE double atanh(double x) { return __nv_atanh(x); }
 LIBC_INLINE float atanhf(float x) { return __nv_atanhf(x); }
 LIBC_INLINE double cos(double x) { return __nv_cos(x); }
 LIBC_INLINE float cosf(float x) { return __nv_cosf(x); }
 LIBC_INLINE double cosh(double x) { return __nv_cosh(x); }
 LIBC_INLINE float coshf(float x) { return __nv_coshf(x); }
+LIBC_INLINE double erf(double x) { return __nv_erf(x); }
+LIBC_INLINE float erff(float x) { return __nv_erff(x); }
+LIBC_INLINE double exp(double x) { return __nv_exp(x); }
 LIBC_INLINE float expf(float x) { return __nv_expf(x); }
+LIBC_INLINE double exp2(double x) { return __nv_exp2(x); }
 LIBC_INLINE float exp2f(float x) { return __nv_exp2f(x); }
+LIBC_INLINE double exp10(double x) { return __nv_exp10(x); }
 LIBC_INLINE float exp10f(float x) { return __nv_exp10f(x); }
+LIBC_INLINE double expm1(double x) { return __nv_expm1(x); }
 LIBC_INLINE float expm1f(float x) { return __nv_expm1f(x); }
 LIBC_INLINE double fdim(double x, double y) { return __nv_fdim(x, y); }
 LIBC_INLINE float fdimf(float x, float y) { return __nv_fdimf(x, y); }
@@ -39,8 +53,16 @@ LIBC_INLINE double ldexp(double x, int i) { return __nv_ldexp(x, i); }
 LIBC_INLINE float ldexpf(float x, int i) { return __nv_ldexpf(x, i); }
 LIBC_INLINE long long llrint(double x) { return __nv_llrint(x); }
 LIBC_INLINE long long llrintf(float x) { return __nv_llrintf(x); }
-LIBC_INLINE long long llround(double x) { return __nv_llround(x); }
-LIBC_INLINE long long llroundf(float x) { return __nv_llroundf(x); }
+LIBC_INLINE double log10(double x) { return __nv_log10(x); }
+LIBC_INLINE float log10f(float x) { return __nv_log10f(x); }
+LIBC_INLINE double log1p(double x) { return __nv_log1p(x); }
+LIBC_INLINE float log1pf(float x) { return __nv_log1pf(x); }
+LIBC_INLINE double log2(double x) { return __nv_log2(x); }
+LIBC_INLINE float log2f(float x) { return __nv_log2f(x); }
+LIBC_INLINE double log(double x) { return __nv_log(x); }
+LIBC_INLINE float logf(float x) { return __nv_logf(x); }
+LIBC_INLINE long lrint(double x) { return __nv_lrint(x); }
+LIBC_INLINE long lrintf(float x) { return __nv_lrintf(x); }
 LIBC_INLINE double nextafter(double x, double y) {
   return __nv_nextafter(x, y);
 }
@@ -71,6 +93,8 @@ LIBC_INLINE double remquo(double x, double y, int *i) {
 LIBC_INLINE float remquof(float x, float y, int *i) {
   return __nv_remquof(x, y, i);
 }
+LIBC_INLINE double tgamma(double x) { return __nv_tgamma(x); }
+LIBC_INLINE float tgammaf(float x) { return __nv_tgammaf(x); }
 
 } // namespace internal
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/tgamma.cpp b/libc/src/math/gpu/vendor/tgamma.cpp
new file mode 100644
index 000000000000000..e86116a2b0abea0
--- /dev/null
+++ b/libc/src/math/gpu/vendor/tgamma.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU tgamma function -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/tgamma.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(double, tgamma, (double x)) { return internal::tgamma(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/gpu/vendor/tgammaf.cpp b/libc/src/math/gpu/vendor/tgammaf.cpp
new file mode 100644
index 000000000000000..552919bae446957
--- /dev/null
+++ b/libc/src/math/gpu/vendor/tgammaf.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of the GPU tgammaf function ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/tgammaf.h"
+#include "src/__support/common.h"
+
+#include "common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float, tgammaf, (float x)) { return internal::tgammaf(x); }
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/sincos.h b/libc/src/math/sincos.h
new file mode 100644
index 000000000000000..6235a7c5c04546b
--- /dev/null
+++ b/libc/src/math/sincos.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for sincos ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_SINCOS_H
+#define LLVM_LIBC_SRC_MATH_SINCOS_H
+
+namespace LIBC_NAMESPACE {
+
+void sincos(double x, double *sinx, double *cosx);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_SINCOS_H
diff --git a/libc/src/math/tgamma.h b/libc/src/math/tgamma.h
new file mode 100644
index 000000000000000..24590ed1a467695
--- /dev/null
+++ b/libc/src/math/tgamma.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for tgamma ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_TGAMMA_H
+#define LLVM_LIBC_SRC_MATH_TGAMMA_H
+
+namespace LIBC_NAMESPACE {
+
+double tgamma(double x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_TGAMMA_H
diff --git a/libc/src/math/tgammaf.h b/libc/src/math/tgammaf.h
new file mode 100644
index 000000000000000..d28e4c325152abe
--- /dev/null
+++ b/libc/src/math/tgammaf.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for tgammaf -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_TGAMMAF_H
+#define LLVM_LIBC_SRC_MATH_TGAMMAF_H
+
+namespace LIBC_NAMESPACE {
+
+float tgammaf(float x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_TGAMMAF_H

>From 3daa771047e16919f161a12a8a9124bbe13a89a6 Mon Sep 17 00:00:00 2001
From: Duo Wang <duow1 at uci.edu>
Date: Thu, 19 Oct 2023 12:30:05 -0700
Subject: [PATCH 42/76] [libcxx][test] Fix empty.gen selftest on windows
 (#69403)

Using `true` as a no-op unfortunately does not work on windows, which
fails libcxx lit tests on windows. Lit provides the `:` internal shell
builtin which is equivalent to `true`.
---
 libcxx/test/libcxx/selftest/gen.cpp/empty.gen.cpp     | 2 +-
 libcxx/test/libcxx/selftest/sh.cpp/run-success.sh.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/test/libcxx/selftest/gen.cpp/empty.gen.cpp b/libcxx/test/libcxx/selftest/gen.cpp/empty.gen.cpp
index 1915bede471f5e4..e0a36db25775e67 100644
--- a/libcxx/test/libcxx/selftest/gen.cpp/empty.gen.cpp
+++ b/libcxx/test/libcxx/selftest/gen.cpp/empty.gen.cpp
@@ -8,4 +8,4 @@
 
 // Make sure we can generate no tests at all
 
-// RUN: true
+// RUN: :
diff --git a/libcxx/test/libcxx/selftest/sh.cpp/run-success.sh.cpp b/libcxx/test/libcxx/selftest/sh.cpp/run-success.sh.cpp
index 1d8747c71948454..724d03ddb848e10 100644
--- a/libcxx/test/libcxx/selftest/sh.cpp/run-success.sh.cpp
+++ b/libcxx/test/libcxx/selftest/sh.cpp/run-success.sh.cpp
@@ -8,4 +8,4 @@
 
 // Make sure the test passes if it succeeds to run
 
-// RUN: true
+// RUN: :

>From 3cac608fbd0811b2f5c59c6e13148162ccd8543e Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 19 Oct 2023 20:52:20 +0100
Subject: [PATCH 43/76] [LV] Add interleave only test case with reduction
 requiring casts.

This adds test coverage for a crash exposed by
d311126349b8fe1684d62154a9fa5a7bbb0b713.
---
 .../interleave-and-scalarize-only.ll          | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
index b74829c698ce3a0..8832d66a2c83ac3 100644
--- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll
@@ -232,3 +232,40 @@ loop:
 exit:
   ret void
 }
+
+define i16 @reduction_with_casts() {
+; CHECK-LABEL: define i16 @reduction_with_casts() {
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.+]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY:%.+]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[VEC_PHI]], 65535
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[VEC_PHI1]], 65535
+; CHECK-NEXT:    [[TMP2]] = add i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP3]] = add i32 [[TMP1]], 1
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 9998
+; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = add i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label %scalar.ph
+;
+entry:
+  br label %loop
+
+loop:
+  %count.0.in1 = phi i32 [ 0, %entry ], [ %add, %loop ]
+  %iv = phi i16 [ 1, %entry ], [ %iv.next, %loop ]
+  %conv1 = and i32 %count.0.in1, 65535
+  %add = add nuw nsw i32 %conv1, 1
+  %iv.next = add i16 %iv, 1
+  %cmp = icmp eq i16 %iv.next, 10000
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  %add.lcssa = phi i32 [ %add, %loop ]
+  %count.0 = trunc i32 %add.lcssa to i16
+  ret i16 %count.0
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; DBG: {{.*}}

>From 616c86accbf4c9ada37da6fb6b04554dec0fffee Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jpienaar at google.com>
Date: Thu, 19 Oct 2023 13:06:17 -0700
Subject: [PATCH 44/76] [mlir][drr] Set operand segment in rewrite

This allows some basic variadic operands in rewrites. There were some workarounds employed (like "aliasing" the attribute). Couldn't find a way to do this directly with properties.
---
 mlir/test/lib/Dialect/Test/TestOps.td  | 23 +++++++++++++++++++++++
 mlir/test/mlir-tblgen/pattern.mlir     | 13 +++++++++++++
 mlir/tools/mlir-tblgen/RewriterGen.cpp | 21 +++++++++++++++++++++
 3 files changed, 57 insertions(+)

diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index edb63924b3553f2..1add9bd3c329438 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -2495,6 +2495,29 @@ def TestDefaultStrAttrHasValueOp : TEST_Op<"has_str_value"> {
 def : Pat<(TestDefaultStrAttrNoValueOp $value),
           (TestDefaultStrAttrHasValueOp ConstantStrAttr<StrAttr, "foo">)>;
 
+//===----------------------------------------------------------------------===//
+// Test Ops with variadics
+//===----------------------------------------------------------------------===//
+
+def TestVariadicRewriteSrcOp : TEST_Op<"variadic_rewrite_src_op", [AttrSizedOperandSegments]> {
+  let arguments = (ins
+    Variadic<AnyType>:$arg,
+    AnyType:$brg,
+    Variadic<AnyType>:$crg
+  );
+}
+
+def TestVariadicRewriteDstOp : TEST_Op<"variadic_rewrite_dst_op", [AttrSizedOperandSegments]> {
+  let arguments = (ins
+    AnyType:$brg,
+    Variadic<AnyType>:$crg,
+    Variadic<AnyType>:$arg
+  );
+}
+
+def : Pat<(TestVariadicRewriteSrcOp $arg, $brg, $crg),
+          (TestVariadicRewriteDstOp $brg, $crg, $arg)>;
+
 //===----------------------------------------------------------------------===//
 // Test Ops with Default-Valued Attributes and Differing Print Settings
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/mlir-tblgen/pattern.mlir b/mlir/test/mlir-tblgen/pattern.mlir
index 5f776338bd40be8..7f9c450f15b21e8 100644
--- a/mlir/test/mlir-tblgen/pattern.mlir
+++ b/mlir/test/mlir-tblgen/pattern.mlir
@@ -683,3 +683,16 @@ func.func @testConstantStrAttr() -> () {
   test.no_str_value {value = "bar"}
   return
 }
+
+//===----------------------------------------------------------------------===//
+// Test that patterns with variadics propagate sizes
+//===----------------------------------------------------------------------===//
+
+func.func @testVariadic(%arg_0: i32, %arg_1: i32, %brg: i64,
+    %crg_0: f32, %crg_1: f32, %crg_2: f32, %crg_3: f32) -> () {
+  // CHECK: "test.variadic_rewrite_dst_op"(%arg2, %arg3, %arg4, %arg5, %arg6, %arg0, %arg1) <{operandSegmentSizes = array<i32: 1, 4, 2>}> : (i64, f32, f32, f32, f32, i32, i32) -> ()
+  "test.variadic_rewrite_src_op"(%arg_0, %arg_1, %brg,
+    %crg_0, %crg_1, %crg_2, %crg_3) {operandSegmentSizes = array<i32: 2, 1, 4>} :
+    (i32, i32, i64, f32, f32, f32, f32) -> ()
+  return
+}
diff --git a/mlir/tools/mlir-tblgen/RewriterGen.cpp b/mlir/tools/mlir-tblgen/RewriterGen.cpp
index 9f36a3b430274a3..77c34cb03e987ea 100644
--- a/mlir/tools/mlir-tblgen/RewriterGen.cpp
+++ b/mlir/tools/mlir-tblgen/RewriterGen.cpp
@@ -1743,10 +1743,15 @@ void PatternEmitter::createAggregateLocalVarsForOpArgs(
       "if (auto tmpAttr = {1}) {\n"
       "  tblgen_attrs.emplace_back(rewriter.getStringAttr(\"{0}\"), "
       "tmpAttr);\n}\n";
+  int numVariadic = 0;
+  bool hasOperandSegmentSizes = false;
+  std::vector<std::string> sizes;
   for (int argIndex = 0, e = resultOp.getNumArgs(); argIndex < e; ++argIndex) {
     if (resultOp.getArg(argIndex).is<NamedAttribute *>()) {
       // The argument in the op definition.
       auto opArgName = resultOp.getArgName(argIndex);
+      hasOperandSegmentSizes =
+          hasOperandSegmentSizes || opArgName == "operandSegmentSizes";
       if (auto subTree = node.getArgAsNestedDag(argIndex)) {
         if (!subTree.isNativeCodeCall())
           PrintFatalError(loc, "only NativeCodeCall allowed in nested dag node "
@@ -1766,6 +1771,7 @@ void PatternEmitter::createAggregateLocalVarsForOpArgs(
         resultOp.getArg(argIndex).get<NamedTypeConstraint *>();
     std::string varName;
     if (operand->isVariadic()) {
+      ++numVariadic;
       std::string range;
       if (node.isNestedDagArg(argIndex)) {
         range = childNodeNames.lookup(argIndex);
@@ -1777,7 +1783,9 @@ void PatternEmitter::createAggregateLocalVarsForOpArgs(
       range = symbolInfoMap.getValueAndRangeUse(range);
       os << formatv("for (auto v: {0}) {{\n  tblgen_values.push_back(v);\n}\n",
                     range);
+      sizes.push_back(formatv("static_cast<int32_t>({0}.size())", range));
     } else {
+      sizes.push_back("1");
       os << formatv("tblgen_values.push_back(");
       if (node.isNestedDagArg(argIndex)) {
         os << symbolInfoMap.getValueAndRangeUse(
@@ -1804,6 +1812,19 @@ void PatternEmitter::createAggregateLocalVarsForOpArgs(
       os << ");\n";
     }
   }
+
+  if (numVariadic > 1 && !hasOperandSegmentSizes) {
+    // Only set size if it can't be computed.
+    const auto *sameVariadicSize =
+        resultOp.getTrait("::mlir::OpTrait::SameVariadicOperandSize");
+    if (!sameVariadicSize) {
+      const char *setSizes = R"(
+        tblgen_attrs.emplace_back(rewriter.getStringAttr("operandSegmentSizes"),
+          rewriter.getDenseI32ArrayAttr({{ {0} }));
+          )";
+      os.printReindented(formatv(setSizes, llvm::join(sizes, ", ")).str());
+    }
+  }
 }
 
 StaticMatcherHelper::StaticMatcherHelper(raw_ostream &os,

>From 51094545e28d222dc11eea513904506624d715c6 Mon Sep 17 00:00:00 2001
From: Aleksandr Platonov <platonov.aleksandr at huawei.com>
Date: Thu, 19 Oct 2023 23:09:31 +0300
Subject: [PATCH 45/76] [clang][index] Fix processing of CompoundAssignOperator
 at setting up reference roles (#69370)

Without this patch, in expressions like `foo += 1` the reference `foo` has
no read or write roles.

This happens because `CompoundAssignOperator` is also a
`BinaryOperator`, so handling `CompoundAssignOperator` in the `else`
branch is dead code.
---
 clang/lib/Index/IndexBody.cpp        | 18 +++++++++---------
 clang/unittests/Index/IndexTests.cpp | 25 +++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/clang/lib/Index/IndexBody.cpp b/clang/lib/Index/IndexBody.cpp
index e88f321f18a7129..08136baa5d408e9 100644
--- a/clang/lib/Index/IndexBody.cpp
+++ b/clang/lib/Index/IndexBody.cpp
@@ -77,9 +77,15 @@ class BodyIndexer : public RecursiveASTVisitor<BodyIndexer> {
     const Stmt *Parent = *It;
 
     if (auto BO = dyn_cast<BinaryOperator>(Parent)) {
-      if (BO->getOpcode() == BO_Assign && BO->getLHS()->IgnoreParenCasts() == E)
-        Roles |= (unsigned)SymbolRole::Write;
-
+      if (BO->getOpcode() == BO_Assign) {
+        if (BO->getLHS()->IgnoreParenCasts() == E)
+          Roles |= (unsigned)SymbolRole::Write;
+      } else if (auto CA = dyn_cast<CompoundAssignOperator>(Parent)) {
+        if (CA->getLHS()->IgnoreParenCasts() == E) {
+          Roles |= (unsigned)SymbolRole::Read;
+          Roles |= (unsigned)SymbolRole::Write;
+        }
+      }
     } else if (auto UO = dyn_cast<UnaryOperator>(Parent)) {
       if (UO->isIncrementDecrementOp()) {
         Roles |= (unsigned)SymbolRole::Read;
@@ -88,12 +94,6 @@ class BodyIndexer : public RecursiveASTVisitor<BodyIndexer> {
         Roles |= (unsigned)SymbolRole::AddressOf;
       }
 
-    } else if (auto CA = dyn_cast<CompoundAssignOperator>(Parent)) {
-      if (CA->getLHS()->IgnoreParenCasts() == E) {
-        Roles |= (unsigned)SymbolRole::Read;
-        Roles |= (unsigned)SymbolRole::Write;
-      }
-
     } else if (auto CE = dyn_cast<CallExpr>(Parent)) {
       if (CE->getCallee()->IgnoreParenCasts() == E) {
         addCallRole(Roles, Relations);
diff --git a/clang/unittests/Index/IndexTests.cpp b/clang/unittests/Index/IndexTests.cpp
index 4d19f47283c2853..8e9a1c6bf88245c 100644
--- a/clang/unittests/Index/IndexTests.cpp
+++ b/clang/unittests/Index/IndexTests.cpp
@@ -428,6 +428,31 @@ TEST(IndexTest, NonTypeTemplateParameter) {
                              WrittenAt(Position(3, 15)))));
 }
 
+TEST(IndexTest, ReadWriteRoles) {
+  std::string Code = R"cpp(
+    int main() {
+      int foo = 0;
+      foo = 2;
+      foo += 1;
+      int bar = foo;
+  }
+  )cpp";
+  auto Index = std::make_shared<Indexer>();
+  IndexingOptions Opts;
+  Opts.IndexFunctionLocals = true;
+  tooling::runToolOnCode(std::make_unique<IndexAction>(Index, Opts), Code);
+  EXPECT_THAT(
+      Index->Symbols,
+      AllOf(Contains(AllOf(QName("foo"), HasRole(SymbolRole::Write),
+                           WrittenAt(Position(4, 7)))),
+            Contains(AllOf(QName("foo"),
+                           HasRole(static_cast<unsigned>(SymbolRole::Read) |
+                                   static_cast<unsigned>(SymbolRole::Write)),
+                           WrittenAt(Position(5, 7)))),
+            Contains(AllOf(QName("foo"), HasRole(SymbolRole::Read),
+                           WrittenAt(Position(6, 17))))));
+}
+
 } // namespace
 } // namespace index
 } // namespace clang

>From f9632cee30b788df7a6241d7802224d8f633973a Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Thu, 19 Oct 2023 11:58:09 -0700
Subject: [PATCH 46/76] [lldb] Remove FileSpecList::GetFileSpecPointerAtIndex
 (NFC)

There's only one use and it eventually converts the pointer into a
reference. Simplify things and always use references.
---
 lldb/include/lldb/Target/Target.h                |  2 +-
 lldb/include/lldb/Utility/FileSpecList.h         | 13 -------------
 lldb/source/Commands/CommandObjectBreakpoint.cpp |  4 ++--
 lldb/source/Target/Target.cpp                    |  4 ++--
 lldb/source/Utility/FileSpecList.cpp             |  6 ------
 5 files changed, 5 insertions(+), 24 deletions(-)

diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h
index 8752b42a9518983..82045988018b606 100644
--- a/lldb/include/lldb/Target/Target.h
+++ b/lldb/include/lldb/Target/Target.h
@@ -683,7 +683,7 @@ class Target : public std::enable_shared_from_this<Target>,
   // Use this to create a breakpoint from a load address and a module file spec
   lldb::BreakpointSP CreateAddressInModuleBreakpoint(lldb::addr_t file_addr,
                                                      bool internal,
-                                                     const FileSpec *file_spec,
+                                                     const FileSpec &file_spec,
                                                      bool request_hardware);
 
   // Use this to create Address breakpoints:
diff --git a/lldb/include/lldb/Utility/FileSpecList.h b/lldb/include/lldb/Utility/FileSpecList.h
index 6b88cd2f3091379..14e8069f5ebd6d2 100644
--- a/lldb/include/lldb/Utility/FileSpecList.h
+++ b/lldb/include/lldb/Utility/FileSpecList.h
@@ -155,19 +155,6 @@ class FileSpecList {
   ///     returned.
   const FileSpec &GetFileSpecAtIndex(size_t idx) const;
 
-  /// Get file specification pointer at index.
-  ///
-  /// Gets a file from the file list. The file objects that are returned can
-  /// be tested using FileSpec::operator void*().
-  ///
-  /// \param[in] idx
-  ///     An index into the file list.
-  ///
-  /// \return
-  ///     A pointer to a contained FileSpec object at index \a idx.
-  ///     If \a idx is out of range, then an NULL is returned.
-  const FileSpec *GetFileSpecPointerAtIndex(size_t idx) const;
-
   /// Get the memory cost of this object.
   ///
   /// Return the size in bytes that this object takes in memory. This returns
diff --git a/lldb/source/Commands/CommandObjectBreakpoint.cpp b/lldb/source/Commands/CommandObjectBreakpoint.cpp
index 327dae4fd2afbba..18cbb9528b717a5 100644
--- a/lldb/source/Commands/CommandObjectBreakpoint.cpp
+++ b/lldb/source/Commands/CommandObjectBreakpoint.cpp
@@ -603,8 +603,8 @@ class CommandObjectBreakpointSet : public CommandObjectParsed {
       //  will track the load location of the library.
       size_t num_modules_specified = m_options.m_modules.GetSize();
       if (num_modules_specified == 1) {
-        const FileSpec *file_spec =
-            m_options.m_modules.GetFileSpecPointerAtIndex(0);
+        const FileSpec &file_spec =
+            m_options.m_modules.GetFileSpecAtIndex(0);
         bp_sp = target.CreateAddressInModuleBreakpoint(
             m_options.m_load_addr, internal, file_spec, m_options.m_hardware);
       } else if (num_modules_specified == 0) {
diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp
index 069b7bcdc40e614..5f8756c57675c95 100644
--- a/lldb/source/Target/Target.cpp
+++ b/lldb/source/Target/Target.cpp
@@ -480,12 +480,12 @@ BreakpointSP Target::CreateBreakpoint(const Address &addr, bool internal,
 
 lldb::BreakpointSP
 Target::CreateAddressInModuleBreakpoint(lldb::addr_t file_addr, bool internal,
-                                        const FileSpec *file_spec,
+                                        const FileSpec &file_spec,
                                         bool request_hardware) {
   SearchFilterSP filter_sp(
       new SearchFilterForUnconstrainedSearches(shared_from_this()));
   BreakpointResolverSP resolver_sp(new BreakpointResolverAddress(
-      nullptr, file_addr, file_spec ? *file_spec : FileSpec()));
+      nullptr, file_addr, file_spec));
   return CreateBreakpoint(filter_sp, resolver_sp, internal, request_hardware,
                           false);
 }
diff --git a/lldb/source/Utility/FileSpecList.cpp b/lldb/source/Utility/FileSpecList.cpp
index e5e0ac3e5981b1b..35486fdc7eff16e 100644
--- a/lldb/source/Utility/FileSpecList.cpp
+++ b/lldb/source/Utility/FileSpecList.cpp
@@ -140,12 +140,6 @@ const FileSpec &FileSpecList::GetFileSpecAtIndex(size_t idx) const {
   return g_empty_file_spec;
 }
 
-const FileSpec *FileSpecList::GetFileSpecPointerAtIndex(size_t idx) const {
-  if (idx < m_files.size())
-    return &m_files[idx];
-  return nullptr;
-}
-
 // Return the size in bytes that this object takes in memory. This returns the
 // size in bytes of this object's member variables and any FileSpec objects its
 // member variables contain, the result doesn't not include the string values

>From b2f50b49a8c45305d27f0393ee6248e3e8851788 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Thu, 19 Oct 2023 13:35:34 -0700
Subject: [PATCH 47/76] [clang-format][NFC] Use UnwrappedLineParser::eof() for
 consistency

---
 clang/lib/Format/UnwrappedLineParser.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index bdedfad3b78ba72..7bb487d020ea6f7 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -213,7 +213,7 @@ void UnwrappedLineParser::parse() {
     }
 
     // Create line with eof token.
-    assert(FormatTok->is(tok::eof));
+    assert(eof());
     pushToken(FormatTok);
     addUnwrappedLine();
 

>From d681461098c40a32f9d88ca28860bdb956dfd80d Mon Sep 17 00:00:00 2001
From: Austin Kerbow <Austin.Kerbow at amd.com>
Date: Thu, 19 Oct 2023 13:43:35 -0700
Subject: [PATCH 48/76] [AMDGPU] Add doc updates for kernarg preloading
 (#67516)

---
 llvm/docs/AMDGPUUsage.rst | 66 ++++++++++++++++++++++++++++++++-------
 1 file changed, 55 insertions(+), 11 deletions(-)

diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 8022816d7e616d3..9427df94e128e28 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -360,7 +360,7 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
      ``gfx90a``                  ``amdgcn``   dGPU  - sramecc         - Absolute      - *rocm-amdhsa* *TBA*
                                                     - tgsplit           flat
                                                     - xnack             scratch                       .. TODO::
-                                                                      - Packed
+                                                    - kernarg preload - Packed
                                                                         work-item                       Add product
                                                                         IDs                             names.
 
@@ -381,21 +381,21 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
      ``gfx940``                  ``amdgcn``   dGPU  - sramecc         - Architected                   *TBA*
                                                     - tgsplit           flat
                                                     - xnack             scratch                       .. TODO::
-                                                                      - Packed
+                                                    - kernarg preload - Packed
                                                                         work-item                       Add product
                                                                         IDs                             names.
 
      ``gfx941``                  ``amdgcn``   dGPU  - sramecc         - Architected                   *TBA*
                                                     - tgsplit           flat
                                                     - xnack             scratch                       .. TODO::
-                                                                      - Packed
+                                                    - kernarg preload - Packed
                                                                         work-item                       Add product
                                                                         IDs                             names.
 
      ``gfx942``                  ``amdgcn``   dGPU  - sramecc         - Architected                   *TBA*
                                                     - tgsplit           flat
                                                     - xnack             scratch                       .. TODO::
-                                                                      - Packed
+                                                    - kernarg preload - Packed
                                                                         work-item                       Add product
                                                                         IDs                             names.
 
@@ -4375,12 +4375,24 @@ The fields used by CP for code objects before V3 also match those specified in
                                                      dynamically sized stack.
                                                      This is only set in code
                                                      object v5 and later.
-     463:460 1 bit                                   Reserved, must be 0.
-     464     1 bit   RESERVED_464                    Deprecated, must be 0.
-     467:465 3 bits                                  Reserved, must be 0.
-     468     1 bit   RESERVED_468                    Deprecated, must be 0.
-     469:471 3 bits                                  Reserved, must be 0.
-     511:472 5 bytes                                 Reserved, must be 0.
+     463:460 4 bits                                  Reserved, must be 0.
+     470:464 7 bits  KERNARG_PRELOAD_SPEC_LENGTH     GFX6-GFX9
+                                                       - Reserved, must be 0.
+                                                     GFX90A, GFX940
+                                                       - The number of dwords from
+                                                         the kernarg segment to preload
+                                                         into User SGPRs before kernel
+                                                         execution. (see
+                                                         :ref:`amdgpu-amdhsa-kernarg-preload`).
+     479:471 9 bits  KERNARG_PRELOAD_SPEC_OFFSET     GFX6-GFX9
+                                                       - Reserved, must be 0.
+                                                     GFX90A, GFX940
+                                                       - An offset in dwords into the
+                                                         kernarg segment to begin
+                                                         preloading data into User
+                                                         SGPRs. (see
+                                                         :ref:`amdgpu-amdhsa-kernarg-preload`).
+     511:480 4 bytes                                 Reserved, must be 0.
      512     **Total size 64 bytes.**
      ======= ====================================================================
 
@@ -5002,7 +5014,7 @@ for enabled registers are dense starting at SGPR0: the first enabled register is
 SGPR0, the next enabled register is SGPR1 etc.; disabled registers do not have
 an SGPR number.
 
-The initial SGPRs comprise up to 16 User SRGPs that are set by CP and apply to
+The initial SGPRs comprise up to 16 User SGPRs that are set by CP and apply to
 all wavefronts of the grid. It is possible to specify more than 16 User SGPRs
 using the ``enable_sgpr_*`` bit fields, in which case only the first 16 are
 actually initialized. These are then immediately followed by the System SGPRs
@@ -5045,6 +5057,9 @@ SGPR register initial state is defined in
      then       Flat Scratch Init          2      See
                 (enable_sgpr_flat_scratch         :ref:`amdgpu-amdhsa-kernel-prolog-flat-scratch`.
                 _init)
+     then       Preloaded Kernargs         N/A    See
+                (kernarg_preload_spec             :ref:`amdgpu-amdhsa-kernarg-preload`.
+                _length)
      then       Private Segment Size       1      The 32-bit byte size of a
                 (enable_sgpr_private              single work-item's memory
                 _segment_size)                    allocation. This is the
@@ -5177,6 +5192,31 @@ following properties:
 * MTYPE set to support memory coherence that matches the runtime (such as CC for
   APU and NC for dGPU).
 
+.. _amdgpu-amdhsa-kernarg-preload:
+
+Preloaded Kernel Arguments
+++++++++++++++++++++++++++
+
+On hardware that supports this feature, kernel arguments can be preloaded into
+User SGPRs, up to the maximum number of User SGPRs available. The allocation of
+Preload SGPRs occurs directly after the last enabled non-kernarg preload User
+SGPR. (See :ref:`amdgpu-amdhsa-initial-kernel-execution-state`)
+
+The data preloaded is copied from the kernarg segment, the amount of data is
+determined by the value specified in the kernarg_preload_spec_length field of
+the kernel descriptor. This data is then loaded into consecutive User SGPRs. The
+number of SGPRs receiving preloaded kernarg data corresponds with the value
+given by kernarg_preload_spec_length. The preloading starts at the dword offset
+within the kernarg segment, which is specified by the
+kernarg_preload_spec_offset field.
+
+If the kernarg_preload_spec_length is non-zero, the CP firmware will append an
+additional 256 bytes to the kernel_code_entry_byte_offset. This addition
+facilitates the incorporation of a prologue to the kernel entry to handle cases
+where code designed for kernarg preloading is executed on hardware equipped with
+incompatible firmware. If hardware has compatible firmware the 256 bytes at the
+start of the kernel entry will be skipped.
+
 .. _amdgpu-amdhsa-kernel-prolog:
 
 Kernel Prolog
@@ -15352,6 +15392,10 @@ terminated by an ``.end_amdhsa_kernel`` directive.
                                                                                                :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx11-table`.
      ``.amdhsa_exception_int_div_zero``                       0                   GFX6-GFX11   Controls ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO in
                                                                                                :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx11-table`.
+     ``.amdhsa_user_sgpr_kernarg_preload_length``             0                   GFX90A,      Controls KERNARG_PRELOAD_SPEC_LENGTH in
+                                                                                  GFX940       :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
+     ``.amdhsa_user_sgpr_kernarg_preload_offset``             0                   GFX90A,      Controls KERNARG_PRELOAD_SPEC_OFFSET in
+                                                                                  GFX940       :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
      ======================================================== =================== ============ ===================
 
 .amdgpu_metadata

>From 10951ca4fe25e15ad893552eaa34a00aeba156b4 Mon Sep 17 00:00:00 2001
From: Aart Bik <39774503+aartbik at users.noreply.github.com>
Date: Thu, 19 Oct 2023 13:46:22 -0700
Subject: [PATCH 49/76] [mlir][sparse] use uint64_t type for dim/rank
 consistently (#69626)

---
 .../ExecutionEngine/SparseTensor/Storage.h    | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
index f1aeb12c662fdc1..ad92ee1f89fc153 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -303,7 +303,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
       uint64_t lvlRank = getLvlRank();
       uint64_t valIdx = 0;
       // Linearize the address
-      for (size_t lvl = 0; lvl < lvlRank; lvl++)
+      for (uint64_t lvl = 0; lvl < lvlRank; lvl++)
         valIdx = valIdx * getLvlSize(lvl) + lvlCoords[lvl];
       values[valIdx] = val;
       return;
@@ -338,7 +338,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
     values[c] = 0;
     filled[c] = false;
     // Subsequent insertions are quick.
-    for (uint64_t i = 1; i < count; ++i) {
+    for (uint64_t i = 1; i < count; i++) {
       assert(c < added[i] && "non-lexicographic insertion");
       c = added[i];
       assert(c <= expsz);
@@ -394,27 +394,27 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
 
     // In-place permutation.
     auto applyPerm = [this](std::vector<uint64_t> &perm) {
-      size_t length = perm.size();
-      size_t lvlRank = getLvlRank();
+      uint64_t length = perm.size();
+      uint64_t lvlRank = getLvlRank();
       // Cache for the current level coordinates.
       std::vector<P> lvlCrds(lvlRank);
-      for (size_t i = 0; i < length; i++) {
-        size_t current = i;
+      for (uint64_t i = 0; i < length; i++) {
+        uint64_t current = i;
         if (i != perm[current]) {
-          for (size_t l = 0; l < lvlRank; l++)
+          for (uint64_t l = 0; l < lvlRank; l++)
             lvlCrds[l] = coordinates[l][i];
           V val = values[i];
           // Deals with a permutation cycle.
           while (i != perm[current]) {
-            size_t next = perm[current];
+            uint64_t next = perm[current];
             // Swaps the level coordinates and value.
-            for (size_t l = 0; l < lvlRank; l++)
+            for (uint64_t l = 0; l < lvlRank; l++)
               coordinates[l][current] = coordinates[l][next];
             values[current] = values[next];
             perm[current] = current;
             current = next;
           }
-          for (size_t l = 0; l < lvlRank; l++)
+          for (uint64_t l = 0; l < lvlRank; l++)
             coordinates[l][current] = lvlCrds[l];
           values[current] = val;
           perm[current] = current;
@@ -557,7 +557,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
     const uint64_t lastLvl = lvlRank - 1;
     assert(diffLvl <= lvlRank);
     const uint64_t stop = lvlRank - diffLvl;
-    for (uint64_t i = 0; i < stop; ++i) {
+    for (uint64_t i = 0; i < stop; i++) {
       const uint64_t l = lastLvl - i;
       finalizeSegment(l, lvlCursor[l] + 1);
     }
@@ -569,7 +569,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
                V val) {
     const uint64_t lvlRank = getLvlRank();
     assert(diffLvl <= lvlRank);
-    for (uint64_t l = diffLvl; l < lvlRank; ++l) {
+    for (uint64_t l = diffLvl; l < lvlRank; l++) {
       const uint64_t c = lvlCoords[l];
       appendCrd(l, full, c);
       full = 0;
@@ -582,7 +582,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
   /// in the argument differ from those in the current cursor.
   uint64_t lexDiff(const uint64_t *lvlCoords) const {
     const uint64_t lvlRank = getLvlRank();
-    for (uint64_t l = 0; l < lvlRank; ++l) {
+    for (uint64_t l = 0; l < lvlRank; l++) {
       const auto crd = lvlCoords[l];
       const auto cur = lvlCursor[l];
       if (crd > cur || (crd == cur && !isUniqueLvl(l)) ||
@@ -705,7 +705,7 @@ SparseTensorStorage<P, C, V>::SparseTensorStorage(
   // really use nnz and dense/sparse distribution.
   bool allDense = true;
   uint64_t sz = 1;
-  for (uint64_t l = 0; l < lvlRank; ++l) {
+  for (uint64_t l = 0; l < lvlRank; l++) {
     const DimLevelType dlt = lvlTypes[l]; // Avoid redundant bounds checking.
     if (isCompressedDLT(dlt)) {
       positions[l].reserve(sz + 1);

>From 3f8e5fd08f33c3e8bce464f3b866dda5210ca943 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks at google.com>
Date: Thu, 19 Oct 2023 13:50:13 -0700
Subject: [PATCH 50/76] [NFC] Format some code in GlobalVariable.h

---
 llvm/include/llvm/IR/GlobalVariable.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/IR/GlobalVariable.h b/llvm/include/llvm/IR/GlobalVariable.h
index f915dba5c6595df..5ddffd16acc68f2 100644
--- a/llvm/include/llvm/IR/GlobalVariable.h
+++ b/llvm/include/llvm/IR/GlobalVariable.h
@@ -40,11 +40,12 @@ class GlobalVariable : public GlobalObject, public ilist_node<GlobalVariable> {
   friend class SymbolTableListTraits<GlobalVariable>;
 
   AttributeSet Attrs;
-  bool isConstantGlobal : 1;                   // Is this a global constant?
-  bool isExternallyInitializedConstant : 1;    // Is this a global whose value
-                                               // can change from its initial
-                                               // value before global
-                                               // initializers are run?
+
+  // Is this a global constant?
+  bool isConstantGlobal : 1;
+  // Is this a global whose value can change from its initial value before
+  // global initializers are run?
+  bool isExternallyInitializedConstant : 1;
 
 public:
   /// GlobalVariable ctor - If a parent module is specified, the global is

>From d173ce4a670e88b65c52f6fc1bf10d133ee35704 Mon Sep 17 00:00:00 2001
From: Ryan Prichard <rprichard at google.com>
Date: Thu, 19 Oct 2023 13:58:30 -0700
Subject: [PATCH 51/76] [libc++][Android] Support libc++ testing on Android
 (#69274)

I could probably break this commit into more pieces.

---

This patch adds libc++ support for Android L (Android 5.0+) and up,
tested using the Android team's current compiler, a recent version of
the AOSP sysroot, and the x86[-64] Android Emulator.

CMake and Lit Configuration:

Add runtimes/cmake/android/Arch-${ARCH}.cmake files that configure CMake
to cross-compile to Android without using CMake's built-in NDK support
(which only works with an actual packaged NDK).

Add libcxx/cmake/caches/AndroidNDK.cmake that builds and tests libc++
(and libc++abi) for Android. This file configures libc++ to match what
the NDK distributes, e.g.:
- libc++_shared.so (includes libc++abi objects, there is no
libc++abi.so). libunwind is linked statically but not exported.
 - libc++_static.a (does not include libc++abi) and libc++abi.a
 - `std::__ndk1` namespace
- All the libraries are built with `__ANDROID_API__=21`, even when they
are linked to something targeting a higher API level.

(However, when the Android LLVM team builds these components, they do
not use these CMake cache files. Instead they use Python scripts to
configure the builds. See
https://android.googlesource.com/toolchain/llvm_android/.)

Add llvm-libc++[abi].android-ndk.cfg.in files that test the Android
NDK's libc++_shared.so. These files can target old or new Android
devices. The Android LLVM team uses these test files to test libc++ for
both arm/arm64 and x86/x86_64 architectures.

The Android testing mode works by setting %{executor} to adb_run.py,
which uses `adb push` and `adb shell` to run tests remotely. adb_run.py
always runs tests as the "shell" user even on an old emulator where "adb
unroot" doesn't work. The script has workarounds for old Android
devices. The script uses a Unix domain socket on the host
(--job-limit-socket) to restrict concurrent adb invocations. Compiling
the tests is a major part of libc++ testing run-time, so it's desirable
to exploit all the host cores without overburdening the test devices,
which can have far fewer cores.

BuildKite CI:

Add a builder to run-buildbot, `android-ndk-*`, that uses Android Clang
and an Android sysroot to build libc++, then starts an Android emulator
container to run tests.

Run the emulator and an adb server in a separate Docker container
(libcxx-ci-android-emulator), and create a separate Docker image for
each emulator OS system image. Set ADB_SERVER_SOCKET to connect to the
container's adb server. Running the only adb server inside the container
makes cleanup more reliable between test runs, e.g. the adb client
doesn't create a `~/.android` directory and the adb server can be
restarted along with the emulator using docker stop/run. (N.B. The
emulator insists on connecting to an adb server and will start one
itself if it can't connect to one.)

The suffix to the android-ndk-* job is a label that concisely specifies
an Android SDK emulator image. e.g.:
 - "system-images;android-21;default;x86" ==> 21-def-x86
 - "system-images;android-33;google_apis;x86_64" ==> 33-goog-x86_64

Fixes: https://github.com/llvm/llvm-project/issues/69270
Differential Revision: https://reviews.llvm.org/D139147
---
 libcxx/cmake/caches/AndroidNDK.cmake          |  38 +++
 libcxx/docs/index.rst                         |   1 +
 .../configs/llvm-libc++-android-ndk.cfg.in    |  47 ++++
 libcxx/utils/adb_run.py                       | 256 ++++++++++++++++++
 libcxx/utils/ci/BOT_OWNERS.txt                |   5 +
 libcxx/utils/ci/run-buildbot                  |  50 ++++
 .../ci/vendor/android/Dockerfile.emulator     |  59 ++++
 .../vendor/android/build-emulator-images.sh   |  28 ++
 .../ci/vendor/android/emulator-entrypoint.sh  |  49 ++++
 .../ci/vendor/android/emulator-functions.sh   | 110 ++++++++
 .../vendor/android/emulator-wait-for-ready.sh |  30 ++
 .../vendor/android/setup-env-for-emulator.sh  |  13 +
 .../utils/ci/vendor/android/start-emulator.sh |  43 +++
 .../utils/ci/vendor/android/stop-emulator.sh  |  25 ++
 libcxx/utils/libcxx/test/android.py           |  97 +++++++
 .../configs/llvm-libc++abi-android-ndk.cfg.in |  40 +++
 llvm/utils/lit/lit/TestingConfig.py           |   1 +
 runtimes/cmake/android/Arch-arm.cmake         |   7 +
 runtimes/cmake/android/Arch-arm64.cmake       |   7 +
 runtimes/cmake/android/Arch-x86.cmake         |   7 +
 runtimes/cmake/android/Arch-x86_64.cmake      |   7 +
 runtimes/cmake/android/Common.cmake           |   6 +
 22 files changed, 926 insertions(+)
 create mode 100644 libcxx/cmake/caches/AndroidNDK.cmake
 create mode 100644 libcxx/test/configs/llvm-libc++-android-ndk.cfg.in
 create mode 100755 libcxx/utils/adb_run.py
 create mode 100644 libcxx/utils/ci/vendor/android/Dockerfile.emulator
 create mode 100755 libcxx/utils/ci/vendor/android/build-emulator-images.sh
 create mode 100755 libcxx/utils/ci/vendor/android/emulator-entrypoint.sh
 create mode 100644 libcxx/utils/ci/vendor/android/emulator-functions.sh
 create mode 100755 libcxx/utils/ci/vendor/android/emulator-wait-for-ready.sh
 create mode 100644 libcxx/utils/ci/vendor/android/setup-env-for-emulator.sh
 create mode 100755 libcxx/utils/ci/vendor/android/start-emulator.sh
 create mode 100755 libcxx/utils/ci/vendor/android/stop-emulator.sh
 create mode 100644 libcxx/utils/libcxx/test/android.py
 create mode 100644 libcxxabi/test/configs/llvm-libc++abi-android-ndk.cfg.in
 create mode 100644 runtimes/cmake/android/Arch-arm.cmake
 create mode 100644 runtimes/cmake/android/Arch-arm64.cmake
 create mode 100644 runtimes/cmake/android/Arch-x86.cmake
 create mode 100644 runtimes/cmake/android/Arch-x86_64.cmake
 create mode 100644 runtimes/cmake/android/Common.cmake

diff --git a/libcxx/cmake/caches/AndroidNDK.cmake b/libcxx/cmake/caches/AndroidNDK.cmake
new file mode 100644
index 000000000000000..86c5219b8b42b5f
--- /dev/null
+++ b/libcxx/cmake/caches/AndroidNDK.cmake
@@ -0,0 +1,38 @@
+# Build libc++abi and libc++ closely resembling what is shipped in the Android
+# NDK.
+
+# The NDK names the libraries libc++_shared.so and libc++_static.a. Using the
+# libc++_shared.so soname ensures that the library doesn't interact with the
+# libc++.so in /system/lib[64].
+set(LIBCXX_SHARED_OUTPUT_NAME c++_shared CACHE STRING "")
+set(LIBCXX_STATIC_OUTPUT_NAME c++_static CACHE STRING "")
+
+# The NDK libc++ uses a special namespace to help isolate its symbols from those
+# in the platform's STL (e.g. /system/lib[64]/libc++.so, but possibly stlport on
+# older versions of Android).
+set(LIBCXX_ABI_VERSION 1 CACHE STRING "")
+set(LIBCXX_ABI_NAMESPACE __ndk1 CACHE STRING "")
+
+# CMake doesn't add a version suffix to an Android shared object filename,
+# (because CMAKE_PLATFORM_NO_VERSIONED_SONAME is set), so it writes both a
+# libc++_shared.so ELF file and a libc++_shared.so linker script to the same
+# output path (the script clobbers the binary). Turn off the linker script.
+set(LIBCXX_ENABLE_ABI_LINKER_SCRIPT OFF CACHE BOOL "")
+
+set(LIBCXX_STATICALLY_LINK_ABI_IN_SHARED_LIBRARY ON CACHE BOOL "")
+set(LIBCXXABI_ENABLE_SHARED OFF CACHE BOOL "")
+
+# Clang links libc++ by default, but it doesn't exist yet. The libc++ CMake
+# files specify -nostdlib++ to avoid this problem, but CMake's default "compiler
+# works" testing doesn't pass that flag, so force those tests to pass.
+set(CMAKE_C_COMPILER_WORKS ON CACHE BOOL "")
+set(CMAKE_CXX_COMPILER_WORKS ON CACHE BOOL "")
+
+# Use adb to push tests to a locally-connected device (e.g. emulator) and run
+# them.
+set(LIBCXX_TEST_CONFIG "llvm-libc++-android-ndk.cfg.in" CACHE STRING "")
+set(LIBCXXABI_TEST_CONFIG "llvm-libc++abi-android-ndk.cfg.in" CACHE STRING "")
+
+# CMAKE_SOURCE_DIR refers to the "<monorepo>/runtimes" directory.
+set(LIBCXX_EXECUTOR "${CMAKE_SOURCE_DIR}/../libcxx/utils/adb_run.py" CACHE STRING "")
+set(LIBCXXABI_EXECUTOR "${LIBCXX_EXECUTOR}" CACHE STRING "")
diff --git a/libcxx/docs/index.rst b/libcxx/docs/index.rst
index 9c2a83bde3c0f4f..72c80d7dc954a33 100644
--- a/libcxx/docs/index.rst
+++ b/libcxx/docs/index.rst
@@ -130,6 +130,7 @@ Target platform Target architecture       Notes
 macOS 10.9+     i386, x86_64, arm64       Building the shared library itself requires targetting macOS 10.13+
 FreeBSD 12+     i386, x86_64, arm
 Linux           i386, x86_64, arm, arm64  Only glibc-2.24 and later and no other libc is officially supported
+Android 5.0+    i386, x86_64, arm, arm64
 Windows         i386, x86_64              Both MSVC and MinGW style environments, ABI in MSVC environments is :doc:`unstable <DesignDocs/ABIVersioning>`
 AIX 7.2TL5+     powerpc, powerpc64
 =============== ========================= ============================
diff --git a/libcxx/test/configs/llvm-libc++-android-ndk.cfg.in b/libcxx/test/configs/llvm-libc++-android-ndk.cfg.in
new file mode 100644
index 000000000000000..1be85279a120aaf
--- /dev/null
+++ b/libcxx/test/configs/llvm-libc++-android-ndk.cfg.in
@@ -0,0 +1,47 @@
+# This testing configuration handles running the test suite against LLVM's
+# libc++ using adb and a libc++_shared.so library on Android.
+
+lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg')
+
+import re
+import site
+
+site.addsitedir(os.path.join('@LIBCXX_SOURCE_DIR@', 'utils'))
+
+import libcxx.test.android
+import libcxx.test.config
+import libcxx.test.params
+
+config.substitutions.append(('%{flags}',
+    '--sysroot @CMAKE_SYSROOT@' if '@CMAKE_SYSROOT@' else ''
+))
+
+compile_flags = '-nostdinc++ -I %{include} -I %{target-include} -I %{libcxx}/test/support'
+if re.match(r'i686-linux-android(21|22|23)$', config.target_triple):
+    # 32-bit x86 Android has a bug where the stack is sometimes misaligned.
+    # The problem appears limited to versions before Android N (API 24) and only
+    # __attribute__((constructor)) functions. Compile with -mstackrealign to
+    # work around the bug.
+    # TODO: Consider automatically doing something like this in Clang itself (LIBCXX-ANDROID-FIXME)
+    # See https://github.com/android/ndk/issues/693.
+    compile_flags += ' -mstackrealign'
+config.substitutions.append(('%{compile_flags}', compile_flags))
+
+# The NDK library is called "libc++_shared.so". Use LD_LIBRARY_PATH to find
+# libc++_shared.so because older Bionic dynamic loaders don't support rpath
+# lookup.
+config.substitutions.append(('%{link_flags}',
+    '-nostdlib++ -L %{lib} -lc++_shared'
+))
+config.substitutions.append(('%{exec}',
+    '%{executor}' +
+    ' --job-limit-socket ' + libcxx.test.android.adb_job_limit_socket() +
+    ' --prepend-path-env LD_LIBRARY_PATH /data/local/tmp/libc++ --execdir %T -- '
+))
+
+libcxx.test.config.configure(
+    libcxx.test.params.DEFAULT_PARAMETERS,
+    libcxx.test.features.DEFAULT_FEATURES,
+    config,
+    lit_config
+)
diff --git a/libcxx/utils/adb_run.py b/libcxx/utils/adb_run.py
new file mode 100755
index 000000000000000..b54198fed44a43f
--- /dev/null
+++ b/libcxx/utils/adb_run.py
@@ -0,0 +1,256 @@
+#!/usr/bin/env python3
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+"""adb_run.py is a utility for running a libc++ test program via adb.
+"""
+
+import argparse
+import hashlib
+import os
+import re
+import shlex
+import socket
+import subprocess
+import sys
+from typing import List, Optional, Tuple
+
+
+# Sync a host file /path/to/dir/file to ${REMOTE_BASE_DIR}/run-${HASH}/dir/file.
+REMOTE_BASE_DIR = "/data/local/tmp/adb_run"
+
+g_job_limit_socket = None
+g_verbose = False
+
+
+def run_adb_sync_command(command: List[str]) -> None:
+    """Run an adb command and discard the output, unless the command fails. If
+    the command fails, dump the output instead, and exit the script with
+    failure.
+    """
+    if g_verbose:
+        sys.stderr.write(f"running: {shlex.join(command)}\n")
+    proc = subprocess.run(command, universal_newlines=True,
+                          stdin=subprocess.DEVNULL, stdout=subprocess.PIPE,
+                          stderr=subprocess.STDOUT, encoding="utf-8")
+    if proc.returncode != 0:
+        # adb's stdout (e.g. for adb push) should normally be discarded, but
+        # on failure, it should be shown. Print it to stderr because it's
+        # unrelated to the test program's stdout output. A common error caught
+        # here is "No space left on device".
+        sys.stderr.write(f"{proc.stdout}\n"
+                         f"error: adb command exited with {proc.returncode}: "
+                         f"{shlex.join(command)}\n")
+        sys.exit(proc.returncode)
+
+
+def sync_test_dir(local_dir: str, remote_dir: str) -> None:
+    """Sync the libc++ test directory on the host to the remote device."""
+
+    # Optimization: The typical libc++ test directory has only a single
+    # *.tmp.exe file in it. In that case, skip the `mkdir` command, which is
+    # normally necessary because we don't know if the target directory already
+    # exists on the device.
+    local_files = os.listdir(local_dir)
+    if len(local_files) == 1:
+        local_file = os.path.join(local_dir, local_files[0])
+        remote_file = os.path.join(remote_dir, local_files[0])
+        if not os.path.islink(local_file) and os.path.isfile(local_file):
+            run_adb_sync_command(["adb", "push", "--sync", local_file,
+                                  remote_file])
+            return
+
+    assert os.path.basename(local_dir) == os.path.basename(remote_dir)
+    run_adb_sync_command(["adb", "shell", "mkdir", "-p", remote_dir])
+    run_adb_sync_command(["adb", "push", "--sync", local_dir,
+                          os.path.dirname(remote_dir)])
+
+
+def build_env_arg(env_args: List[str], prepend_path_args: List[Tuple[str, str]]) -> str:
+    components = []
+    for arg in env_args:
+        k, v = arg.split("=", 1)
+        components.append(f"export {k}={shlex.quote(v)}; ")
+    for k, v in prepend_path_args:
+        components.append(f"export {k}={shlex.quote(v)}${{{k}:+:${k}}}; ")
+    return "".join(components)
+
+
+def run_command(args: argparse.Namespace) -> int:
+    local_dir = args.execdir
+    assert local_dir.startswith("/")
+    assert not local_dir.endswith("/")
+
+    # Copy each execdir to a subdir of REMOTE_BASE_DIR. Name the directory using
+    # a hash of local_dir so that concurrent adb_run invocations don't create
+    # the same intermediate parent directory. At least `adb push` has trouble
+    # with concurrent mkdir syscalls on common parent directories. (Somehow
+    # mkdir fails with EAGAIN/EWOULDBLOCK, see internal Google bug,
+    # b/289311228.)
+    local_dir_hash = hashlib.sha1(local_dir.encode()).hexdigest()
+    remote_dir = f"{REMOTE_BASE_DIR}/run-{local_dir_hash}/{os.path.basename(local_dir)}"
+    sync_test_dir(local_dir, remote_dir)
+
+    adb_shell_command = (
+        # Set the environment early so that PATH can be overridden. Overriding
+        # PATH is useful for:
+        #  - Replacing older shell utilities with toybox (e.g. on old devices).
+        #  - Adding a `bash` command that delegates to `sh` (mksh).
+        f"{build_env_arg(args.env, args.prepend_path_env)}"
+
+        # Set a high oom_score_adj so that, if the test program uses too much
+        # memory, it is killed before anything else on the device. The default
+        # oom_score_adj is -1000, so a test using too much memory typically
+        # crashes the device.
+        "echo 1000 >/proc/self/oom_score_adj; "
+
+        # If we're running as root, switch to the shell user. The libc++
+        # filesystem tests require running without root permissions. Some x86
+        # emulator devices (before Android N) do not have a working `adb unroot`
+        # and always run as root. Non-debug builds typically lack `su` and only
+        # run as the shell user.
+        #
+        # Some libc++ tests create temporary files in the working directory,
+        # which might be owned by root. Before switching to shell, make the
+        # cwd writable (and readable+executable) to every user.
+        #
+        # N.B.:
+        #  - Avoid "id -u" because it wasn't supported until Android M.
+        #  - The `env` and `which` commands were also added in Android M.
+        #  - Starting in Android M, su from root->shell resets PATH, so we need
+        #    to modify it again in the new environment.
+        #  - Avoid chmod's "a+rwx" syntax because it's not supported until
+        #    Android N.
+        #  - Defining this function allows specifying the arguments to the test
+        #    program (i.e. "$@") only once.
+        "run_without_root() {"
+        "  chmod 777 .;"
+        "  case \"$(id)\" in"
+        "    *\"uid=0(root)\"*)"
+        "    if command -v env >/dev/null; then"
+        "      su shell \"$(command -v env)\" PATH=\"$PATH\" \"$@\";"
+        "    else"
+        "      su shell \"$@\";"
+        "    fi;;"
+        "    *) \"$@\";;"
+        "  esac;"
+        "}; "
+    )
+
+    # Older versions of Bionic limit the length of argv[0] to 127 bytes
+    # (SOINFO_NAME_LEN-1), and the path to libc++ tests tend to exceed this
+    # limit. Changing the working directory works around this limit. The limit
+    # is increased to 4095 (PATH_MAX-1) in Android M (API 23).
+    command_line = [arg.replace(local_dir + "/", "./") for arg in args.command]
+
+    # Prior to the adb feature "shell_v2" (added in Android N), `adb shell`
+    # always created a pty:
+    #  - This merged stdout and stderr together.
+    #  - The pty converts LF to CRLF.
+    #  - The exit code of the shell command wasn't propagated.
+    # Work around all three limitations, unless "shell_v2" is present.
+    proc = subprocess.run(["adb", "features"], check=True,
+                          stdin=subprocess.DEVNULL, stdout=subprocess.PIPE,
+                          encoding="utf-8")
+    adb_features = set(proc.stdout.strip().split())
+    has_shell_v2 = "shell_v2" in adb_features
+    if has_shell_v2:
+        adb_shell_command += (
+            f"cd {remote_dir} && run_without_root {shlex.join(command_line)}"
+        )
+    else:
+        adb_shell_command += (
+            f"{{"
+            f"  stdout=$("
+            f"    cd {remote_dir} && run_without_root {shlex.join(command_line)};"
+            f"    echo -n __libcxx_adb_exit__=$?"
+            f"  ); "
+            f"}} 2>&1; "
+            f"echo -n __libcxx_adb_stdout__\"$stdout\""
+        )
+
+    adb_command_line = ["adb", "shell", adb_shell_command]
+    if g_verbose:
+        sys.stderr.write(f"running: {shlex.join(adb_command_line)}\n")
+
+    if has_shell_v2:
+        proc = subprocess.run(adb_command_line, shell=False, check=False,
+                              encoding="utf-8")
+        return proc.returncode
+    else:
+        proc = subprocess.run(adb_command_line, shell=False, check=False,
+                              stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                              encoding="utf-8")
+        # The old `adb shell` mode used a pty, which converted LF to CRLF.
+        # Convert it back.
+        output = proc.stdout.replace("\r\n", "\n")
+
+        if proc.returncode:
+            sys.stderr.write(f"error: adb failed:\n"
+                             f"  command: {shlex.join(adb_command_line)}\n"
+                             f"  output: {output}\n")
+            return proc.returncode
+
+        match = re.match(r"(.*)__libcxx_adb_stdout__(.*)__libcxx_adb_exit__=(\d+)$",
+                     output, re.DOTALL)
+        if not match:
+            sys.stderr.write(f"error: could not parse adb output:\n"
+                             f"  command: {shlex.join(adb_command_line)}\n"
+                             f"  output: {output}\n")
+            return 1
+
+        sys.stderr.write(match.group(1))
+        sys.stdout.write(match.group(2))
+        return int(match.group(3))
+
+
+def connect_to_job_limiter_server(sock_addr: str) -> None:
+    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+
+    try:
+        sock.connect(sock_addr)
+    except (FileNotFoundError, ConnectionRefusedError) as e:
+        # Copying-and-pasting an adb_run.py command-line from a lit test failure
+        # is likely to fail because the socket no longer exists (or is
+        # inactive), so just give a warning.
+        sys.stderr.write(f"warning: could not connect to {sock_addr}: {e}\n")
+        return
+
+    # The connect call can succeed before the server has called accept, because
+    # of the listen backlog, so wait for the server to send a byte.
+    sock.recv(1)
+
+    # Keep the socket open until this process ends, then let the OS close the
+    # connection automatically.
+    global g_job_limit_socket
+    g_job_limit_socket = sock
+
+
+def main() -> int:
+    """Main function (pylint wants this docstring)."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--execdir", type=str, required=True)
+    parser.add_argument("--env", type=str, required=False, action="append",
+                        default=[], metavar="NAME=VALUE")
+    parser.add_argument("--prepend-path-env", type=str, nargs=2, required=False,
+                        action="append", default=[],
+                        metavar=("NAME", "PATH"))
+    parser.add_argument("--job-limit-socket")
+    parser.add_argument("--verbose", "-v", default=False, action="store_true")
+    parser.add_argument("command", nargs=argparse.ONE_OR_MORE)
+    args = parser.parse_args()
+
+    global g_verbose
+    g_verbose = args.verbose
+    if args.job_limit_socket is not None:
+        connect_to_job_limiter_server(args.job_limit_socket)
+    return run_command(args)
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/libcxx/utils/ci/BOT_OWNERS.txt b/libcxx/utils/ci/BOT_OWNERS.txt
index f6eb91cdac4bb07..721b19e52d8bcb1 100644
--- a/libcxx/utils/ci/BOT_OWNERS.txt
+++ b/libcxx/utils/ci/BOT_OWNERS.txt
@@ -15,3 +15,8 @@ D: Armv7, Armv8, AArch64
 N: LLVM on Power
 E: powerllvm at ca.ibm.com
 D: AIX, ppc64le
+
+N: Android libc++
+E: rprichard at google.com
+H: rprichard
+D: Emulator-based x86[-64] libc++ CI testing
diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot
index b5c48568c995e3c..69ad58ed079ea01 100755
--- a/libcxx/utils/ci/run-buildbot
+++ b/libcxx/utils/ci/run-buildbot
@@ -156,6 +156,13 @@ function generate-cmake-libcxx-win() {
           "${@}"
 }
 
+function generate-cmake-android() {
+    generate-cmake-base \
+          -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi" \
+          -DLIBCXX_CXX_ABI=libcxxabi \
+          "${@}"
+}
+
 function check-runtimes() {
     echo "+++ Running the libc++ tests"
     ${NINJA} -vC "${BUILD_DIR}" check-cxx
@@ -706,6 +713,49 @@ aix)
     check-abi-list
     check-runtimes
 ;;
+android-ndk-*)
+    clean
+
+    ANDROID_EMU_IMG="${BUILDER#android-ndk-}"
+    . "${MONOREPO_ROOT}/libcxx/utils/ci/vendor/android/emulator-functions.sh"
+    if ! validate_emu_img "${ANDROID_EMU_IMG}"; then
+        echo "error: android-ndk suffix must be a valid emulator image (${ANDROID_EMU_IMG})" >&2
+        exit 1
+    fi
+    ARCH=$(arch_of_emu_img ${ANDROID_EMU_IMG})
+
+    # Use the Android compiler by default.
+    export CC=${CC:-/opt/android/clang/clang-current/bin/clang}
+    export CXX=${CXX:-/opt/android/clang/clang-current/bin/clang++}
+
+    # The NDK libc++_shared.so is always built against the oldest supported API
+    # level. When tests are run against a device with a newer API level, test
+    # programs can be built for any supported API level, but building for the
+    # newest API (i.e. the system image's API) is probably the most interesting.
+    PARAMS="target_triple=$(triple_of_arch ${ARCH})$(api_of_emu_img ${ANDROID_EMU_IMG})"
+    generate-cmake-android -C "${MONOREPO_ROOT}/runtimes/cmake/android/Arch-${ARCH}.cmake" \
+                           -C "${MONOREPO_ROOT}/libcxx/cmake/caches/AndroidNDK.cmake" \
+                           -DCMAKE_SYSROOT=/opt/android/ndk/sysroot \
+                           -DLIBCXX_TEST_PARAMS="${PARAMS}" \
+                           -DLIBCXXABI_TEST_PARAMS="${PARAMS}"
+    check-abi-list
+    ${NINJA} -vC "${BUILD_DIR}" install-cxx install-cxxabi
+
+    # Start the emulator and make sure we can connect to the adb server running
+    # inside of it.
+    "${MONOREPO_ROOT}/libcxx/utils/ci/vendor/android/start-emulator.sh" ${ANDROID_EMU_IMG}
+    trap "${MONOREPO_ROOT}/libcxx/utils/ci/vendor/android/stop-emulator.sh" EXIT
+    . "${MONOREPO_ROOT}/libcxx/utils/ci/vendor/android/setup-env-for-emulator.sh"
+
+    # Create adb_run early to avoid concurrent `mkdir -p` of common parent
+    # directories.
+    adb shell mkdir -p /data/local/tmp/adb_run
+    adb push "${BUILD_DIR}/lib/libc++_shared.so" /data/local/tmp/libc++/libc++_shared.so
+    echo "+++ Running the libc++ tests"
+    ${NINJA} -vC "${BUILD_DIR}" check-cxx
+    echo "+++ Running the libc++abi tests"
+    ${NINJA} -vC "${BUILD_DIR}" check-cxxabi
+;;
 #################################################################
 # Insert vendor-specific internal configurations below.
 #
diff --git a/libcxx/utils/ci/vendor/android/Dockerfile.emulator b/libcxx/utils/ci/vendor/android/Dockerfile.emulator
new file mode 100644
index 000000000000000..54953f40014e745
--- /dev/null
+++ b/libcxx/utils/ci/vendor/android/Dockerfile.emulator
@@ -0,0 +1,59 @@
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+FROM ubuntu:jammy
+
+RUN apt-get update && apt-get install -y \
+    curl \
+    netcat-openbsd \
+    openjdk-11-jdk \
+    sudo \
+    unzip \
+    && rm -rf /var/lib/apt/lists/*
+
+ENV ANDROID_HOME /opt/android/sdk
+
+RUN curl -sL https://dl.google.com/android/repository/commandlinetools-linux-9477386_latest.zip -o cmdline-tools.zip && \
+    mkdir -p ${ANDROID_HOME} && \
+    unzip cmdline-tools.zip -d ${ANDROID_HOME}/cmdline-tools && \
+    mv ${ANDROID_HOME}/cmdline-tools/cmdline-tools ${ANDROID_HOME}/cmdline-tools/latest && \
+    rm cmdline-tools.zip
+ENV PATH="${ANDROID_HOME}/cmdline-tools/latest/bin:${PATH}"
+
+RUN yes | sdkmanager --licenses
+RUN sdkmanager --install emulator
+ENV PATH="${ANDROID_HOME}/emulator:${PATH}"
+
+ARG API  # e.g. 21
+RUN sdkmanager --install "platforms;android-${API}"
+
+ARG TYPE  # one of: default, google_apis, or google_apis_playstore
+ARG ABI   # e.g. armeabi-v7a, x86
+ENV EMU_PACKAGE_NAME="system-images;android-${API};${TYPE};${ABI}"
+RUN sdkmanager --install "${EMU_PACKAGE_NAME}"
+
+COPY ./emulator-entrypoint.sh /opt/emulator/bin/emulator-entrypoint.sh
+COPY ./emulator-wait-for-ready.sh /opt/emulator/bin/emulator-wait-for-ready.sh
+ENV PATH="/opt/emulator/bin:${PATH}"
+ENV PATH="${ANDROID_HOME}/platform-tools:${PATH}"
+
+# Setup password-less sudo so that /dev/kvm permissions can be changed. Run the
+# emulator in an unprivileged user for reliability (and it might require it?)
+RUN echo "ALL ALL = (ALL) NOPASSWD: ALL" >> /etc/sudoers
+RUN useradd --create-home emulator
+USER emulator
+WORKDIR /home/emulator
+
+# Size of emulator /data partition in megabytes.
+ENV EMU_PARTITION_SIZE=8192
+
+EXPOSE 5037
+
+HEALTHCHECK CMD emulator-wait-for-ready.sh 5
+
+ENTRYPOINT ["emulator-entrypoint.sh"]
diff --git a/libcxx/utils/ci/vendor/android/build-emulator-images.sh b/libcxx/utils/ci/vendor/android/build-emulator-images.sh
new file mode 100755
index 000000000000000..f467ffc6231f5e9
--- /dev/null
+++ b/libcxx/utils/ci/vendor/android/build-emulator-images.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+set -e
+
+THIS_DIR="$(cd "$(dirname "$0")" && pwd)"
+. "${THIS_DIR}/emulator-functions.sh"
+
+build_image() {
+    local EMU_IMG="$1"
+    validate_emu_img_syntax "${EMU_IMG}"
+    docker build -t $(docker_image_of_emu_img ${EMU_IMG}) \
+        -f Dockerfile.emulator . \
+        --build-arg API=$(api_of_emu_img ${EMU_IMG}) \
+        --build-arg TYPE=$(type_of_emu_img ${EMU_IMG}) \
+        --build-arg ABI=$(abi_of_arch $(arch_of_emu_img ${EMU_IMG}))
+}
+
+cd "${THIS_DIR}"
+
+build_image 21-def-x86
+build_image 33-goog-x86_64
diff --git a/libcxx/utils/ci/vendor/android/emulator-entrypoint.sh b/libcxx/utils/ci/vendor/android/emulator-entrypoint.sh
new file mode 100755
index 000000000000000..e4538697266a441
--- /dev/null
+++ b/libcxx/utils/ci/vendor/android/emulator-entrypoint.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+# This script is the entrypoint of an Android Emulator Docker container.
+
+set -e
+
+# The container's /dev/kvm has the same UID+GID as the host device. Changing the
+# ownership inside the container doesn't affect the UID+GID on the host.
+sudo chown emulator:emulator /dev/kvm
+
+# Always use a copy of platform-tools provided by the host to ensure that the
+# versions of adb match between the host and the emulator.
+if [ ! -x /mnt/android-platform-tools/platform-tools/adb ]; then
+    echo "error: This image requires platform-tools mounted at" \
+         "/mnt/android-platform-tools containing platform-tools/adb" >&2
+    exit 1
+fi
+sudo cp -r /mnt/android-platform-tools/platform-tools /opt/android/sdk
+
+# Start an adb host server. `adb start-server` blocks until the port is ready.
+# Use ADB_REJECT_KILL_SERVER=1 to ensure that an adb protocol version mismatch
+# doesn't kill the adb server.
+ADB_REJECT_KILL_SERVER=1 adb -a start-server
+
+# This syntax (using an IP address of 127.0.0.1 rather than localhost) seems to
+# prevent the adb client from ever spawning an adb host server.
+export ADB_SERVER_SOCKET=tcp:127.0.0.1:5037
+
+# The AVD could already exist if the Docker container were stopped and then
+# restarted.
+if [ ! -d ~/.android/avd/emulator.avd ]; then
+    # N.B. AVD creation takes a few seconds and creates a mostly-empty
+    # multi-gigabyte userdata disk image. (It's not useful to create the AVDs in
+    # advance.)
+    avdmanager --verbose create avd --name emulator \
+        --package "${EMU_PACKAGE_NAME}" --device pixel_5
+fi
+
+# Use exec so that the emulator is PID 1, so that `docker stop` kills the
+# emulator.
+exec emulator @emulator -no-audio -no-window \
+    -partition-size "${EMU_PARTITION_SIZE}"
diff --git a/libcxx/utils/ci/vendor/android/emulator-functions.sh b/libcxx/utils/ci/vendor/android/emulator-functions.sh
new file mode 100644
index 000000000000000..27eea2af157cc81
--- /dev/null
+++ b/libcxx/utils/ci/vendor/android/emulator-functions.sh
@@ -0,0 +1,110 @@
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+# Bash functions for managing the names of emulator system images.
+
+# Parse the image name and set variables: API, TYPE, and ARCH.
+__parse_emu_img() {
+    if [[ "${1}" =~ ([0-9]+)-(def|goog|play)-(arm|arm64|x86|x86_64)$ ]]; then
+        API=${BASH_REMATCH[1]}
+        case ${BASH_REMATCH[2]} in
+            def) TYPE=default ;;
+            goog) TYPE=google_apis ;;
+            play) TYPE=google_apis_playstore ;;
+        esac
+        ARCH=${BASH_REMATCH[3]}
+        return 0
+    else
+        return 1
+    fi
+}
+
+# Check that the emulator image name has valid syntax.
+validate_emu_img_syntax() {
+    local EMU_IMG="${1}"
+    local API TYPE ARCH
+    if ! __parse_emu_img "${EMU_IMG}"; then
+        echo "\
+error: invalid emulator image name: ${EMU_IMG}
+  expected \"\${API}-\${TYPE}-\${ARCH}\" where API is a number, TYPE is one of
+  (def|goog|play), and ARCH is one of arm, arm64, x86, or x86_64." >&2
+        return 1
+    fi
+}
+
+docker_image_of_emu_img() {
+    echo "android-emulator-${1}"
+}
+
+# Check that the emulator image name has valid syntax and that the Docker image
+# is present. On failure, writes an error to stderr and exits the script.
+validate_emu_img() {
+    local EMU_IMG="${1}"
+    if ! validate_emu_img_syntax "${EMU_IMG}"; then
+        return 1
+    fi
+    # Make sure Docker is working before trusting other Docker commands.
+    # Temporarily suppress command echoing so we only show 'docker info' output
+    # on failure, and only once.
+    if (set +x; !(docker info &>/dev/null || docker info)); then
+        echo "error: Docker is required for emulator usage but 'docker info' failed" >&2
+        return 1
+    fi
+    local DOCKER_IMAGE=$(docker_image_of_emu_img ${EMU_IMG})
+    if ! docker image inspect ${DOCKER_IMAGE} &>/dev/null; then
+        echo "error: emulator Docker image (${DOCKER_IMAGE}) is not installed" >&2
+        return 1
+    fi
+}
+
+api_of_emu_img() {
+    local API TYPE ARCH
+    __parse_emu_img "${1}"
+    echo ${API}
+}
+
+type_of_emu_img() {
+    local API TYPE ARCH
+    __parse_emu_img "${1}"
+    echo ${TYPE}
+}
+
+arch_of_emu_img() {
+    local API TYPE ARCH
+    __parse_emu_img "${1}"
+    echo ${ARCH}
+}
+
+# Expand the short emu_img string into the full SDK package string identifying
+# the system image.
+sdk_package_of_emu_img() {
+    local API TYPE ARCH
+    __parse_emu_img "${1}"
+    echo "system-images;android-${API};${TYPE};$(abi_of_arch ${ARCH})"
+}
+
+# Return the Android ABI string for an architecture.
+abi_of_arch() {
+    case "${1}" in
+        arm) echo armeabi-v7a ;;
+        arm64) echo arm64-v8a ;;
+        x86) echo x86 ;;
+        x86_64) echo x86_64 ;;
+        *) echo "error: unhandled arch ${1}" >&2; exit 1 ;;
+    esac
+}
+
+triple_of_arch() {
+    case "${1}" in
+        arm) echo armv7a-linux-androideabi ;;
+        arm64) echo aarch64-linux-android ;;
+        x86) echo i686-linux-android ;;
+        x86_64) echo x86_64-linux-android ;;
+        *) echo "error: unhandled arch ${1}" >&2; exit 1 ;;
+    esac
+}
diff --git a/libcxx/utils/ci/vendor/android/emulator-wait-for-ready.sh b/libcxx/utils/ci/vendor/android/emulator-wait-for-ready.sh
new file mode 100755
index 000000000000000..0c35794792891cf
--- /dev/null
+++ b/libcxx/utils/ci/vendor/android/emulator-wait-for-ready.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+set -ex
+
+# Time to wait in seconds. The emulator ought to start in 5-15 seconds or so,
+# so add a safety factor in case something takes longer in CI.
+TIMEOUT=${1-300}
+
+# This syntax (using an IP address of 127.0.0.1 rather than localhost) seems to
+# prevent the adb client from ever spawning an adb host server.
+export ADB_SERVER_SOCKET=tcp:127.0.0.1:5037
+
+# Invoke nc first to ensure that something is listening to port 5037. Otherwise,
+# invoking adb might fork an adb server.
+#
+# TODO: Consider waiting for `adb shell getprop dev.bootcomplete 2>/dev/null
+# | grep 1 >/dev/null` as well. It adds ~4 seconds to 21-def-x86 and ~15 seconds
+# to 33-goog-x86_64 and doesn't seem to be necessary for running libc++ tests.
+timeout ${TIMEOUT} bash -c '
+until (nc -z localhost 5037 && adb wait-for-device); do
+    sleep 0.5
+done
+'
diff --git a/libcxx/utils/ci/vendor/android/setup-env-for-emulator.sh b/libcxx/utils/ci/vendor/android/setup-env-for-emulator.sh
new file mode 100644
index 000000000000000..7de6cde7a7ad410
--- /dev/null
+++ b/libcxx/utils/ci/vendor/android/setup-env-for-emulator.sh
@@ -0,0 +1,13 @@
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+export ADB_SERVER_SOCKET="tcp:$(docker inspect \
+    -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' \
+    libcxx-ci-android-emulator):5037"
+
+echo "setup-env-for-emulator.sh: setting ADB_SERVER_SOCKET to ${ADB_SERVER_SOCKET}"
diff --git a/libcxx/utils/ci/vendor/android/start-emulator.sh b/libcxx/utils/ci/vendor/android/start-emulator.sh
new file mode 100755
index 000000000000000..2d6e272675ea003
--- /dev/null
+++ b/libcxx/utils/ci/vendor/android/start-emulator.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+# Starts a new Docker container using a Docker image containing the Android
+# Emulator and an OS image. Stops and removes the old container if it exists
+# already.
+
+set -e
+
+THIS_DIR="$(cd "$(dirname "$0")" && pwd)"
+. "${THIS_DIR}/emulator-functions.sh"
+
+EMU_IMG="${1}"
+if ! validate_emu_img "${EMU_IMG}"; then
+    echo "error: The first argument must be a valid emulator image." >&2
+    exit 1
+fi
+
+"${THIS_DIR}/stop-emulator.sh"
+
+# Start the container.
+docker run --name libcxx-ci-android-emulator --detach --device /dev/kvm \
+    -eEMU_PARTITION_SIZE=8192 \
+    --volume android-platform-tools:/mnt/android-platform-tools \
+    $(docker_image_of_emu_img ${EMU_IMG})
+ERR=0
+docker exec libcxx-ci-android-emulator emulator-wait-for-ready.sh || ERR=${?}
+echo "Emulator container initial logs:"
+docker logs libcxx-ci-android-emulator
+if [ ${ERR} != 0 ]; then
+    exit ${ERR}
+fi
+
+# Make sure the device is accessible from outside the emulator container and
+# advertise to the user that this script exists.
+. "${THIS_DIR}/setup-env-for-emulator.sh"
+adb wait-for-device
diff --git a/libcxx/utils/ci/vendor/android/stop-emulator.sh b/libcxx/utils/ci/vendor/android/stop-emulator.sh
new file mode 100755
index 000000000000000..b5797ccb344f4ba
--- /dev/null
+++ b/libcxx/utils/ci/vendor/android/stop-emulator.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+set -e
+
+THIS_DIR="$(cd "$(dirname "$0")" && pwd)"
+. "${THIS_DIR}/emulator-functions.sh"
+
+# Cleanup the emulator if it's already running.
+if docker container inspect libcxx-ci-android-emulator &>/dev/null; then
+    echo "Stopping existing emulator container..."
+    docker stop libcxx-ci-android-emulator
+
+    echo "Emulator container final logs:"
+    docker logs libcxx-ci-android-emulator
+
+    echo "Removing existing emulator container..."
+    docker rm libcxx-ci-android-emulator
+fi
diff --git a/libcxx/utils/libcxx/test/android.py b/libcxx/utils/libcxx/test/android.py
new file mode 100644
index 000000000000000..29c681581b90d62
--- /dev/null
+++ b/libcxx/utils/libcxx/test/android.py
@@ -0,0 +1,97 @@
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+import atexit
+import os
+import re
+import select
+import socket
+import subprocess
+import tempfile
+import threading
+from typing import List
+
+
+def _get_cpu_count() -> int:
+    # Determine the number of cores by listing a /sys directory. Older devices
+    # lack `nproc`. Even if a static toybox binary is pushed to the device, it may
+    # return an incorrect value. (e.g. On a Nexus 7 running Android 5.0, toybox
+    # nproc returns 1 even though the device has 4 CPUs.)
+    job = subprocess.run(["adb", "shell", "ls /sys/devices/system/cpu"],
+                         encoding="utf8", check=False,
+                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    if job.returncode == 1:
+        # Maybe adb is missing, maybe ANDROID_SERIAL needs to be defined, maybe the
+        # /sys subdir isn't there. Most errors will be handled later, just use one
+        # job. (N.B. The adb command still succeeds even if ls fails on older
+        # devices that lack the shell_v2 adb feature.)
+        return 1
+    # Make sure there are no CR characters in the output. Pre-shell_v2, the adb
+    # stdout comes from a master pty so newlines are CRLF-delimited. On Windows,
+    # LF might also get expanded to CRLF.
+    cpu_listing = job.stdout.replace('\r', '\n')
+
+    # Count lines that match "cpu${DIGITS}".
+    result = len([line for line in cpu_listing.splitlines()
+                  if re.match(r'cpu(\d)+$', line)])
+
+    # Restrict the result to something reasonable.
+    if result < 1:
+        result = 1
+    if result > 1024:
+        result = 1024
+
+    return result
+
+
+def _job_limit_socket_thread(temp_dir: tempfile.TemporaryDirectory,
+                             server: socket.socket, job_count: int) -> None:
+    """Service the job limit server socket, accepting only as many connections
+    as there should be concurrent jobs.
+    """
+    clients: List[socket.socket] = []
+    while True:
+        rlist = list(clients)
+        if len(clients) < job_count:
+            rlist.append(server)
+        rlist, _, _ = select.select(rlist, [], [])
+        for sock in rlist:
+            if sock == server:
+                new_client, _ = server.accept()
+                new_client.send(b"x")
+                clients.append(new_client)
+            else:
+                sock.close()
+                clients.remove(sock)
+
+
+def adb_job_limit_socket() -> str:
+    """An Android device can frequently have many fewer cores than the host
+    (e.g. 4 versus 128). We want to exploit all the device cores without
+    overburdening it.
+
+    Create a Unix domain socket that only allows as many connections as CPUs on
+    the Android device.
+    """
+
+    # Create the job limit server socket.
+    temp_dir = tempfile.TemporaryDirectory(prefix="libcxx_")
+    sock_addr = temp_dir.name + "/adb_job.sock"
+    server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+    server.bind(sock_addr)
+    server.listen(1)
+
+    # Spawn a thread to service the socket. As a daemon thread, its existence
+    # won't prevent interpreter shutdown. The temp dir will still be removed on
+    # shutdown.
+    cpu_count = _get_cpu_count()
+    threading.Thread(target=_job_limit_socket_thread,
+                     args=(temp_dir, server, cpu_count),
+                     daemon=True).start()
+
+    return sock_addr
diff --git a/libcxxabi/test/configs/llvm-libc++abi-android-ndk.cfg.in b/libcxxabi/test/configs/llvm-libc++abi-android-ndk.cfg.in
new file mode 100644
index 000000000000000..f2cb62a32d4e834
--- /dev/null
+++ b/libcxxabi/test/configs/llvm-libc++abi-android-ndk.cfg.in
@@ -0,0 +1,40 @@
+# This testing configuration handles running the test suite against LLVM's
+# libc++abi using adb and a libc++_shared.so library on Android.
+
+lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg')
+
+import re
+import site
+
+site.addsitedir(os.path.join('@LIBCXX_SOURCE_DIR@', 'utils'))
+
+import libcxx.test.android
+import libcxx.test.config
+import libcxx.test.params
+
+config.substitutions.append(('%{flags}',
+    '--sysroot @CMAKE_SYSROOT@' if '@CMAKE_SYSROOT@' else ''
+))
+config.substitutions.append(('%{compile_flags}',
+    '-nostdinc++ -I %{include} -I %{cxx-include} -I %{cxx-target-include} %{maybe-include-libunwind} -I %{libcxx}/test/support -I %{libcxx}/src -D_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS'
+))
+
+# The NDK library is called "libc++_shared.so". Use LD_LIBRARY_PATH to find
+# libc++_shared.so because older Bionic dynamic loaders don't support rpath
+# lookup. The Android libc++ shared library exports libc++abi, so we don't need
+# to link with -lc++abi.
+config.substitutions.append(('%{link_flags}',
+    '-nostdlib++ -L %{lib} -lc++_shared'
+))
+config.substitutions.append(('%{exec}',
+    '%{executor}' +
+    ' --job-limit-socket ' + libcxx.test.android.adb_job_limit_socket() +
+    ' --prepend-path-env LD_LIBRARY_PATH /data/local/tmp/libc++ --execdir %T -- '
+))
+
+libcxx.test.config.configure(
+    libcxx.test.params.DEFAULT_PARAMETERS,
+    libcxx.test.features.DEFAULT_FEATURES,
+    config,
+    lit_config
+)
diff --git a/llvm/utils/lit/lit/TestingConfig.py b/llvm/utils/lit/lit/TestingConfig.py
index 541bd01ef8e8f95..eb9f8de2a7f960c 100644
--- a/llvm/utils/lit/lit/TestingConfig.py
+++ b/llvm/utils/lit/lit/TestingConfig.py
@@ -43,6 +43,7 @@ def fromdefaults(litConfig):
             "TSAN_OPTIONS",
             "UBSAN_OPTIONS",
             "ADB",
+            "ADB_SERVER_SOCKET",
             "ANDROID_SERIAL",
             "SSH_AUTH_SOCK",
             "SANITIZER_IGNORE_CVE_2016_2143",
diff --git a/runtimes/cmake/android/Arch-arm.cmake b/runtimes/cmake/android/Arch-arm.cmake
new file mode 100644
index 000000000000000..c9e6e2509cb2e39
--- /dev/null
+++ b/runtimes/cmake/android/Arch-arm.cmake
@@ -0,0 +1,7 @@
+include(${CMAKE_CURRENT_LIST_DIR}/Common.cmake)
+
+set(CMAKE_SYSTEM_PROCESSOR "armv7-a" CACHE STRING "")
+set(CMAKE_ASM_COMPILER_TARGET "armv7a-linux-androideabi21" CACHE STRING "")
+set(CMAKE_C_COMPILER_TARGET   "armv7a-linux-androideabi21" CACHE STRING "")
+set(CMAKE_CXX_COMPILER_TARGET "armv7a-linux-androideabi21" CACHE STRING "")
+set(ANDROID_NATIVE_API_LEVEL 21 CACHE STRING "")
diff --git a/runtimes/cmake/android/Arch-arm64.cmake b/runtimes/cmake/android/Arch-arm64.cmake
new file mode 100644
index 000000000000000..9cae3313fad0d5a
--- /dev/null
+++ b/runtimes/cmake/android/Arch-arm64.cmake
@@ -0,0 +1,7 @@
+include(${CMAKE_CURRENT_LIST_DIR}/Common.cmake)
+
+set(CMAKE_SYSTEM_PROCESSOR "aarch64" CACHE STRING "")
+set(CMAKE_ASM_COMPILER_TARGET "aarch64-linux-android21" CACHE STRING "")
+set(CMAKE_C_COMPILER_TARGET   "aarch64-linux-android21" CACHE STRING "")
+set(CMAKE_CXX_COMPILER_TARGET "aarch64-linux-android21" CACHE STRING "")
+set(ANDROID_NATIVE_API_LEVEL 21 CACHE STRING "")
diff --git a/runtimes/cmake/android/Arch-x86.cmake b/runtimes/cmake/android/Arch-x86.cmake
new file mode 100644
index 000000000000000..31b7a719e6a3d18
--- /dev/null
+++ b/runtimes/cmake/android/Arch-x86.cmake
@@ -0,0 +1,7 @@
+include(${CMAKE_CURRENT_LIST_DIR}/Common.cmake)
+
+set(CMAKE_SYSTEM_PROCESSOR "i686" CACHE STRING "")
+set(CMAKE_ASM_COMPILER_TARGET "i686-linux-android21" CACHE STRING "")
+set(CMAKE_C_COMPILER_TARGET   "i686-linux-android21" CACHE STRING "")
+set(CMAKE_CXX_COMPILER_TARGET "i686-linux-android21" CACHE STRING "")
+set(ANDROID_NATIVE_API_LEVEL 21 CACHE STRING "")
diff --git a/runtimes/cmake/android/Arch-x86_64.cmake b/runtimes/cmake/android/Arch-x86_64.cmake
new file mode 100644
index 000000000000000..a109fac6974f5f0
--- /dev/null
+++ b/runtimes/cmake/android/Arch-x86_64.cmake
@@ -0,0 +1,7 @@
+include(${CMAKE_CURRENT_LIST_DIR}/Common.cmake)
+
+set(CMAKE_SYSTEM_PROCESSOR "x86_64" CACHE STRING "")
+set(CMAKE_ASM_COMPILER_TARGET "x86_64-linux-android21" CACHE STRING "")
+set(CMAKE_C_COMPILER_TARGET   "x86_64-linux-android21" CACHE STRING "")
+set(CMAKE_CXX_COMPILER_TARGET "x86_64-linux-android21" CACHE STRING "")
+set(ANDROID_NATIVE_API_LEVEL 21 CACHE STRING "")
diff --git a/runtimes/cmake/android/Common.cmake b/runtimes/cmake/android/Common.cmake
new file mode 100644
index 000000000000000..3f49334651a0f69
--- /dev/null
+++ b/runtimes/cmake/android/Common.cmake
@@ -0,0 +1,6 @@
+set(CMAKE_SYSTEM_NAME "Android" CACHE STRING "")
+
+# Set the CMake system version to "1" to inhibit CMake's built-in support for
+# compiling using the Android NDK, which gets in the way when we're not using an
+# NDK.
+set(CMAKE_SYSTEM_VERSION "1" CACHE STRING "")

>From 049993eae6bef539ac4bca7ddeede78282e496d9 Mon Sep 17 00:00:00 2001
From: Nuri Amari <nuriamari at meta.com>
Date: Thu, 19 Oct 2023 13:59:57 -0700
Subject: [PATCH 52/76] [FunctionComparator] Differentiate instructions passing
 different MDStrings (#69543)

Prior to this patch, differing metadata operands to two otherwise
identical instructions was not enough to consider the instructions
different in the eyes of the function comparator. This breaks LLVM
virtual function elimination, among other features.

In this patch, we handle the case where two associated operands are
MDStrings of different value. This patch does not differentiate more
complex metadata operands.

---------

Co-authored-by: Nuri Amari <nuriamari at fb.com>
---
 .../Transforms/Utils/FunctionComparator.cpp   | 30 ++++++++++++-
 .../mergefunc-preserve-vfe-intrinsics.ll      | 44 +++++++++++++++++++
 2 files changed, 73 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/MergeFunc/mergefunc-preserve-vfe-intrinsics.ll

diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index b1d74f67377e27f..79ca99d1566ce25 100644
--- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -160,10 +160,23 @@ int FunctionComparator::cmpAttrs(const AttributeList L,
 int FunctionComparator::cmpMetadata(const Metadata *L,
                                     const Metadata *R) const {
   // TODO: the following routine coerce the metadata contents into constants
-  // before comparison.
+  // or MDStrings before comparison.
   // It ignores any other cases, so that the metadata nodes are considered
   // equal even though this is not correct.
   // We should structurally compare the metadata nodes to be perfect here.
+
+  auto *MDStringL = dyn_cast<MDString>(L);
+  auto *MDStringR = dyn_cast<MDString>(R);
+  if (MDStringL && MDStringR) {
+    if (MDStringL == MDStringR)
+      return 0;
+    return MDStringL->getString().compare(MDStringR->getString());
+  }
+  if (MDStringR)
+    return -1;
+  if (MDStringL)
+    return 1;
+
   auto *CL = dyn_cast<ConstantAsMetadata>(L);
   auto *CR = dyn_cast<ConstantAsMetadata>(R);
   if (CL == CR)
@@ -820,6 +833,21 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) const {
   if (ConstR)
     return -1;
 
+  const MetadataAsValue *MetadataValueL = dyn_cast<MetadataAsValue>(L);
+  const MetadataAsValue *MetadataValueR = dyn_cast<MetadataAsValue>(R);
+  if (MetadataValueL && MetadataValueR) {
+    if (MetadataValueL == MetadataValueR)
+      return 0;
+
+    return cmpMetadata(MetadataValueL->getMetadata(),
+                       MetadataValueR->getMetadata());
+  }
+
+  if (MetadataValueL)
+    return 1;
+  if (MetadataValueR)
+    return -1;
+
   const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
   const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
 
diff --git a/llvm/test/Transforms/MergeFunc/mergefunc-preserve-vfe-intrinsics.ll b/llvm/test/Transforms/MergeFunc/mergefunc-preserve-vfe-intrinsics.ll
new file mode 100644
index 000000000000000..1c29aec8d8f28d1
--- /dev/null
+++ b/llvm/test/Transforms/MergeFunc/mergefunc-preserve-vfe-intrinsics.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -passes=mergefunc -S %s | FileCheck %s
+
+; This test contains three identical functions, aside from the metadata
+; they pass to a function call. This test verifies that the function merger
+; pass is able to merge the two functions that are truly identical,
+; but the third that passes different metadata is preserved
+
+declare { ptr, i1 } @llvm.type.checked.load(ptr, i32, metadata)
+
+define i1 @merge_candidate_a(ptr %ptr, i32 %offset) {
+; CHECK-LABEL: define i1 @merge_candidate_a(
+; CHECK-SAME: ptr [[PTR:%.*]], i32 [[OFFSET:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call { ptr, i1 } @llvm.type.checked.load(ptr [[PTR]], i32 [[OFFSET]], metadata !"common_metadata")
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { ptr, i1 } [[TMP1]], 1
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %1 = call { ptr, i1 } @llvm.type.checked.load(ptr %ptr, i32 %offset, metadata !"common_metadata")
+  %2 = extractvalue { ptr, i1 } %1, 1
+  ret i1 %2
+}
+
+define i1 @merge_candidate_c(ptr %ptr, i32 %offset) {
+; CHECK-LABEL: define i1 @merge_candidate_c(
+; CHECK-SAME: ptr [[PTR:%.*]], i32 [[OFFSET:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call { ptr, i1 } @llvm.type.checked.load(ptr [[PTR]], i32 [[OFFSET]], metadata !"different_metadata")
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { ptr, i1 } [[TMP1]], 1
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %1 = call { ptr, i1 } @llvm.type.checked.load(ptr %ptr, i32 %offset, metadata !"different_metadata")
+  %2 = extractvalue { ptr, i1 } %1, 1
+  ret i1 %2
+}
+
+define i1 @merge_candidate_b(ptr %ptr, i32 %offset) {
+; CHECK-LABEL: define i1 @merge_candidate_b(
+; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i1 @merge_candidate_a(ptr [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT:    ret i1 [[TMP3]]
+;
+  %1 = call { ptr, i1 } @llvm.type.checked.load(ptr %ptr, i32 %offset, metadata !"common_metadata")
+  %2 = extractvalue { ptr, i1 } %1, 1
+  ret i1 %2
+}

>From a39215768b58bb39716820959cedf82c76648770 Mon Sep 17 00:00:00 2001
From: Joseph Huber <35342157+jhuber6 at users.noreply.github.com>
Date: Thu, 19 Oct 2023 17:00:01 -0400
Subject: [PATCH 53/76] [libc] Rework the 'fgets' implementation on the GPU
 (#69635)

Summary:
The `fgets` function as implemented is not functional currently when
called with multiple threads. This is because we rely on repeatedly
polling the character to detect EOF. This doesn't work when there are
multiple threads that may wish to poll the characters. This patch pulls
out the logic into a standalone RPC call to handle this in a single
operation such that calling it from multiple threads functions as
expected. It also makes it less slow because we no longer make N RPC
calls for N characters.
---
 libc/include/llvm-libc-types/rpc_opcodes_t.h |  1 +
 libc/src/stdio/gpu/fgets.cpp                 | 28 +++++++++-----------
 libc/utils/gpu/server/rpc_server.cpp         | 16 +++++++++++
 3 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h
index 2fd318f06a7db16..7b85428dd3445e8 100644
--- a/libc/include/llvm-libc-types/rpc_opcodes_t.h
+++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h
@@ -17,6 +17,7 @@ typedef enum {
   RPC_WRITE_TO_STREAM,
   RPC_WRITE_TO_STDOUT_NEWLINE,
   RPC_READ_FROM_STREAM,
+  RPC_READ_FGETS,
   RPC_OPEN_FILE,
   RPC_CLOSE_FILE,
   RPC_MALLOC,
diff --git a/libc/src/stdio/gpu/fgets.cpp b/libc/src/stdio/gpu/fgets.cpp
index 4cabd15001a7f54..5ea4bdcdc9e0fef 100644
--- a/libc/src/stdio/gpu/fgets.cpp
+++ b/libc/src/stdio/gpu/fgets.cpp
@@ -22,24 +22,20 @@ LLVM_LIBC_FUNCTION(char *, fgets,
   if (count < 1)
     return nullptr;
 
-  // This implementation is very slow as it makes multiple RPC calls.
-  unsigned char c = '\0';
-  int i = 0;
-  for (; i < count - 1 && c != '\n'; ++i) {
-    auto r = file::read(stream, &c, 1);
-    if (r != 1)
-      break;
-
-    str[i] = c;
-  }
-
-  bool has_error = LIBC_NAMESPACE::ferror(stream);
-  bool has_eof = LIBC_NAMESPACE::feof(stream);
-
-  if (has_error || (i == 0 && has_eof))
+  uint64_t recv_size;
+  void *buf = nullptr;
+  rpc::Client::Port port = rpc::client.open<RPC_READ_FGETS>();
+  port.send([=](rpc::Buffer *buffer) {
+    buffer->data[0] = count;
+    buffer->data[1] = file::from_stream(stream);
+  });
+  port.recv_n(&buf, &recv_size,
+              [&](uint64_t) { return reinterpret_cast<void *>(str); });
+  port.close();
+
+  if (recv_size == 0)
     return nullptr;
 
-  str[i] = '\0';
   return str;
 }
 
diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp
index 0550115f7cd1a1c..05e900edc6993bb 100644
--- a/libc/utils/gpu/server/rpc_server.cpp
+++ b/libc/utils/gpu/server/rpc_server.cpp
@@ -101,6 +101,22 @@ struct Server {
       });
       break;
     }
+    case RPC_READ_FGETS: {
+      uint64_t sizes[lane_size] = {0};
+      void *data[lane_size] = {nullptr};
+      port->recv([&](rpc::Buffer *buffer, uint32_t id) {
+        data[id] = new char[buffer->data[0]];
+        const char *str =
+            fgets(reinterpret_cast<char *>(data[id]), buffer->data[0],
+                  file::to_stream(buffer->data[1]));
+        sizes[id] = !str ? 0 : std::strlen(str) + 1;
+      });
+      port->send_n(data, sizes);
+      for (uint32_t id = 0; id < lane_size; ++id)
+        if (data[id])
+          delete[] reinterpret_cast<uint8_t *>(data[id]);
+      break;
+    }
     case RPC_OPEN_FILE: {
       uint64_t sizes[lane_size] = {0};
       void *paths[lane_size] = {nullptr};

>From 630037ede4cec8870764bed27c1289388ac33097 Mon Sep 17 00:00:00 2001
From: Joseph Huber <35342157+jhuber6 at users.noreply.github.com>
Date: Thu, 19 Oct 2023 17:01:43 -0400
Subject: [PATCH 54/76] [libc] Partially implement 'rand' for the GPU (#66167)

Summary:
This patch partially implements the `rand` function on the GPU. This is
partial because the GPU currently doesn't support thread local storage
or static initializers. To implement this on the GPU, I use 1/8th of the
local / shared memory quota to treat the shared memory as thread local
storage. This is done by simply allocating enough storage for each
thread in the block and indexing into this based off of the thread id.
The downside to this is that it does not initialize `srand` correctly to
be `1` as the standard says, it is also wasteful. In the future we
should figure out a way to support TLS on the GPU so that this can be
completely common and less resource intensive.
---
 libc/config/gpu/entrypoints.txt    |  2 ++
 libc/src/stdlib/rand.cpp           | 10 ++++++----
 libc/src/stdlib/rand_util.cpp      |  6 ++++++
 libc/src/stdlib/rand_util.h        | 22 ++++++++++++++++++++++
 libc/test/src/stdlib/rand_test.cpp |  3 +++
 5 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index b20008e2784c412..756cddb0e8e5287 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -63,6 +63,8 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdlib.lldiv
     libc.src.stdlib.qsort
     libc.src.stdlib.qsort_r
+    libc.src.stdlib.rand
+    libc.src.stdlib.srand
     libc.src.stdlib.strtod
     libc.src.stdlib.strtof
     libc.src.stdlib.strtol
diff --git a/libc/src/stdlib/rand.cpp b/libc/src/stdlib/rand.cpp
index c5737a8b8454775..ad543b4048a949e 100644
--- a/libc/src/stdlib/rand.cpp
+++ b/libc/src/stdlib/rand.cpp
@@ -15,10 +15,12 @@ namespace LIBC_NAMESPACE {
 // An implementation of the xorshift64star pseudo random number generator. This
 // is a good general purpose generator for most non-cryptographics applications.
 LLVM_LIBC_FUNCTION(int, rand, (void)) {
-  rand_next ^= rand_next >> 12;
-  rand_next ^= rand_next << 25;
-  rand_next ^= rand_next >> 27;
-  return static_cast<int>((rand_next * 0x2545F4914F6CDD1Dul) >> 32) & RAND_MAX;
+  unsigned long x = rand_next;
+  x ^= x >> 12;
+  x ^= x << 25;
+  x ^= x >> 27;
+  rand_next = x;
+  return static_cast<int>((x * 0x2545F4914F6CDD1Dul) >> 32) & RAND_MAX;
 }
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdlib/rand_util.cpp b/libc/src/stdlib/rand_util.cpp
index fc2cd304e77d297..1f3dbce9c786088 100644
--- a/libc/src/stdlib/rand_util.cpp
+++ b/libc/src/stdlib/rand_util.cpp
@@ -11,8 +11,14 @@
 
 namespace LIBC_NAMESPACE {
 
+#ifdef LIBC_TARGET_ARCH_IS_GPU
+// FIXME: Local GPU memory cannot be initialized so we cannot currently provide
+// a standard compliant default value.
+ThreadLocal<unsigned long> rand_next;
+#else
 // C standard 7.10p2: If 'rand' is called before 'srand' it is to proceed as if
 // the 'srand' function was called with a value of '1'.
 LIBC_THREAD_LOCAL unsigned long rand_next = 1;
+#endif
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdlib/rand_util.h b/libc/src/stdlib/rand_util.h
index 55c36a45bbc6b94..cadd6b5cdcbb8c0 100644
--- a/libc/src/stdlib/rand_util.h
+++ b/libc/src/stdlib/rand_util.h
@@ -9,11 +9,33 @@
 #ifndef LLVM_LIBC_SRC_STDLIB_RAND_UTIL_H
 #define LLVM_LIBC_SRC_STDLIB_RAND_UTIL_H
 
+#include "src/__support/GPU/utils.h"
 #include "src/__support/macros/attributes.h"
 
 namespace LIBC_NAMESPACE {
 
+#ifdef LIBC_TARGET_ARCH_IS_GPU
+// Implement thread local storage on the GPU using local memory. Each thread
+// gets its slot in the local memory array and is private to the group.
+// TODO: We need to implement the 'thread_local' keyword on the GPU. This is an
+// inefficient and incomplete stand-in until that is done.
+template <typename T> class ThreadLocal {
+private:
+  static constexpr long MAX_THREADS = 1024;
+  [[clang::loader_uninitialized]] static inline gpu::Local<T>
+      storage[MAX_THREADS];
+
+public:
+  LIBC_INLINE operator T() const { return storage[gpu::get_thread_id()]; }
+  LIBC_INLINE void operator=(const T &value) {
+    storage[gpu::get_thread_id()] = value;
+  }
+};
+
+extern ThreadLocal<unsigned long> rand_next;
+#else
 extern LIBC_THREAD_LOCAL unsigned long rand_next;
+#endif
 
 } // namespace LIBC_NAMESPACE
 
diff --git a/libc/test/src/stdlib/rand_test.cpp b/libc/test/src/stdlib/rand_test.cpp
index 6f25708e539053d..7934dc16aa461a8 100644
--- a/libc/test/src/stdlib/rand_test.cpp
+++ b/libc/test/src/stdlib/rand_test.cpp
@@ -23,12 +23,15 @@ TEST(LlvmLibcRandTest, UnsetSeed) {
     vals[i] = val;
   }
 
+  // FIXME: The GPU implementation cannot initialize the seed correctly.
+#ifndef LIBC_TARGET_ARCH_IS_GPU
   // The C standard specifies that if 'srand' is never called it should behave
   // as if 'srand' was called with a value of 1. If we seed the value with 1 we
   // should get the same sequence as the unseeded version.
   LIBC_NAMESPACE::srand(1);
   for (size_t i = 0; i < 1000; ++i)
     ASSERT_EQ(LIBC_NAMESPACE::rand(), vals[i]);
+#endif
 }
 
 TEST(LlvmLibcRandTest, SetSeed) {

>From afdad4fd402d1e8917375960df7104bc16f4cba9 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar at redhat.com>
Date: Thu, 19 Oct 2023 14:02:23 -0700
Subject: [PATCH 55/76] workflows/release-tasks: Fix release note artifact
 upload (#69522)

---
 .github/workflows/release-tasks.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release-tasks.yml b/.github/workflows/release-tasks.yml
index 065b84dd8822eb7..85b720e323d1b1d 100644
--- a/.github/workflows/release-tasks.yml
+++ b/.github/workflows/release-tasks.yml
@@ -48,7 +48,7 @@ jobs:
           ./llvm/utils/release/github-upload-release.py --token ${{ github.token }} --release ${{ steps.validate-tag.outputs.release-version }} upload --files ./*doxygen*.tar.xz
 
       - name: Create Release Notes Artifact
-        uses: actions/download-artifact at v3
+        uses: actions/upload-artifact at v3
         with:
           name: release-notes
           path: docs-build/html-export/

>From 621a271aac421a6253931be585303bd3958d0f1a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?=
 =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?=
 =?UTF-8?q?=E3=83=B3=29?= <clementval at gmail.com>
Date: Thu, 19 Oct 2023 14:03:16 -0700
Subject: [PATCH 56/76] [flang][openacc] Warn for num_gangs, num_workers and
 vector_length on acc serial (#69622)

For portability with other compilers, just issue a portability warning
instead of a hard error when `num_gangs`, `num_workers` or
`vector_length` are present on an `!$acc serial` directive
---
 flang/lib/Semantics/check-acc-structure.cpp   | 21 ++++++++++++++++---
 .../lib/Semantics/check-directive-structure.h | 18 ++++++++++------
 flang/test/Semantics/OpenACC/acc-serial.f90   |  6 +++---
 3 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/flang/lib/Semantics/check-acc-structure.cpp b/flang/lib/Semantics/check-acc-structure.cpp
index ef253586cfa0ed1..763418ede24d5a7 100644
--- a/flang/lib/Semantics/check-acc-structure.cpp
+++ b/flang/lib/Semantics/check-acc-structure.cpp
@@ -380,14 +380,12 @@ CHECK_SIMPLE_CLAUSE(IfPresent, ACCC_if_present)
 CHECK_SIMPLE_CLAUSE(Independent, ACCC_independent)
 CHECK_SIMPLE_CLAUSE(NoCreate, ACCC_no_create)
 CHECK_SIMPLE_CLAUSE(Nohost, ACCC_nohost)
-CHECK_SIMPLE_CLAUSE(NumWorkers, ACCC_num_workers)
 CHECK_SIMPLE_CLAUSE(Private, ACCC_private)
 CHECK_SIMPLE_CLAUSE(Read, ACCC_read)
 CHECK_SIMPLE_CLAUSE(Seq, ACCC_seq)
 CHECK_SIMPLE_CLAUSE(Tile, ACCC_tile)
 CHECK_SIMPLE_CLAUSE(UseDevice, ACCC_use_device)
 CHECK_SIMPLE_CLAUSE(Vector, ACCC_vector)
-CHECK_SIMPLE_CLAUSE(VectorLength, ACCC_vector_length)
 CHECK_SIMPLE_CLAUSE(Wait, ACCC_wait)
 CHECK_SIMPLE_CLAUSE(Worker, ACCC_worker)
 CHECK_SIMPLE_CLAUSE(Write, ACCC_write)
@@ -541,13 +539,30 @@ void AccStructureChecker::Enter(const parser::AccClause::Gang &g) {
 }
 
 void AccStructureChecker::Enter(const parser::AccClause::NumGangs &n) {
-  CheckAllowed(llvm::acc::Clause::ACCC_num_gangs);
+  CheckAllowed(llvm::acc::Clause::ACCC_num_gangs,
+      /*warnInsteadOfError=*/GetContext().directive ==
+              llvm::acc::Directive::ACCD_serial ||
+          GetContext().directive == llvm::acc::Directive::ACCD_serial_loop);
 
   if (n.v.size() > 3)
     context_.Say(GetContext().clauseSource,
         "NUM_GANGS clause accepts a maximum of 3 arguments"_err_en_US);
 }
 
+void AccStructureChecker::Enter(const parser::AccClause::NumWorkers &n) {
+  CheckAllowed(llvm::acc::Clause::ACCC_num_workers,
+      /*warnInsteadOfError=*/GetContext().directive ==
+              llvm::acc::Directive::ACCD_serial ||
+          GetContext().directive == llvm::acc::Directive::ACCD_serial_loop);
+}
+
+void AccStructureChecker::Enter(const parser::AccClause::VectorLength &n) {
+  CheckAllowed(llvm::acc::Clause::ACCC_vector_length,
+      /*warnInsteadOfError=*/GetContext().directive ==
+              llvm::acc::Directive::ACCD_serial ||
+          GetContext().directive == llvm::acc::Directive::ACCD_serial_loop);
+}
+
 void AccStructureChecker::Enter(const parser::AccClause::Reduction &reduction) {
   CheckAllowed(llvm::acc::Clause::ACCC_reduction);
 
diff --git a/flang/lib/Semantics/check-directive-structure.h b/flang/lib/Semantics/check-directive-structure.h
index 14a3151e672685e..9c3aa47e19e5c7c 100644
--- a/flang/lib/Semantics/check-directive-structure.h
+++ b/flang/lib/Semantics/check-directive-structure.h
@@ -333,7 +333,7 @@ class DirectiveStructureChecker : public virtual BaseChecker {
 
   void CheckRequireAtLeastOneOf(bool warnInsteadOfError = false);
 
-  void CheckAllowed(C clause);
+  void CheckAllowed(C clause, bool warnInsteadOfError = false);
 
   void CheckAtLeastOneClause();
 
@@ -452,15 +452,21 @@ std::string DirectiveStructureChecker<D, C, PC,
 // Check that clauses present on the directive are allowed clauses.
 template <typename D, typename C, typename PC, std::size_t ClauseEnumSize>
 void DirectiveStructureChecker<D, C, PC, ClauseEnumSize>::CheckAllowed(
-    C clause) {
+    C clause, bool warnInsteadOfError) {
   if (!GetContext().allowedClauses.test(clause) &&
       !GetContext().allowedOnceClauses.test(clause) &&
       !GetContext().allowedExclusiveClauses.test(clause) &&
       !GetContext().requiredClauses.test(clause)) {
-    context_.Say(GetContext().clauseSource,
-        "%s clause is not allowed on the %s directive"_err_en_US,
-        parser::ToUpperCaseLetters(getClauseName(clause).str()),
-        parser::ToUpperCaseLetters(GetContext().directiveSource.ToString()));
+    if (warnInsteadOfError)
+      context_.Say(GetContext().clauseSource,
+          "%s clause is not allowed on the %s directive and will be ignored"_port_en_US,
+          parser::ToUpperCaseLetters(getClauseName(clause).str()),
+          parser::ToUpperCaseLetters(GetContext().directiveSource.ToString()));
+    else
+      context_.Say(GetContext().clauseSource,
+          "%s clause is not allowed on the %s directive"_err_en_US,
+          parser::ToUpperCaseLetters(getClauseName(clause).str()),
+          parser::ToUpperCaseLetters(GetContext().directiveSource.ToString()));
     return;
   }
   if ((GetContext().allowedOnceClauses.test(clause) ||
diff --git a/flang/test/Semantics/OpenACC/acc-serial.f90 b/flang/test/Semantics/OpenACC/acc-serial.f90
index a052ef4e476a885..afcfc00a40ec64d 100644
--- a/flang/test/Semantics/OpenACC/acc-serial.f90
+++ b/flang/test/Semantics/OpenACC/acc-serial.f90
@@ -77,15 +77,15 @@ program openacc_serial_validity
   !$acc serial wait(wait1) wait(wait2)
   !$acc end serial
 
-  !ERROR: NUM_GANGS clause is not allowed on the SERIAL directive
+  !PORTABILITY: NUM_GANGS clause is not allowed on the SERIAL directive and will be ignored
   !$acc serial num_gangs(8)
   !$acc end serial
 
-  !ERROR: NUM_WORKERS clause is not allowed on the SERIAL directive
+  !PORTABILITY: NUM_WORKERS clause is not allowed on the SERIAL directive and will be ignored
   !$acc serial num_workers(8)
   !$acc end serial
 
-  !ERROR: VECTOR_LENGTH clause is not allowed on the SERIAL directive
+  !PORTABILITY: VECTOR_LENGTH clause is not allowed on the SERIAL directive and will be ignored
   !$acc serial vector_length(128)
   !$acc end serial
 

>From ea9e116e5a24e834142bc4024b57697b48599a9e Mon Sep 17 00:00:00 2001
From: Felipe de Azevedo Piovezan <fpiovezan at apple.com>
Date: Thu, 19 Oct 2023 14:10:31 -0700
Subject: [PATCH 57/76] [lldb][NFCI] Remove duplicated code in DWARFParser
 (#69531)

The method DWARFDebugInfoEntry::Extract needs to skip over all the data
in the debug_info / debug_types section for each DIE. It had the logic
to do so hardcoded inside a loop, when it already exists in a neatly
isolated function.
---
 .../SymbolFile/DWARF/DWARFDebugInfoEntry.cpp  | 141 ++----------------
 1 file changed, 9 insertions(+), 132 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
index a18836e5d9bbbb4..1b0fefedf983694 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
@@ -50,16 +50,12 @@ bool DWARFDebugInfoEntry::Extract(const DWARFDataExtractor &data,
   lldbassert(abbr_idx <= UINT16_MAX);
   m_abbr_idx = abbr_idx;
 
-  // assert (fixed_form_sizes);  // For best performance this should be
-  // specified!
-
   if (m_abbr_idx == 0) {
     m_tag = llvm::dwarf::DW_TAG_null;
     m_has_children = false;
     return true; // NULL debug tag entry
   }
 
-  lldb::offset_t offset = *offset_ptr;
   const auto *abbrevDecl = GetAbbreviationDeclarationPtr(cu);
   if (abbrevDecl == nullptr) {
     cu->GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError(
@@ -74,137 +70,18 @@ bool DWARFDebugInfoEntry::Extract(const DWARFDataExtractor &data,
   m_tag = abbrevDecl->getTag();
   m_has_children = abbrevDecl->hasChildren();
   // Skip all data in the .debug_info or .debug_types for the attributes
-  dw_form_t form;
   for (const auto &attribute : abbrevDecl->attributes()) {
-    form = attribute.Form;
-    std::optional<uint8_t> fixed_skip_size =
-        DWARFFormValue::GetFixedSize(form, cu);
-    if (fixed_skip_size)
-      offset += *fixed_skip_size;
-    else {
-      bool form_is_indirect = false;
-      do {
-        form_is_indirect = false;
-        uint32_t form_size = 0;
-        switch (form) {
-        // Blocks if inlined data that have a length field and the data bytes
-        // inlined in the .debug_info/.debug_types
-        case DW_FORM_exprloc:
-        case DW_FORM_block:
-          form_size = data.GetULEB128(&offset);
-          break;
-        case DW_FORM_block1:
-          form_size = data.GetU8_unchecked(&offset);
-          break;
-        case DW_FORM_block2:
-          form_size = data.GetU16_unchecked(&offset);
-          break;
-        case DW_FORM_block4:
-          form_size = data.GetU32_unchecked(&offset);
-          break;
-
-        // Inlined NULL terminated C-strings
-        case DW_FORM_string:
-          data.GetCStr(&offset);
-          break;
-
-        // Compile unit address sized values
-        case DW_FORM_addr:
-          form_size = cu->GetAddressByteSize();
-          break;
-        case DW_FORM_ref_addr:
-          if (cu->GetVersion() <= 2)
-            form_size = cu->GetAddressByteSize();
-          else
-            form_size = 4;
-          break;
+    if (DWARFFormValue::SkipValue(attribute.Form, data, offset_ptr, cu))
+      continue;
 
-        // 0 sized form
-        case DW_FORM_flag_present:
-          form_size = 0;
-          break;
-
-        // 1 byte values
-        case DW_FORM_addrx1:
-        case DW_FORM_data1:
-        case DW_FORM_flag:
-        case DW_FORM_ref1:
-        case DW_FORM_strx1:
-          form_size = 1;
-          break;
-
-        // 2 byte values
-        case DW_FORM_addrx2:
-        case DW_FORM_data2:
-        case DW_FORM_ref2:
-        case DW_FORM_strx2:
-          form_size = 2;
-          break;
-
-        // 3 byte values
-        case DW_FORM_addrx3:
-        case DW_FORM_strx3:
-          form_size = 3;
-          break;
-
-        // 4 byte values
-        case DW_FORM_addrx4:
-        case DW_FORM_data4:
-        case DW_FORM_ref4:
-        case DW_FORM_strx4:
-          form_size = 4;
-          break;
-
-        // 8 byte values
-        case DW_FORM_data8:
-        case DW_FORM_ref8:
-        case DW_FORM_ref_sig8:
-          form_size = 8;
-          break;
-
-        // signed or unsigned LEB 128 values
-        case DW_FORM_addrx:
-        case DW_FORM_loclistx:
-        case DW_FORM_rnglistx:
-        case DW_FORM_sdata:
-        case DW_FORM_udata:
-        case DW_FORM_ref_udata:
-        case DW_FORM_GNU_addr_index:
-        case DW_FORM_GNU_str_index:
-        case DW_FORM_strx:
-          data.Skip_LEB128(&offset);
-          break;
-
-        case DW_FORM_indirect:
-          form_is_indirect = true;
-          form = static_cast<dw_form_t>(data.GetULEB128(&offset));
-          break;
-
-        case DW_FORM_strp:
-        case DW_FORM_line_strp:
-        case DW_FORM_sec_offset:
-          data.GetU32(&offset);
-          break;
-
-        case DW_FORM_implicit_const:
-          form_size = 0;
-          break;
-
-        default:
-          cu->GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError(
-              "[{0:x16}]: Unsupported DW_FORM_{1:x}, please file a bug "
-              "and "
-              "attach the file at the start of this error message",
-              (uint64_t)m_offset, (unsigned)form);
-          *offset_ptr = m_offset;
-          return false;
-        }
-        offset += form_size;
-
-      } while (form_is_indirect);
-    }
+    cu->GetSymbolFileDWARF().GetObjectFile()->GetModule()->ReportError(
+        "[{0:x16}]: Unsupported DW_FORM_{1:x}, please file a bug "
+        "and "
+        "attach the file at the start of this error message",
+        (uint64_t)m_offset, (unsigned)attribute.Form);
+    *offset_ptr = m_offset;
+    return false;
   }
-  *offset_ptr = offset;
   return true;
 }
 

>From a30095a1e43040786ff88bd5a37fb4c995445573 Mon Sep 17 00:00:00 2001
From: Ryan Prichard <rprichard at google.com>
Date: Thu, 19 Oct 2023 14:15:20 -0700
Subject: [PATCH 58/76] [libc++][Android] Add libcxx-builder-android Docker
 image (#69273)

Add a Dockerfile for a new Docker image, libcxx-builder-android, that
extends libcxx-builder with support for testing Android.

The image includes these things:

 * An Android Clang compiler and sysroot.

* The Android platform-tools (e.g. adb), so that an Android buildbot can
run programs on an Android device. At container startup, copy these
platform tools to an "android-platform-tools" Docker volume to share
them with an emulator container. This copying ensures that the emulator
and libcxx-builder containers avoid mismatched adb versions.

* Docker, so that an Android buildbot can manage a sibling Docker
container that runs the Android emulator.

Add an Android-specific run-buildbot-container script for local
development. Currently using this script requires building
libcxx-builder-android and an emulator image locally.

Fixes: https://github.com/llvm/llvm-project/issues/69270
Differential Revision: https://reviews.llvm.org/D155271
---
 libcxx/utils/ci/vendor/android/Dockerfile     | 83 +++++++++++++++++++
 .../ci/vendor/android/container-setup.sh      | 19 +++++
 .../ci/vendor/android/run-buildbot-container  | 31 +++++++
 libcxx/utils/data/ignore_format.txt           |  2 +
 4 files changed, 135 insertions(+)
 create mode 100644 libcxx/utils/ci/vendor/android/Dockerfile
 create mode 100755 libcxx/utils/ci/vendor/android/container-setup.sh
 create mode 100755 libcxx/utils/ci/vendor/android/run-buildbot-container

diff --git a/libcxx/utils/ci/vendor/android/Dockerfile b/libcxx/utils/ci/vendor/android/Dockerfile
new file mode 100644
index 000000000000000..0acfff8e031dc81
--- /dev/null
+++ b/libcxx/utils/ci/vendor/android/Dockerfile
@@ -0,0 +1,83 @@
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+#
+# This Dockerfile extends ldionne/libcxx-builder with Android support, including
+# Android Clang and sysroot, Android platform-tools, and the Docker client.
+#
+#   $ docker build -t libcxx-builder-android libcxx/utils/ci/vendor/android
+#
+
+FROM ldionne/libcxx-builder
+
+# Switch back to the root user to install things into /opt and /usr.
+USER root
+WORKDIR /
+
+# Install the Android platform tools (e.g. adb) into /opt/android/sdk.
+RUN apt-get update && apt-get install -y unzip
+RUN mkdir -p /opt/android/sdk && cd /opt/android/sdk && \
+    curl -LO https://dl.google.com/android/repository/platform-tools-latest-linux.zip && \
+    unzip platform-tools-latest-linux.zip && \
+    rm platform-tools-latest-linux.zip
+
+# Install the current Android compiler. Specify the prebuilts commit to retrieve
+# this compiler version even after it's removed from HEAD.
+ENV ANDROID_CLANG_VERSION=r498229b
+ENV ANDROID_CLANG_PREBUILTS_COMMIT=5186d132c99aa75dc25207c392e3ea5b93d0107e
+RUN git clone --filter=blob:none --sparse \
+        https://android.googlesource.com/platform/prebuilts/clang/host/linux-x86 \
+        /opt/android/clang && \
+    git -C /opt/android/clang checkout ${ANDROID_CLANG_PREBUILTS_COMMIT} && \
+    git -C /opt/android/clang sparse-checkout add clang-${ANDROID_CLANG_VERSION} && \
+    rm -fr /opt/android/clang/.git && \
+    ln -sf /opt/android/clang/clang-${ANDROID_CLANG_VERSION} /opt/android/clang/clang-current && \
+    # The "git sparse-checkout" and "ln" commands succeed even if nothing was
+    # checked out, so use this "ls" command to fix that.
+    ls /opt/android/clang/clang-current/bin/clang
+
+# Install an Android sysroot. New AOSP sysroots are available at
+# https://ci.android.com/builds/branches/aosp-main/grid, the "ndk" target. The
+# NDK also makes its sysroot prebuilt available at
+# https://android.googlesource.com/platform/prebuilts/ndk/+/refs/heads/dev/platform/sysroot.
+ENV ANDROID_SYSROOT_BID=10957860
+RUN cd /opt/android && \
+    curl -L -o ndk_platform.tar.bz2 \
+        https://androidbuildinternal.googleapis.com/android/internal/build/v3/builds/${ANDROID_SYSROOT_BID}/ndk/attempts/latest/artifacts/ndk_platform.tar.bz2/url && \
+    tar xf ndk_platform.tar.bz2 && \
+    rm ndk_platform.tar.bz2
+
+# Install Docker. Mark the binary setuid so it can be run without prefixing it
+# with sudo. Adding the container user to the docker group doesn't work because
+# /var/run/docker.sock is owned by the host's docker GID, not the container's
+# docker GID.
+RUN apt-get update && apt-get install -y gpg && \
+    install -m 0755 -d /etc/apt/keyrings && \
+    curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg && \
+    chmod a+r /etc/apt/keyrings/docker.gpg && \
+    echo "deb [arch="$(dpkg --print-architecture)" signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \
+         "$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \
+         tee /etc/apt/sources.list.d/docker.list > /dev/null && \
+    apt-get update && apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin && \
+    chmod u+s /usr/bin/docker
+
+COPY ./container-setup.sh /opt/android/container-setup.sh
+
+USER libcxx-builder
+WORKDIR /home/libcxx-builder
+
+# Add Android platform-tools to the PATH.
+ENV PATH="/opt/android/sdk/platform-tools:${PATH}"
+
+# Reset the buildkite-agent.cfg file. The tags are provided by an environment
+# variable instead.
+RUN cp /home/libcxx-builder/.buildkite-agent/buildkite-agent.dist.cfg \
+       /home/libcxx-builder/.buildkite-agent/buildkite-agent.cfg
+
+# Modify the Buildkite agent cmdline to do Android setup stuff first.
+CMD /opt/android/container-setup.sh && buildkite-agent start
diff --git a/libcxx/utils/ci/vendor/android/container-setup.sh b/libcxx/utils/ci/vendor/android/container-setup.sh
new file mode 100755
index 000000000000000..56bc232fefa1e6b
--- /dev/null
+++ b/libcxx/utils/ci/vendor/android/container-setup.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+#===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===----------------------------------------------------------------------===##
+
+set -e
+
+# Different versions of adb can sometimes be incompatible (i.e. "adb server
+# version (nn) doesn't match this client (mm); killing..."). Ensure that the adb
+# in the main builder image matches that in the emulator by sharing the
+# platform-tools from the main image.
+if [ -d /mnt/android-platform-tools ]; then
+    sudo rm -fr /mnt/android-platform-tools/platform-tools
+    sudo cp -r /opt/android/sdk/platform-tools /mnt/android-platform-tools
+fi
diff --git a/libcxx/utils/ci/vendor/android/run-buildbot-container b/libcxx/utils/ci/vendor/android/run-buildbot-container
new file mode 100755
index 000000000000000..4ab83194c05d5c4
--- /dev/null
+++ b/libcxx/utils/ci/vendor/android/run-buildbot-container
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+# Similar to libcxx/utils/ci/run-buildbot-container, but adds additional options
+# needed for running Android tests.
+
+set -e
+
+MONOREPO_ROOT="$(git rev-parse --show-toplevel)"
+if [[ ! -d "${MONOREPO_ROOT}/libcxx/utils/ci/vendor/android" ]]; then
+    echo "Was unable to find the root of the LLVM monorepo; are you running from within the monorepo?"
+    exit 1
+fi
+
+DOCKER_OPTIONS=(-it)
+DOCKER_OPTIONS+=(--volume "${MONOREPO_ROOT}:/llvm")
+DOCKER_OPTIONS+=(--workdir "/llvm")
+DOCKER_OPTIONS+=(--cap-add=SYS_PTRACE)
+
+# Mount this volume to allow the main image to share its copy of the Android
+# platform tools with the emulator image, ensuring that the adb versions match.
+# This argument will create a new volume if it doesn't already exist.
+DOCKER_OPTIONS+=(--volume android-platform-tools:/mnt/android-platform-tools)
+
+# Pass through the Docker socket so that the buildbot can start a sibling
+# container running an Android emulator.
+if [ -S /var/run/docker.sock ]; then
+    DOCKER_OPTIONS+=(--volume /var/run/docker.sock:/var/run/docker.sock)
+fi
+
+docker run "${DOCKER_OPTIONS[@]}" libcxx-builder-android \
+    bash -c 'git config --global --add safe.directory /llvm; (/opt/android/container-setup.sh && exec bash)'
diff --git a/libcxx/utils/data/ignore_format.txt b/libcxx/utils/data/ignore_format.txt
index 82b1bc920d1db56..664e9f51e9ba7fd 100644
--- a/libcxx/utils/data/ignore_format.txt
+++ b/libcxx/utils/data/ignore_format.txt
@@ -7298,4 +7298,6 @@ libcxx/utils/ci/Dockerfile
 libcxx/utils/ci/macos-ci-setup
 libcxx/utils/ci/run-buildbot
 libcxx/utils/ci/run-buildbot-container
+libcxx/utils/ci/vendor/android/Dockerfile
+libcxx/utils/ci/vendor/android/run-buildbot-container
 libcxx/utils/libcxx-lit

>From a2288a8944c310fcad1196302f16513797e1fcbc Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental at gmail.com>
Date: Thu, 19 Oct 2023 16:20:14 -0500
Subject: [PATCH 59/76] [mlir][python] remove mixins (#68853)

This PR replaces the mixin `OpView` extension mechanism with the
standard inheritance mechanism.

Why? Firstly, mixins are not very pythonic (inheritance is usually used
for this), a little convoluted, and too "tight" (can only be used in the
immediately adjacent `_ext.py`). Secondly, they (mixins) are now blocking
a correct implementation of "value builders" (see
[here](https://github.com/llvm/llvm-project/pull/68764)) where the
problem becomes how to choose the correct base class that the value
builder should call.

This PR looks big/complicated but appearances are deceiving; 4 things
were needed to make this work:

1. Drop `skipDefaultBuilders` in
`OpPythonBindingGen::emitDefaultOpBuilders`
2. Former mixin extension classes are converted to inherit from the
generated `OpView` instead of being "mixins"
a. extension classes that simply were calling into an already generated
`super().__init__` continue to do so
b. (almost all) extension classes that were calling `self.build_generic`
because of a lack of default builder being generated can now also just
call `super().__init__`
3. To handle the [lone single
use-case](https://sourcegraph.com/search?q=context%3Aglobal+select_opview_mixin&patternType=standard&sm=1&groupBy=repo)
of `select_opview_mixin`, namely
[linalg](https://github.com/llvm/llvm-project/blob/main/mlir/python/mlir/dialects/_linalg_ops_ext.py#L38),
only a small change was necessary in `opdsl/lang/emitter.py` (thanks to
the emission/generation of default builders/`__init__`s)
4. since the `extend_opview_class` decorator is removed, we need a way
to register extension classes as the desired `OpView` that `op.opview`
conjures into existence; so we do the standard thing and just enable
replacing the existing registered `OpView` i.e.,
`register_operation(_Dialect, replace=True)`.

Note, the upgrade path for the common case is to change an extension to
inherit from the generated builder and decorate it with
`register_operation(_Dialect, replace=True)`. In the slightly more
complicated case where `super().__init(self.build_generic(...))` is
called in the extension's `__init__`, this needs to be updated to call
`__init__` in `OpView`, i.e., the grandparent (see updated docs).
Note, also `<DIALECT>_ext.py` files/modules will no longer be automatically loaded.

Note, the PR has 3 base commits that look funny but this was done for
the purpose of tracking the line history of moving the
`<DIALECT>_ops_ext.py` class into `<DIALECT>.py` and updating (commit
labeled "fix").
---
 mlir/docs/Bindings/Python.md                  | 127 ++-
 mlir/lib/Bindings/Python/Globals.h            |   4 +-
 mlir/lib/Bindings/Python/IRModule.cpp         |   4 +-
 mlir/lib/Bindings/Python/MainModule.cpp       |  11 +-
 mlir/python/CMakeLists.txt                    |  19 -
 mlir/python/mlir/dialects/_affine_ops_ext.py  |  56 --
 mlir/python/mlir/dialects/_arith_ops_ext.py   |  69 --
 .../mlir/dialects/_bufferization_ops_ext.py   |  41 -
 .../_bufferization_transform_ops_ext.py       | 128 ---
 mlir/python/mlir/dialects/_builtin_ops_ext.py |  20 -
 mlir/python/mlir/dialects/_func_ops_ext.py    | 319 --------
 .../mlir/dialects/_gpu_transform_ops_ext.py   | 124 ---
 mlir/python/mlir/dialects/_linalg_ops_ext.py  |  47 --
 .../mlir/dialects/_loop_transform_ops_ext.py  | 134 ---
 mlir/python/mlir/dialects/_memref_ops_ext.py  |  36 -
 .../dialects/_memref_transform_ops_ext.py     | 114 ---
 .../mlir/dialects/_ml_program_ops_ext.py      | 113 ---
 mlir/python/mlir/dialects/_ods_common.py      |  59 --
 mlir/python/mlir/dialects/_pdl_ops_ext.py     | 271 ------
 mlir/python/mlir/dialects/_scf_ops_ext.py     | 107 ---
 .../dialects/_structured_transform_ops_ext.py | 759 -----------------
 mlir/python/mlir/dialects/_tensor_ops_ext.py  |  44 -
 .../dialects/_tensor_transform_ops_ext.py     |  64 --
 .../mlir/dialects/_transform_ops_ext.py       | 176 ----
 .../_transform_pdl_extension_ops_ext.py       |  55 --
 mlir/python/mlir/dialects/affine.py           |  51 +-
 mlir/python/mlir/dialects/arith.py            |  71 ++
 mlir/python/mlir/dialects/bufferization.py    |  36 +
 mlir/python/mlir/dialects/builtin.py          |  20 +
 mlir/python/mlir/dialects/func.py             | 323 ++++++++
 .../dialects/linalg/opdsl/lang/emitter.py     |   2 +-
 .../linalg/opdsl/ops/core_named_ops.py        | 107 +--
 mlir/python/mlir/dialects/memref.py           |  38 +
 mlir/python/mlir/dialects/ml_program.py       | 114 +++
 mlir/python/mlir/dialects/pdl.py              | 285 +++++++
 mlir/python/mlir/dialects/python_test.py      |   7 +-
 mlir/python/mlir/dialects/scf.py              | 115 ++-
 mlir/python/mlir/dialects/tensor.py           |  37 +
 .../mlir/dialects/transform/__init__.py       | 170 ++++
 .../mlir/dialects/transform/bufferization.py  | 129 +++
 mlir/python/mlir/dialects/transform/gpu.py    | 125 +++
 mlir/python/mlir/dialects/transform/loop.py   | 140 ++++
 mlir/python/mlir/dialects/transform/memref.py | 115 +++
 mlir/python/mlir/dialects/transform/pdl.py    |  50 ++
 .../mlir/dialects/transform/structured.py     | 773 ++++++++++++++++++
 mlir/python/mlir/dialects/transform/tensor.py |  64 ++
 mlir/python/mlir/runtime/np_to_memref.py      |   8 +-
 mlir/test/python/dialects/arith_dialect.py    |  13 +
 mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp |  40 +-
 49 files changed, 2814 insertions(+), 2920 deletions(-)
 delete mode 100644 mlir/python/mlir/dialects/_affine_ops_ext.py
 delete mode 100644 mlir/python/mlir/dialects/_arith_ops_ext.py
 delete mode 100644 mlir/python/mlir/dialects/_bufferization_ops_ext.py
 delete mode 100644 mlir/python/mlir/dialects/_bufferization_transform_ops_ext.py
 delete mode 100644 mlir/python/mlir/dialects/_builtin_ops_ext.py
 delete mode 100644 mlir/python/mlir/dialects/_func_ops_ext.py
 delete mode 100644 mlir/python/mlir/dialects/_gpu_transform_ops_ext.py
 delete mode 100644 mlir/python/mlir/dialects/_linalg_ops_ext.py
 delete mode 100644 mlir/python/mlir/dialects/_loop_transform_ops_ext.py
 delete mode 100644 mlir/python/mlir/dialects/_memref_ops_ext.py
 delete mode 100644 mlir/python/mlir/dialects/_memref_transform_ops_ext.py
 delete mode 100644 mlir/python/mlir/dialects/_ml_program_ops_ext.py
 delete mode 100644 mlir/python/mlir/dialects/_pdl_ops_ext.py
 delete mode 100644 mlir/python/mlir/dialects/_scf_ops_ext.py
 delete mode 100644 mlir/python/mlir/dialects/_structured_transform_ops_ext.py
 delete mode 100644 mlir/python/mlir/dialects/_tensor_ops_ext.py
 delete mode 100644 mlir/python/mlir/dialects/_tensor_transform_ops_ext.py
 delete mode 100644 mlir/python/mlir/dialects/_transform_ops_ext.py
 delete mode 100644 mlir/python/mlir/dialects/_transform_pdl_extension_ops_ext.py

diff --git a/mlir/docs/Bindings/Python.md b/mlir/docs/Bindings/Python.md
index bf54efee1f14e0c..bc2e676a878c0f4 100644
--- a/mlir/docs/Bindings/Python.md
+++ b/mlir/docs/Bindings/Python.md
@@ -1017,90 +1017,79 @@ very generic signature.
 
 #### Extending Generated Op Classes
 
-Note that this is a rather complex mechanism and this section errs on the side
-of explicitness. Users are encouraged to find an example and duplicate it if
-they don't feel the need to understand the subtlety. The `builtin` dialect
-provides some relatively simple examples.
-
 As mentioned above, the build system generates Python sources like
 `_{DIALECT_NAMESPACE}_ops_gen.py` for each dialect with Python bindings. It is
-often desirable to to use these generated classes as a starting point for
-further customization, so an extension mechanism is provided to make this easy
-(you are always free to do ad-hoc patching in your `{DIALECT_NAMESPACE}.py` file
-but we prefer a more standard mechanism that is applied uniformly).
+often desirable to use these generated classes as a starting point for
+further customization, so an extension mechanism is provided to make this easy.
+This mechanism uses conventional inheritance combined with `OpView` registration.
+For example, the default builder for `arith.constant`
+
+```python
+class ConstantOp(_ods_ir.OpView):
+  OPERATION_NAME = "arith.constant"
+
+  _ODS_REGIONS = (0, True)
+
+  def __init__(self, value, *, loc=None, ip=None):
+    ...
+```
 
-To provide extensions, add a `_{DIALECT_NAMESPACE}_ops_ext.py` file to the
-`dialects` module (i.e. adjacent to your `{DIALECT_NAMESPACE}.py` top-level and
-the `*_ops_gen.py` file). Using the `builtin` dialect and `FuncOp` as an
-example, the generated code will include an import like this:
+expects `value` to be a `TypedAttr` (e.g., `IntegerAttr` or `FloatAttr`).
+Thus, a natural extension is a builder that accepts an MLIR type and a Python value and instantiates the appropriate `TypedAttr`:
 
 ```python
-try:
-  from . import _builtin_ops_ext as _ods_ext_module
-except ImportError:
-  _ods_ext_module = None
+from typing import Union
+
+from mlir.ir import Type, IntegerAttr, FloatAttr
+from mlir.dialects._arith_ops_gen import _Dialect, ConstantOp
+from mlir.dialects._ods_common import _cext
+
+ at _cext.register_operation(_Dialect, replace=True)
+class ConstantOpExt(ConstantOp):
+    def __init__(
+        self, result: Type, value: Union[int, float], *, loc=None, ip=None
+    ):
+        if isinstance(value, int):
+            super().__init__(IntegerAttr.get(result, value), loc=loc, ip=ip)
+        elif isinstance(value, float):
+            super().__init__(FloatAttr.get(result, value), loc=loc, ip=ip)
+        else:
+            raise NotImplementedError(f"Building `arith.constant` not supported for {result=} {value=}")
 ```
 
-Then for each generated concrete `OpView` subclass, it will apply a decorator
-like:
+which enables building an instance of `arith.constant` like so:
 
 ```python
- at _ods_cext.register_operation(_Dialect)
- at _ods_extend_opview_class(_ods_ext_module)
-class FuncOp(_ods_ir.OpView):
+from mlir.ir import F32Type
+
+a = ConstantOpExt(F32Type.get(), 42.42)
+b = ConstantOpExt(IntegerType.get_signless(32), 42)
 ```
 
-See the `_ods_common.py` `extend_opview_class` function for details of the
-mechanism. At a high level:
-
-*   If the extension module exists, locate an extension class for the op (in
-    this example, `FuncOp`):
-    *   First by looking for an attribute with the exact name in the extension
-        module.
-    *   Falling back to calling a `select_opview_mixin(parent_opview_cls)`
-        function defined in the extension module.
-*   If a mixin class is found, a new subclass is dynamically created that
-    multiply inherits from `({_builtin_ops_ext.FuncOp},
-    _builtin_ops_gen.FuncOp)`.
-
-The mixin class should not inherit from anything (i.e. directly extends `object`
-only). The facility is typically used to define custom `__init__` methods,
-properties, instance methods and static methods. Due to the inheritance
-ordering, the mixin class can act as though it extends the generated `OpView`
-subclass in most contexts (i.e. `issubclass(_builtin_ops_ext.FuncOp, OpView)`
-will return `False` but usage generally allows you treat it as duck typed as an
-`OpView`).
-
-There are a couple of recommendations, given how the class hierarchy is defined:
-
-*   For static methods that need to instantiate the actual "leaf" op (which is
-    dynamically generated and would result in circular dependencies to try to
-    reference by name), prefer to use `@classmethod` and the concrete subclass
-    will be provided as your first `cls` argument. See
-    `_builtin_ops_ext.FuncOp.from_py_func` as an example.
-*   If seeking to replace the generated `__init__` method entirely, you may
-    actually want to invoke the super-super-class `mlir.ir.OpView` constructor
-    directly, as it takes an `mlir.ir.Operation`, which is likely what you are
-    constructing (i.e. the generated `__init__` method likely adds more API
-    constraints than you want to expose in a custom builder).
-
-A pattern that comes up frequently is wanting to provide a sugared `__init__`
-method which has optional or type-polymorphism/implicit conversions but to
-otherwise want to invoke the default op building logic. For such cases, it is
-recommended to use an idiom such as:
+Note, three key aspects of the extension mechanism in this example:
+
+1. `ConstantOpExt` directly inherits from the generated `ConstantOp`;
+2. in this simplest case, all that's required is a call to the superclass's initializer, i.e., `super().__init__(...)`;
+3. in order to register `ConstantOpExt` as the preferred `OpView` that is returned by `mlir.ir.Operation.opview` (see [Operations, Regions and Blocks](#operations-regions-and-blocks))
+   we decorate the class with `@_cext.register_operation(_Dialect, replace=True)`, **where the `replace=True` must be used**.
+
+In some more complex cases it might be necessary to explicitly build the `OpView` through `OpView.build_generic` (see [Default Builder](#default-builder)), just as is performed by the generated builders.
+I.e., we must call `OpView.build_generic` **and pass the result to `OpView.__init__`**, where the small issue becomes that the latter is already overridden by the generated builder.
+Thus, we must call a method of a super class' super class (the "grandparent"); for example:
 
 ```python
-  def __init__(self, sugar, spice, *, loc=None, ip=None):
-    ... massage into result_type, operands, attributes ...
-    OpView.__init__(self, self.build_generic(
-        results=[result_type],
-        operands=operands,
-        attributes=attributes,
-        loc=loc,
-        ip=ip))
+from mlir.dialects._scf_ops_gen import _Dialect, ForOp
+from mlir.dialects._ods_common import _cext
+
+ at _cext.register_operation(_Dialect, replace=True)
+class ForOpExt(ForOp):
+    def __init__(self, lower_bound, upper_bound, step, iter_args, *, loc=None, ip=None):
+        ...
+        super(ForOp, self).__init__(self.build_generic(...))
 ```
 
-Refer to the documentation for `build_generic` for more information.
+where `OpView.__init__` is called via `super(ForOp, self).__init__`.
+Note, there are alternative ways to implement this (e.g., explicitly writing `OpView.__init__`); see any discussion on Python inheritance.
 
 ## Providing Python bindings for a dialect
 
diff --git a/mlir/lib/Bindings/Python/Globals.h b/mlir/lib/Bindings/Python/Globals.h
index 97cd70089a2e965..21899bdce22e810 100644
--- a/mlir/lib/Bindings/Python/Globals.h
+++ b/mlir/lib/Bindings/Python/Globals.h
@@ -77,10 +77,10 @@ class PyGlobals {
                            pybind11::object pyClass);
 
   /// Adds a concrete implementation operation class.
-  /// Raises an exception if the mapping already exists.
+  /// Raises an exception if the mapping already exists and replace == false.
   /// This is intended to be called by implementation code.
   void registerOperationImpl(const std::string &operationName,
-                             pybind11::object pyClass);
+                             pybind11::object pyClass, bool replace = false);
 
   /// Returns the custom Attribute builder for Attribute kind.
   std::optional<pybind11::function>
diff --git a/mlir/lib/Bindings/Python/IRModule.cpp b/mlir/lib/Bindings/Python/IRModule.cpp
index 2cc66277abee0f0..a1c8ab7a09ce155 100644
--- a/mlir/lib/Bindings/Python/IRModule.cpp
+++ b/mlir/lib/Bindings/Python/IRModule.cpp
@@ -96,9 +96,9 @@ void PyGlobals::registerDialectImpl(const std::string &dialectNamespace,
 }
 
 void PyGlobals::registerOperationImpl(const std::string &operationName,
-                                      py::object pyClass) {
+                                      py::object pyClass, bool replace) {
   py::object &found = operationClassMap[operationName];
-  if (found) {
+  if (found && !replace) {
     throw std::runtime_error((llvm::Twine("Operation '") + operationName +
                               "' is already registered.")
                                  .str());
diff --git a/mlir/lib/Bindings/Python/MainModule.cpp b/mlir/lib/Bindings/Python/MainModule.cpp
index cdddfbe50606d05..a936becf67bea75 100644
--- a/mlir/lib/Bindings/Python/MainModule.cpp
+++ b/mlir/lib/Bindings/Python/MainModule.cpp
@@ -41,7 +41,7 @@ PYBIND11_MODULE(_mlir, m) {
            "dialect_namespace"_a, "dialect_class"_a,
            "Testing hook for directly registering a dialect")
       .def("_register_operation_impl", &PyGlobals::registerOperationImpl,
-           "operation_name"_a, "operation_class"_a,
+           "operation_name"_a, "operation_class"_a, "replace"_a = false,
            "Testing hook for directly registering an operation");
 
   // Aside from making the globals accessible to python, having python manage
@@ -63,12 +63,13 @@ PYBIND11_MODULE(_mlir, m) {
       "Class decorator for registering a custom Dialect wrapper");
   m.def(
       "register_operation",
-      [](const py::object &dialectClass) -> py::cpp_function {
+      [](const py::object &dialectClass, bool replace) -> py::cpp_function {
         return py::cpp_function(
-            [dialectClass](py::object opClass) -> py::object {
+            [dialectClass, replace](py::object opClass) -> py::object {
               std::string operationName =
                   opClass.attr("OPERATION_NAME").cast<std::string>();
-              PyGlobals::get().registerOperationImpl(operationName, opClass);
+              PyGlobals::get().registerOperationImpl(operationName, opClass,
+                                                     replace);
 
               // Dict-stuff the new opClass by name onto the dialect class.
               py::object opClassName = opClass.attr("__name__");
@@ -76,7 +77,7 @@ PYBIND11_MODULE(_mlir, m) {
               return opClass;
             });
       },
-      "dialect_class"_a,
+      "dialect_class"_a, "replace"_a = false,
       "Produce a class decorator for registering an Operation class as part of "
       "a dialect");
   m.def(
diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt
index c7b3c283a6b6dc1..88e6e13602d291a 100644
--- a/mlir/python/CMakeLists.txt
+++ b/mlir/python/CMakeLists.txt
@@ -52,7 +52,6 @@ declare_mlir_dialect_python_bindings(
   TD_FILE dialects/AffineOps.td
   SOURCES
     dialects/affine.py
-    dialects/_affine_ops_ext.py
   DIALECT_NAME affine
   GEN_ENUM_BINDINGS)
 
@@ -78,7 +77,6 @@ declare_mlir_dialect_python_bindings(
   TD_FILE dialects/BufferizationOps.td
   SOURCES
     dialects/bufferization.py
-    dialects/_bufferization_ops_ext.py
   DIALECT_NAME bufferization
   GEN_ENUM_BINDINGS_TD_FILE
     "../../include/mlir/Dialect/Bufferization/IR/BufferizationEnums.td"
@@ -90,7 +88,6 @@ declare_mlir_dialect_python_bindings(
   TD_FILE dialects/BuiltinOps.td
   SOURCES
     dialects/builtin.py
-    dialects/_builtin_ops_ext.py
   DIALECT_NAME builtin)
 
 declare_mlir_dialect_python_bindings(
@@ -115,7 +112,6 @@ declare_mlir_dialect_python_bindings(
   TD_FILE dialects/FuncOps.td
   SOURCES
     dialects/func.py
-    dialects/_func_ops_ext.py
   DIALECT_NAME func)
 
 declare_mlir_dialect_python_bindings(
@@ -131,7 +127,6 @@ declare_mlir_dialect_python_bindings(
   ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir"
   TD_FILE dialects/LinalgOps.td
   SOURCES
-    dialects/_linalg_ops_ext.py
   SOURCES_GLOB
     dialects/linalg/*.py
   DIALECT_NAME linalg
@@ -152,7 +147,6 @@ ADD_TO_PARENT MLIRPythonSources.Dialects
 ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir"
   TD_FILE dialects/TransformPDLExtensionOps.td
   SOURCES
-    dialects/_transform_pdl_extension_ops_ext.py
     dialects/transform/pdl.py
   DIALECT_NAME transform
   EXTENSION_NAME transform_pdl_extension)
@@ -162,7 +156,6 @@ declare_mlir_dialect_python_bindings(
   ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir"
   TD_FILE dialects/TransformOps.td
   SOURCES
-    dialects/_transform_ops_ext.py
     dialects/transform/__init__.py
     _mlir_libs/_mlir/dialects/transform/__init__.pyi
   DIALECT_NAME transform
@@ -175,7 +168,6 @@ declare_mlir_dialect_extension_python_bindings(
   ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir"
   TD_FILE dialects/BufferizationTransformOps.td
   SOURCES
-    dialects/_bufferization_transform_ops_ext.py
     dialects/transform/bufferization.py
   DIALECT_NAME transform
   EXTENSION_NAME bufferization_transform)
@@ -185,7 +177,6 @@ declare_mlir_dialect_extension_python_bindings(
   ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir"
   TD_FILE dialects/GPUTransformOps.td
   SOURCES
-    dialects/_gpu_transform_ops_ext.py
     dialects/transform/gpu.py
   DIALECT_NAME transform
   EXTENSION_NAME gpu_transform)
@@ -195,7 +186,6 @@ declare_mlir_dialect_extension_python_bindings(
   ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir"
   TD_FILE dialects/SCFLoopTransformOps.td
   SOURCES
-    dialects/_loop_transform_ops_ext.py
     dialects/transform/loop.py
   DIALECT_NAME transform
   EXTENSION_NAME loop_transform)
@@ -205,7 +195,6 @@ declare_mlir_dialect_extension_python_bindings(
   ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir"
   TD_FILE dialects/MemRefTransformOps.td
   SOURCES
-    dialects/_memref_transform_ops_ext.py
     dialects/transform/memref.py
   DIALECT_NAME transform
   EXTENSION_NAME memref_transform)
@@ -224,7 +213,6 @@ declare_mlir_dialect_extension_python_bindings(
   ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir"
   TD_FILE dialects/LinalgStructuredTransformOps.td
   SOURCES
-    dialects/_structured_transform_ops_ext.py
     dialects/transform/structured.py
   DIALECT_NAME transform
   EXTENSION_NAME structured_transform
@@ -246,7 +234,6 @@ declare_mlir_dialect_extension_python_bindings(
   ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir"
   TD_FILE dialects/TensorTransformOps.td
   SOURCES
-    dialects/_tensor_transform_ops_ext.py
     dialects/transform/tensor.py
   DIALECT_NAME transform
   EXTENSION_NAME tensor_transform)
@@ -276,7 +263,6 @@ declare_mlir_dialect_python_bindings(
   TD_FILE dialects/ArithOps.td
   SOURCES
     dialects/arith.py
-    dialects/_arith_ops_ext.py
   DIALECT_NAME arith
   GEN_ENUM_BINDINGS)
 
@@ -286,7 +272,6 @@ declare_mlir_dialect_python_bindings(
   TD_FILE dialects/MemRefOps.td
   SOURCES
     dialects/memref.py
-    dialects/_memref_ops_ext.py
   DIALECT_NAME memref)
 
 declare_mlir_dialect_python_bindings(
@@ -295,7 +280,6 @@ declare_mlir_dialect_python_bindings(
   TD_FILE dialects/MLProgramOps.td
   SOURCES
     dialects/ml_program.py
-    dialects/_ml_program_ops_ext.py
   DIALECT_NAME ml_program)
 
 declare_mlir_dialect_python_bindings(
@@ -339,7 +323,6 @@ declare_mlir_dialect_python_bindings(
   TD_FILE dialects/PDLOps.td
   SOURCES
     dialects/pdl.py
-    dialects/_pdl_ops_ext.py
     _mlir_libs/_mlir/dialects/pdl.pyi
   DIALECT_NAME pdl)
 
@@ -357,7 +340,6 @@ declare_mlir_dialect_python_bindings(
   TD_FILE dialects/SCFOps.td
   SOURCES
     dialects/scf.py
-    dialects/_scf_ops_ext.py
   DIALECT_NAME scf)
 
 declare_mlir_dialect_python_bindings(
@@ -383,7 +365,6 @@ declare_mlir_dialect_python_bindings(
   TD_FILE dialects/TensorOps.td
   SOURCES
     dialects/tensor.py
-    dialects/_tensor_ops_ext.py
   DIALECT_NAME tensor)
 
 declare_mlir_dialect_python_bindings(
diff --git a/mlir/python/mlir/dialects/_affine_ops_ext.py b/mlir/python/mlir/dialects/_affine_ops_ext.py
deleted file mode 100644
index dc465ce7aa1e5f9..000000000000000
--- a/mlir/python/mlir/dialects/_affine_ops_ext.py
+++ /dev/null
@@ -1,56 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-    from ..ir import *
-    from ._ods_common import get_op_result_or_value as _get_op_result_or_value
-    from ._ods_common import get_op_results_or_values as _get_op_results_or_values
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-from typing import Optional, Sequence, Union
-
-
-class AffineStoreOp:
-    """Specialization for the Affine store operation."""
-
-    def __init__(
-        self,
-        value: Union[Operation, OpView, Value],
-        memref: Union[Operation, OpView, Value],
-        map: AffineMap=None,
-        *,
-        map_operands=None,
-        loc=None,
-        ip=None
-    ):
-        """Creates an affine store operation.
-
-        - `value`: the value to store into the memref.
-        - `memref`: the buffer to store into.
-        - `map`: the affine map that maps the map_operands to the index of the 
-          memref.
-        - `map_operands`: the list of arguments to substitute the dimensions, 
-          then symbols in the affine map, in increasing order.
-        """
-        map = map if map is not None else []
-        map_operands = map_operands if map_operands is not None else []
-        operands = [
-            _get_op_result_or_value(value),
-            _get_op_result_or_value(memref),
-            *[_get_op_result_or_value(op) for op in map_operands]
-        ]
-        results = []
-        attributes = {"map": AffineMapAttr.get(map)}
-        regions = None
-        _ods_successors = None
-        super().__init__(self.build_generic(
-            attributes=attributes,
-            results=results,
-            operands=operands,
-            successors=_ods_successors,
-            regions=regions,
-            loc=loc,
-            ip=ip
-        ))
diff --git a/mlir/python/mlir/dialects/_arith_ops_ext.py b/mlir/python/mlir/dialects/_arith_ops_ext.py
deleted file mode 100644
index df38f871710fe8f..000000000000000
--- a/mlir/python/mlir/dialects/_arith_ops_ext.py
+++ /dev/null
@@ -1,69 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-    from ..ir import *
-    from ._ods_common import get_default_loc_context as _get_default_loc_context
-
-    from typing import Any, List, Union
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-
-def _isa(obj: Any, cls: type):
-    try:
-        cls(obj)
-    except ValueError:
-        return False
-    return True
-
-
-def _is_any_of(obj: Any, classes: List[type]):
-    return any(_isa(obj, cls) for cls in classes)
-
-
-def _is_integer_like_type(type: Type):
-    return _is_any_of(type, [IntegerType, IndexType])
-
-
-def _is_float_type(type: Type):
-    return _is_any_of(type, [BF16Type, F16Type, F32Type, F64Type])
-
-
-class ConstantOp:
-    """Specialization for the constant op class."""
-
-    def __init__(
-        self, result: Type, value: Union[int, float, Attribute], *, loc=None, ip=None
-    ):
-        if isinstance(value, int):
-            super().__init__(IntegerAttr.get(result, value), loc=loc, ip=ip)
-        elif isinstance(value, float):
-            super().__init__(FloatAttr.get(result, value), loc=loc, ip=ip)
-        else:
-            super().__init__(value, loc=loc, ip=ip)
-
-    @classmethod
-    def create_index(cls, value: int, *, loc=None, ip=None):
-        """Create an index-typed constant."""
-        return cls(
-            IndexType.get(context=_get_default_loc_context(loc)), value, loc=loc, ip=ip
-        )
-
-    @property
-    def type(self):
-        return self.results[0].type
-
-    @property
-    def value(self):
-        return Attribute(self.operation.attributes["value"])
-
-    @property
-    def literal_value(self) -> Union[int, float]:
-        if _is_integer_like_type(self.type):
-            return IntegerAttr(self.value).value
-        elif _is_float_type(self.type):
-            return FloatAttr(self.value).value
-        else:
-            raise ValueError("only integer and float constants have literal values")
diff --git a/mlir/python/mlir/dialects/_bufferization_ops_ext.py b/mlir/python/mlir/dialects/_bufferization_ops_ext.py
deleted file mode 100644
index 1066cb4c775cab9..000000000000000
--- a/mlir/python/mlir/dialects/_bufferization_ops_ext.py
+++ /dev/null
@@ -1,41 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-    from typing import Sequence, Union
-    from ..ir import *
-    from ._ods_common import get_default_loc_context
-
-    from typing import Any, List, Union
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-
-class AllocTensorOp:
-    """Extends the bufferization.alloc_tensor op."""
-
-    def __init__(
-        self,
-        tensor_type: Type,
-        dynamic_sizes: Sequence[Value],
-        copy: Value,
-        size_hint: Value,
-        escape: BoolAttr,
-        *,
-        loc=None,
-        ip=None
-    ):
-        """Constructs an `alloc_tensor` with static and/or dynamic sizes."""
-        context = get_default_loc_context(loc)
-        attributes = {}
-        if escape:
-            attributes["escape"] = escape
-        op = self.build_generic(
-            results=[tensor_type],
-            operands=[dynamic_sizes, copy, size_hint],
-            attributes=attributes,
-            loc=loc,
-            ip=ip,
-        )
-        OpView.__init__(self, op)
diff --git a/mlir/python/mlir/dialects/_bufferization_transform_ops_ext.py b/mlir/python/mlir/dialects/_bufferization_transform_ops_ext.py
deleted file mode 100644
index 7e6c1b81cb350b7..000000000000000
--- a/mlir/python/mlir/dialects/_bufferization_transform_ops_ext.py
+++ /dev/null
@@ -1,128 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-    from ..ir import *
-    from ..dialects import transform
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-from enum import Enum
-from typing import Optional, overload, Union
-
-
-class EmptyTensorToAllocTensorOp:
-    """Specialization for EmptyTensorToAllocTensorOp class."""
-
-    @overload
-    def __init__(
-        self,
-        transformed_type: Type,
-        target: Union[Operation, OpView, Value],
-        *,
-        loc=None,
-        ip=None
-    ):
-        ...
-
-    @overload
-    def __init__(self, target: Union[Operation, OpView, Value], *, loc=None, ip=None):
-        ...
-
-    def __init__(
-        self,
-        transformed_type_or_target: Type,
-        target_or_none: Optional[Union[Operation, OpView, Value]] = None,
-        *,
-        loc=None,
-        ip=None
-    ):
-        if isinstance(transformed_type_or_target, Type):
-            transformed_type = transformed_type_or_target
-            target = target_or_none
-        else:
-            transformed_type = transform.OperationType.get("bufferization.alloc_tensor")
-            target = transformed_type_or_target
-
-        super().__init__(
-            transformed_type,
-            target,
-            loc=loc,
-            ip=ip,
-        )
-
-
-class OneShotBufferizeOp:
-    """Specialization for OneShotBufferizeOp class."""
-
-    @overload
-    def __init__(
-        self,
-        transformed_type: Type,
-        target: Union[Operation, OpView, Value],
-        *,
-        allow_return_allocs_from_loops: Optional[bool] = None,
-        allow_unknown_ops: Optional[bool] = None,
-        bufferize_function_boundaries: Optional[bool] = None,
-        function_boundary_type_conversion: Optional[Enum] = None,
-        memcpy_op: Optional[str] = None,
-        print_conflicts: Optional[bool] = None,
-        test_analysis_only: Optional[bool] = None,
-        loc=None,
-        ip=None
-    ):
-        ...
-
-    @overload
-    def __init__(
-        self,
-        target: Union[Operation, OpView, Value],
-        *,
-        allow_return_allocs_from_loops: Optional[bool] = None,
-        allow_unknown_ops: Optional[bool] = None,
-        bufferize_function_boundaries: Optional[bool] = None,
-        function_boundary_type_conversion: Optional[Enum] = None,
-        memcpy_op: Optional[str] = None,
-        print_conflicts: Optional[bool] = None,
-        test_analysis_only: Optional[bool] = None,
-        loc=None,
-        ip=None
-    ):
-        ...
-
-    def __init__(
-        self,
-        transformed_type_or_target: Type,
-        target_or_none: Optional[Union[Operation, OpView, Value]] = None,
-        *,
-        allow_return_allocs_from_loops: Optional[bool] = None,
-        allow_unknown_ops: Optional[bool] = None,
-        bufferize_function_boundaries: Optional[bool] = None,
-        function_boundary_type_conversion: Optional[Enum] = None,
-        memcpy_op: Optional[str] = None,
-        print_conflicts: Optional[bool] = None,
-        test_analysis_only: Optional[bool] = None,
-        loc=None,
-        ip=None
-    ):
-        if isinstance(transformed_type_or_target, Type):
-            transformed_type = transformed_type_or_target
-            target = target_or_none
-        else:
-            transformed_type = transform.AnyOpType.get()
-            target = transformed_type_or_target
-
-        super().__init__(
-            transformed_type,
-            target,
-            allow_return_allocs_from_loops=allow_return_allocs_from_loops,
-            allow_unknown_ops=allow_unknown_ops,
-            bufferize_function_boundaries=bufferize_function_boundaries,
-            function_boundary_type_conversion=function_boundary_type_conversion,
-            memcpy_op=memcpy_op,
-            print_conflicts=print_conflicts,
-            test_analysis_only=test_analysis_only,
-            loc=loc,
-            ip=ip,
-        )
diff --git a/mlir/python/mlir/dialects/_builtin_ops_ext.py b/mlir/python/mlir/dialects/_builtin_ops_ext.py
deleted file mode 100644
index 27a60123050acb4..000000000000000
--- a/mlir/python/mlir/dialects/_builtin_ops_ext.py
+++ /dev/null
@@ -1,20 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-    from ..ir import *
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-
-class ModuleOp:
-    """Specialization for the module op class."""
-
-    def __init__(self, *, loc=None, ip=None):
-        super().__init__(self.build_generic(results=[], operands=[], loc=loc, ip=ip))
-        body = self.regions[0].blocks.append()
-
-    @property
-    def body(self):
-        return self.regions[0].blocks[0]
diff --git a/mlir/python/mlir/dialects/_func_ops_ext.py b/mlir/python/mlir/dialects/_func_ops_ext.py
deleted file mode 100644
index 6d264c33f1f9dae..000000000000000
--- a/mlir/python/mlir/dialects/_func_ops_ext.py
+++ /dev/null
@@ -1,319 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-    from ..ir import *
-    from ._ods_common import get_default_loc_context as _get_default_loc_context
-
-    import inspect
-
-    from typing import Any, List, Optional, Sequence, Union
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-ARGUMENT_ATTRIBUTE_NAME = "arg_attrs"
-RESULT_ATTRIBUTE_NAME = "res_attrs"
-
-
-class ConstantOp:
-    """Specialization for the constant op class."""
-
-    def __init__(self, result: Type, value: Attribute, *, loc=None, ip=None):
-        super().__init__(result, value, loc=loc, ip=ip)
-
-    @property
-    def type(self):
-        return self.results[0].type
-
-
-class FuncOp:
-    """Specialization for the func op class."""
-
-    def __init__(
-        self, name, type, *, visibility=None, body_builder=None, loc=None, ip=None
-    ):
-        """
-        Create a FuncOp with the provided `name`, `type`, and `visibility`.
-        - `name` is a string representing the function name.
-        - `type` is either a FunctionType or a pair of list describing inputs and
-          results.
-        - `visibility` is a string matching `public`, `private`, or `nested`. None
-          implies private visibility.
-        - `body_builder` is an optional callback, when provided a new entry block
-          is created and the callback is invoked with the new op as argument within
-          an InsertionPoint context already set for the block. The callback is
-          expected to insert a terminator in the block.
-        """
-        sym_name = StringAttr.get(str(name))
-
-        # If the type is passed as a tuple, build a FunctionType on the fly.
-        if isinstance(type, tuple):
-            type = FunctionType.get(inputs=type[0], results=type[1])
-
-        type = TypeAttr.get(type)
-        sym_visibility = (
-            StringAttr.get(str(visibility)) if visibility is not None else None
-        )
-        super().__init__(sym_name, type, sym_visibility=sym_visibility, loc=loc, ip=ip)
-        if body_builder:
-            entry_block = self.add_entry_block()
-            with InsertionPoint(entry_block):
-                body_builder(self)
-
-    @property
-    def is_external(self):
-        return len(self.regions[0].blocks) == 0
-
-    @property
-    def body(self):
-        return self.regions[0]
-
-    @property
-    def type(self):
-        return FunctionType(TypeAttr(self.attributes["function_type"]).value)
-
-    @property
-    def visibility(self):
-        return self.attributes["sym_visibility"]
-
-    @property
-    def name(self) -> StringAttr:
-        return StringAttr(self.attributes["sym_name"])
-
-    @property
-    def entry_block(self):
-        if self.is_external:
-            raise IndexError("External function does not have a body")
-        return self.regions[0].blocks[0]
-
-    def add_entry_block(self, arg_locs: Optional[Sequence[Location]] = None):
-        """
-        Add an entry block to the function body using the function signature to
-        infer block arguments.
-        Returns the newly created block
-        """
-        if not self.is_external:
-            raise IndexError("The function already has an entry block!")
-        self.body.blocks.append(*self.type.inputs, arg_locs=arg_locs)
-        return self.body.blocks[0]
-
-    @property
-    def arg_attrs(self):
-        return ArrayAttr(self.attributes[ARGUMENT_ATTRIBUTE_NAME])
-
-    @arg_attrs.setter
-    def arg_attrs(self, attribute: Union[ArrayAttr, list]):
-        if isinstance(attribute, ArrayAttr):
-            self.attributes[ARGUMENT_ATTRIBUTE_NAME] = attribute
-        else:
-            self.attributes[ARGUMENT_ATTRIBUTE_NAME] = ArrayAttr.get(
-                attribute, context=self.context
-            )
-
-    @property
-    def arguments(self):
-        return self.entry_block.arguments
-
-    @property
-    def result_attrs(self):
-        return self.attributes[RESULT_ATTRIBUTE_NAME]
-
-    @result_attrs.setter
-    def result_attrs(self, attribute: ArrayAttr):
-        self.attributes[RESULT_ATTRIBUTE_NAME] = attribute
-
-    @classmethod
-    def from_py_func(
-        FuncOp,
-        *inputs: Type,
-        results: Optional[Sequence[Type]] = None,
-        name: Optional[str] = None,
-    ):
-        """Decorator to define an MLIR FuncOp specified as a python function.
-
-        Requires that an `mlir.ir.InsertionPoint` and `mlir.ir.Location` are
-        active for the current thread (i.e. established in a `with` block).
-
-        When applied as a decorator to a Python function, an entry block will
-        be constructed for the FuncOp with types as specified in `*inputs`. The
-        block arguments will be passed positionally to the Python function. In
-        addition, if the Python function accepts keyword arguments generally or
-        has a corresponding keyword argument, the following will be passed:
-          * `func_op`: The `func` op being defined.
-
-        By default, the function name will be the Python function `__name__`. This
-        can be overriden by passing the `name` argument to the decorator.
-
-        If `results` is not specified, then the decorator will implicitly
-        insert a `ReturnOp` with the `Value`'s returned from the decorated
-        function. It will also set the `FuncOp` type with the actual return
-        value types. If `results` is specified, then the decorated function
-        must return `None` and no implicit `ReturnOp` is added (nor are the result
-        types updated). The implicit behavior is intended for simple, single-block
-        cases, and users should specify result types explicitly for any complicated
-        cases.
-
-        The decorated function can further be called from Python and will insert
-        a `CallOp` at the then-current insertion point, returning either None (
-        if no return values), a unary Value (for one result), or a list of Values).
-        This mechanism cannot be used to emit recursive calls (by construction).
-        """
-
-        def decorator(f):
-            from . import func
-
-            # Introspect the callable for optional features.
-            sig = inspect.signature(f)
-            has_arg_func_op = False
-            for param in sig.parameters.values():
-                if param.kind == param.VAR_KEYWORD:
-                    has_arg_func_op = True
-                if param.name == "func_op" and (
-                    param.kind == param.POSITIONAL_OR_KEYWORD
-                    or param.kind == param.KEYWORD_ONLY
-                ):
-                    has_arg_func_op = True
-
-            # Emit the FuncOp.
-            implicit_return = results is None
-            symbol_name = name or f.__name__
-            function_type = FunctionType.get(
-                inputs=inputs, results=[] if implicit_return else results
-            )
-            func_op = FuncOp(name=symbol_name, type=function_type)
-            with InsertionPoint(func_op.add_entry_block()):
-                func_args = func_op.entry_block.arguments
-                func_kwargs = {}
-                if has_arg_func_op:
-                    func_kwargs["func_op"] = func_op
-                return_values = f(*func_args, **func_kwargs)
-                if not implicit_return:
-                    return_types = list(results)
-                    assert return_values is None, (
-                        "Capturing a python function with explicit `results=` "
-                        "requires that the wrapped function returns None."
-                    )
-                else:
-                    # Coerce return values, add ReturnOp and rewrite func type.
-                    if return_values is None:
-                        return_values = []
-                    elif isinstance(return_values, tuple):
-                        return_values = list(return_values)
-                    elif isinstance(return_values, Value):
-                        # Returning a single value is fine, coerce it into a list.
-                        return_values = [return_values]
-                    elif isinstance(return_values, OpView):
-                        # Returning a single operation is fine, coerce its results a list.
-                        return_values = return_values.operation.results
-                    elif isinstance(return_values, Operation):
-                        # Returning a single operation is fine, coerce its results a list.
-                        return_values = return_values.results
-                    else:
-                        return_values = list(return_values)
-                    func.ReturnOp(return_values)
-                    # Recompute the function type.
-                    return_types = [v.type for v in return_values]
-                    function_type = FunctionType.get(
-                        inputs=inputs, results=return_types
-                    )
-                    func_op.attributes["function_type"] = TypeAttr.get(function_type)
-
-            def emit_call_op(*call_args):
-                call_op = func.CallOp(
-                    return_types, FlatSymbolRefAttr.get(symbol_name), call_args
-                )
-                if return_types is None:
-                    return None
-                elif len(return_types) == 1:
-                    return call_op.result
-                else:
-                    return call_op.results
-
-            wrapped = emit_call_op
-            wrapped.__name__ = f.__name__
-            wrapped.func_op = func_op
-            return wrapped
-
-        return decorator
-
-
-class CallOp:
-    """Specialization for the call op class."""
-
-    def __init__(
-        self,
-        calleeOrResults: Union[FuncOp, List[Type]],
-        argumentsOrCallee: Union[List, FlatSymbolRefAttr, str],
-        arguments: Optional[List] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        """Creates an call operation.
-
-        The constructor accepts three different forms:
-
-          1. A function op to be called followed by a list of arguments.
-          2. A list of result types, followed by the name of the function to be
-             called as string, following by a list of arguments.
-          3. A list of result types, followed by the name of the function to be
-             called as symbol reference attribute, followed by a list of arguments.
-
-        For example
-
-            f = func.FuncOp("foo", ...)
-            func.CallOp(f, [args])
-            func.CallOp([result_types], "foo", [args])
-
-        In all cases, the location and insertion point may be specified as keyword
-        arguments if not provided by the surrounding context managers.
-        """
-
-        # TODO: consider supporting constructor "overloads", e.g., through a custom
-        # or pybind-provided metaclass.
-        if isinstance(calleeOrResults, FuncOp):
-            if not isinstance(argumentsOrCallee, list):
-                raise ValueError(
-                    "when constructing a call to a function, expected "
-                    + "the second argument to be a list of call arguments, "
-                    + f"got {type(argumentsOrCallee)}"
-                )
-            if arguments is not None:
-                raise ValueError(
-                    "unexpected third argument when constructing a call"
-                    + "to a function"
-                )
-
-            super().__init__(
-                calleeOrResults.type.results,
-                FlatSymbolRefAttr.get(
-                    calleeOrResults.name.value, context=_get_default_loc_context(loc)
-                ),
-                argumentsOrCallee,
-                loc=loc,
-                ip=ip,
-            )
-            return
-
-        if isinstance(argumentsOrCallee, list):
-            raise ValueError(
-                "when constructing a call to a function by name, "
-                + "expected the second argument to be a string or a "
-                + f"FlatSymbolRefAttr, got {type(argumentsOrCallee)}"
-            )
-
-        if isinstance(argumentsOrCallee, FlatSymbolRefAttr):
-            super().__init__(
-                calleeOrResults, argumentsOrCallee, arguments, loc=loc, ip=ip
-            )
-        elif isinstance(argumentsOrCallee, str):
-            super().__init__(
-                calleeOrResults,
-                FlatSymbolRefAttr.get(
-                    argumentsOrCallee, context=_get_default_loc_context(loc)
-                ),
-                arguments,
-                loc=loc,
-                ip=ip,
-            )
diff --git a/mlir/python/mlir/dialects/_gpu_transform_ops_ext.py b/mlir/python/mlir/dialects/_gpu_transform_ops_ext.py
deleted file mode 100644
index ba72bac3a15264d..000000000000000
--- a/mlir/python/mlir/dialects/_gpu_transform_ops_ext.py
+++ /dev/null
@@ -1,124 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-    from ..ir import *
-    from ..dialects import transform
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-from typing import Optional, Sequence, Union, overload
-
-
-class MapForallToBlocks:
-    """Specialization for MapForallToBlocks class."""
-
-    @overload
-    def __init__(
-        self,
-        result_type: Type,
-        target: Union[Operation, OpView, Value],
-        *,
-        grid_dims: Optional[Union[Sequence[int], Attribute]] = None,
-        generate_gpu_launch: Optional[Union[bool, Attribute]] = None,
-        loc=None,
-        ip=None
-    ):
-        ...
-
-    @overload
-    def __init__(
-        self,
-        target: Union[Operation, OpView, Value],
-        *,
-        grid_dims: Optional[Union[Sequence[int], Attribute]] = None,
-        generate_gpu_launch: Optional[Union[bool, Attribute]] = None,
-        loc=None,
-        ip=None
-    ):
-        ...
-
-    def __init__(
-        self,
-        result_type_or_target: Union[Operation, OpView, Type, Value],
-        target_or_none: Optional[Union[Operation, OpView, Value]] = None,
-        *,
-        grid_dims: Optional[Union[Sequence[int], Attribute]] = None,
-        generate_gpu_launch: Optional[Union[bool, Attribute]] = None,
-        loc=None,
-        ip=None
-    ):
-        if isinstance(result_type_or_target, Type):
-            result_type = result_type_or_target
-            target = target_or_none
-        else:
-            result_type = transform.AnyOpType.get()
-            target = result_type_or_target
-
-        super().__init__(
-            result_type,
-            target,
-            grid_dims=grid_dims,
-            generate_gpu_launch=generate_gpu_launch,
-            loc=loc,
-            ip=ip,
-        )
-
-
-class MapNestedForallToThreads:
-    """Specialization for MapNestedForallToThreads class."""
-
-    @overload
-    def __init__(
-        self,
-        result_type: Type,
-        target: Union[Operation, OpView, Value],
-        *,
-        block_dims: Optional[Sequence[int]] = None,
-        warp_size: Optional[Sequence[int]] = None,
-        sync_after_distribute: Optional[bool] = None,
-        loc=None,
-        ip=None
-    ):
-        ...
-
-    @overload
-    def __init__(
-        self,
-        target: Union[Operation, OpView, Value],
-        *,
-        block_dims: Optional[Sequence[int]] = None,
-        warp_size: Optional[Sequence[int]] = None,
-        sync_after_distribute: Optional[bool] = None,
-        loc=None,
-        ip=None
-    ):
-        ...
-
-    def __init__(
-        self,
-        result_type_or_target: Union[Operation, OpView, Value, Type],
-        target_or_none: Optional[Union[Operation, OpView, Value]] = None,
-        *,
-        block_dims: Optional[Union[Sequence[int], Attribute]] = None,
-        warp_size: Optional[Union[Sequence[int], Attribute]] = None,
-        sync_after_distribute: Optional[bool] = None,
-        loc=None,
-        ip=None
-    ):
-        if isinstance(result_type_or_target, Type):
-            result_type = result_type_or_target
-            target = target_or_none
-        else:
-            result_type = result_type_or_target.type
-            target = result_type_or_target
-        super().__init__(
-            result_type,
-            target,
-            block_dims=block_dims,
-            warp_size=warp_size,
-            sync_after_distribute=sync_after_distribute,
-            loc=loc,
-            ip=ip,
-        )
diff --git a/mlir/python/mlir/dialects/_linalg_ops_ext.py b/mlir/python/mlir/dialects/_linalg_ops_ext.py
deleted file mode 100644
index 3f6d854ca3e2b14..000000000000000
--- a/mlir/python/mlir/dialects/_linalg_ops_ext.py
+++ /dev/null
@@ -1,47 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-    from typing import Optional, Sequence, Union
-    from ..ir import *
-    from ._ods_common import get_default_loc_context
-    from .._mlir_libs._mlirDialectsLinalg import fill_builtin_region
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-from ._ods_common import get_op_result_or_value as _get_op_result_or_value
-
-
-def isa(cls: Type, ty: Type):
-    try:
-        cls(ty)
-        return True
-    except ValueError:
-        return False
-
-
-class StructuredOpMixin:
-    """All structured ops use the same mixin class."""
-
-    def __init__(self, inputs, outputs=(), results=(), loc=None, ip=None):
-        super().__init__(
-            self.build_generic(
-                results=list(results),
-                operands=[list(inputs), list(outputs)],
-                loc=loc,
-                ip=ip,
-            )
-        )
-
-
-def select_opview_mixin(parent_opview_cls):
-    # TODO: This shouldn't be a heuristic: we should have a way to annotate
-    # the OpView to note that it is a structured op.
-    if (
-        "__init__" not in parent_opview_cls.__dict__
-        and hasattr(parent_opview_cls, "inputs")
-        and hasattr(parent_opview_cls, "outputs")
-        and hasattr(parent_opview_cls, "result_tensors")
-    ):
-        return StructuredOpMixin
diff --git a/mlir/python/mlir/dialects/_loop_transform_ops_ext.py b/mlir/python/mlir/dialects/_loop_transform_ops_ext.py
deleted file mode 100644
index 1cdb2b9e77b5afe..000000000000000
--- a/mlir/python/mlir/dialects/_loop_transform_ops_ext.py
+++ /dev/null
@@ -1,134 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-    from ..ir import *
-    from ._ods_common import get_op_result_or_value as _get_op_result_or_value
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-from typing import Optional, Union
-
-
-class GetParentForOp:
-    """Extension for GetParentForOp."""
-
-    def __init__(
-        self,
-        result_type: Type,
-        target: Union[Operation, Value],
-        *,
-        num_loops: Optional[int] = None,
-        ip=None,
-        loc=None,
-    ):
-        if num_loops is None:
-            num_loops = 1
-        super().__init__(
-            result_type,
-            _get_op_result_or_value(target),
-            num_loops=num_loops,
-            ip=ip,
-            loc=loc,
-        )
-
-
-class LoopOutlineOp:
-    """Extension for LoopOutlineOp."""
-
-    def __init__(
-        self,
-        function_type: Type,
-        call_type: Type,
-        target: Union[Operation, Value],
-        *,
-        func_name: Union[str, StringAttr],
-        ip=None,
-        loc=None,
-    ):
-        super().__init__(
-            function_type,
-            call_type,
-            _get_op_result_or_value(target),
-            func_name=(
-                func_name
-                if isinstance(func_name, StringAttr)
-                else StringAttr.get(func_name)
-            ),
-            ip=ip,
-            loc=loc,
-        )
-
-
-class LoopPeelOp:
-    """Extension for LoopPeelOp."""
-
-    def __init__(
-        self,
-        main_loop_type: Type,
-        remainder_loop_type: Type,
-        target: Union[Operation, Value],
-        *,
-        fail_if_already_divisible: Union[bool, BoolAttr] = False,
-        ip=None,
-        loc=None,
-    ):
-        super().__init__(
-            main_loop_type,
-            remainder_loop_type,
-            _get_op_result_or_value(target),
-            fail_if_already_divisible=(
-                fail_if_already_divisible
-                if isinstance(fail_if_already_divisible, BoolAttr)
-                else BoolAttr.get(fail_if_already_divisible)
-            ),
-            ip=ip,
-            loc=loc,
-        )
-
-
-class LoopPipelineOp:
-    """Extension for LoopPipelineOp."""
-
-    def __init__(
-        self,
-        result_type: Type,
-        target: Union[Operation, Value],
-        *,
-        iteration_interval: Optional[Union[int, IntegerAttr]] = None,
-        read_latency: Optional[Union[int, IntegerAttr]] = None,
-        ip=None,
-        loc=None,
-    ):
-        if iteration_interval is None:
-            iteration_interval = 1
-        if read_latency is None:
-            read_latency = 10
-        super().__init__(
-            result_type,
-            _get_op_result_or_value(target),
-            iteration_interval=iteration_interval,
-            read_latency=read_latency,
-            ip=ip,
-            loc=loc,
-        )
-
-
-class LoopUnrollOp:
-    """Extension for LoopUnrollOp."""
-
-    def __init__(
-        self,
-        target: Union[Operation, Value],
-        *,
-        factor: Union[int, IntegerAttr],
-        ip=None,
-        loc=None,
-    ):
-        super().__init__(
-            _get_op_result_or_value(target),
-            factor=factor,
-            ip=ip,
-            loc=loc,
-        )
diff --git a/mlir/python/mlir/dialects/_memref_ops_ext.py b/mlir/python/mlir/dialects/_memref_ops_ext.py
deleted file mode 100644
index 825f1a0a7a6faf4..000000000000000
--- a/mlir/python/mlir/dialects/_memref_ops_ext.py
+++ /dev/null
@@ -1,36 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-    from ..ir import *
-    from ._ods_common import get_op_result_or_value as _get_op_result_or_value
-    from ._ods_common import get_op_results_or_values as _get_op_results_or_values
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-from typing import Optional, Sequence, Union
-
-
-class LoadOp:
-    """Specialization for the MemRef load operation."""
-
-    def __init__(
-        self,
-        memref: Union[Operation, OpView, Value],
-        indices: Optional[Union[Operation, OpView, Sequence[Value]]] = None,
-        *,
-        loc=None,
-        ip=None
-    ):
-        """Creates a memref load operation.
-
-        Args:
-          memref: the buffer to load from.
-          indices: the list of subscripts, may be empty for zero-dimensional
-            buffers.
-          loc: user-visible location of the operation.
-          ip: insertion point.
-        """
-        indices_resolved = [] if indices is None else _get_op_results_or_values(indices)
-        super().__init__(memref, indices_resolved, loc=loc, ip=ip)
diff --git a/mlir/python/mlir/dialects/_memref_transform_ops_ext.py b/mlir/python/mlir/dialects/_memref_transform_ops_ext.py
deleted file mode 100644
index 1cc00bdcbf381c9..000000000000000
--- a/mlir/python/mlir/dialects/_memref_transform_ops_ext.py
+++ /dev/null
@@ -1,114 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-    from ..ir import *
-    from ..dialects import transform
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-from typing import Optional, overload, Union
-
-
-class MemRefAllocaToGlobalOp:
-    """Specialization for MemRefAllocaToGlobalOp class."""
-
-    @overload
-    def __init__(
-        self,
-        get_global_type: Type,
-        global_type: Type,
-        alloca: Union[Operation, OpView, Value],
-        *,
-        loc=None,
-        ip=None
-    ):
-        ...
-
-    @overload
-    def __init__(self, alloca: Union[Operation, OpView, Value], *, loc=None, ip=None):
-        ...
-
-    def __init__(
-        self,
-        get_global_type_or_alloca: Union[Operation, OpView, Type, Value],
-        global_type_or_none: Optional[Type] = None,
-        alloca_or_none: Optional[Union[Operation, OpView, Value]] = None,
-        *,
-        loc=None,
-        ip=None
-    ):
-        if isinstance(get_global_type_or_alloca, Type):
-            get_global_type = get_global_type_or_alloca
-            global_type = global_type_or_none
-            alloca = alloca_or_none
-        else:
-            get_global_type = transform.AnyOpType.get()
-            global_type = transform.AnyOpType.get()
-            alloca = get_global_type_or_alloca
-
-        super().__init__(
-            get_global_type,
-            global_type,
-            alloca,
-            loc=loc,
-            ip=ip,
-        )
-
-
-class MemRefMultiBufferOp:
-    """Specialization for MemRefMultiBufferOp class."""
-
-    @overload
-    def __init__(
-        self,
-        transformed_type: Type,
-        target: Union[Operation, OpView, Value],
-        factor: Union[int, IntegerAttr],
-        *,
-        skip_analysis: Optional[bool] = None,
-        loc=None,
-        ip=None
-    ):
-        ...
-
-    @overload
-    def __init__(
-        self,
-        target: Union[Operation, OpView, Value],
-        factor: Union[int, IntegerAttr],
-        *,
-        skip_analysis: Optional[bool] = None,
-        loc=None,
-        ip=None
-    ):
-        ...
-
-    def __init__(
-        self,
-        transformed_type_or_target: Type,
-        target_or_factor: Union[int, IntegerAttr, Operation, OpView, Value] = None,
-        factor_or_none: Optional[Union[int, IntegerAttr]] = None,
-        *,
-        skip_analysis: Optional[bool] = None,
-        loc=None,
-        ip=None
-    ):
-        if isinstance(transformed_type_or_target, Type):
-            transformed_type = transformed_type_or_target
-            target = target_or_factor
-            factor = factor_or_none
-        else:
-            transformed_type = transform.AnyOpType.get()
-            target = transformed_type_or_target
-            factor = target_or_factor
-
-        super().__init__(
-            transformed_type,
-            target,
-            factor,
-            skip_analysis=skip_analysis,
-            loc=loc,
-            ip=ip,
-        )
diff --git a/mlir/python/mlir/dialects/_ml_program_ops_ext.py b/mlir/python/mlir/dialects/_ml_program_ops_ext.py
deleted file mode 100644
index c84d23c16ef93ab..000000000000000
--- a/mlir/python/mlir/dialects/_ml_program_ops_ext.py
+++ /dev/null
@@ -1,113 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-    from typing import Union
-    from ..ir import *
-    from ._ods_common import get_default_loc_context as _get_default_loc_context
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-from ._ml_program_ops_gen import *
-
-
-ARGUMENT_ATTRIBUTE_NAME = "arg_attrs"
-RESULT_ATTRIBUTE_NAME = "res_attrs"
-
-
-class FuncOp:
-    """Specialization for the func op class."""
-
-    def __init__(
-        self, name, type, *, visibility=None, body_builder=None, loc=None, ip=None
-    ):
-        """
-        Create a FuncOp with the provided `name`, `type`, and `visibility`.
-        - `name` is a string representing the function name.
-        - `type` is either a FunctionType or a pair of list describing inputs and
-          results.
-        - `visibility` is a string matching `public`, `private`, or `nested`. None
-          implies private visibility.
-        - `body_builder` is an optional callback, when provided a new entry block
-          is created and the callback is invoked with the new op as argument within
-          an InsertionPoint context already set for the block. The callback is
-          expected to insert a terminator in the block.
-        """
-        sym_name = StringAttr.get(str(name))
-
-        # If the type is passed as a tuple, build a FunctionType on the fly.
-        if isinstance(type, tuple):
-            type = FunctionType.get(inputs=type[0], results=type[1])
-
-        type = TypeAttr.get(type)
-        sym_visibility = (
-            StringAttr.get(str(visibility)) if visibility is not None else None
-        )
-        super().__init__(sym_name, type, sym_visibility=sym_visibility, loc=loc, ip=ip)
-        if body_builder:
-            entry_block = self.add_entry_block()
-            with InsertionPoint(entry_block):
-                body_builder(self)
-
-    @property
-    def is_external(self):
-        return len(self.regions[0].blocks) == 0
-
-    @property
-    def body(self):
-        return self.regions[0]
-
-    @property
-    def type(self):
-        return FunctionType(TypeAttr(self.attributes["function_type"]).value)
-
-    @property
-    def visibility(self):
-        return self.attributes["sym_visibility"]
-
-    @property
-    def name(self) -> StringAttr:
-        return StringAttr(self.attributes["sym_name"])
-
-    @property
-    def entry_block(self):
-        if self.is_external:
-            raise IndexError("External function does not have a body")
-        return self.regions[0].blocks[0]
-
-    def add_entry_block(self):
-        """
-        Add an entry block to the function body using the function signature to
-        infer block arguments.
-        Returns the newly created block
-        """
-        if not self.is_external:
-            raise IndexError("The function already has an entry block!")
-        self.body.blocks.append(*self.type.inputs)
-        return self.body.blocks[0]
-
-    @property
-    def arg_attrs(self):
-        return ArrayAttr(self.attributes[ARGUMENT_ATTRIBUTE_NAME])
-
-    @arg_attrs.setter
-    def arg_attrs(self, attribute: Union[ArrayAttr, list]):
-        if isinstance(attribute, ArrayAttr):
-            self.attributes[ARGUMENT_ATTRIBUTE_NAME] = attribute
-        else:
-            self.attributes[ARGUMENT_ATTRIBUTE_NAME] = ArrayAttr.get(
-                attribute, context=self.context
-            )
-
-    @property
-    def arguments(self):
-        return self.entry_block.arguments
-
-    @property
-    def result_attrs(self):
-        return self.attributes[RESULT_ATTRIBUTE_NAME]
-
-    @result_attrs.setter
-    def result_attrs(self, attribute: ArrayAttr):
-        self.attributes[RESULT_ATTRIBUTE_NAME] = attribute
diff --git a/mlir/python/mlir/dialects/_ods_common.py b/mlir/python/mlir/dialects/_ods_common.py
index 895c3228139b392..9cca7d659ec8cb3 100644
--- a/mlir/python/mlir/dialects/_ods_common.py
+++ b/mlir/python/mlir/dialects/_ods_common.py
@@ -9,7 +9,6 @@
 
 __all__ = [
     "equally_sized_accessor",
-    "extend_opview_class",
     "get_default_loc_context",
     "get_op_result_or_value",
     "get_op_results_or_values",
@@ -18,64 +17,6 @@
 ]
 
 
-def extend_opview_class(ext_module):
-    """Decorator to extend an OpView class from an extension module.
-
-    Extension modules can expose various entry-points:
-      Stand-alone class with the same name as a parent OpView class (i.e.
-      "ReturnOp"). A name-based match is attempted first before falling back
-      to a below mechanism.
-
-      def select_opview_mixin(parent_opview_cls):
-        If defined, allows an appropriate mixin class to be selected dynamically
-        based on the parent OpView class. Should return NotImplemented if a
-        decision is not made.
-
-    Args:
-      ext_module: A module from which to locate extensions. Can be None if not
-        available.
-
-    Returns:
-      A decorator that takes an OpView subclass and further extends it as
-      needed.
-    """
-
-    def class_decorator(parent_opview_cls: type):
-        if ext_module is None:
-            return parent_opview_cls
-        mixin_cls = NotImplemented
-        # First try to resolve by name.
-        try:
-            mixin_cls = getattr(ext_module, parent_opview_cls.__name__)
-        except AttributeError:
-            # Fall back to a select_opview_mixin hook.
-            try:
-                select_mixin = getattr(ext_module, "select_opview_mixin")
-            except AttributeError:
-                pass
-            else:
-                mixin_cls = select_mixin(parent_opview_cls)
-
-        if mixin_cls is NotImplemented or mixin_cls is None:
-            return parent_opview_cls
-
-        # Have a mixin_cls. Create an appropriate subclass.
-        try:
-
-            class LocalOpView(mixin_cls, parent_opview_cls):
-                pass
-
-        except TypeError as e:
-            raise TypeError(
-                f"Could not mixin {mixin_cls} into {parent_opview_cls}"
-            ) from e
-        LocalOpView.__name__ = parent_opview_cls.__name__
-        LocalOpView.__qualname__ = parent_opview_cls.__qualname__
-        return LocalOpView
-
-    return class_decorator
-
-
 def segmented_accessor(elements, raw_segments, idx):
     """
     Returns a slice of elements corresponding to the idx-th segment.
diff --git a/mlir/python/mlir/dialects/_pdl_ops_ext.py b/mlir/python/mlir/dialects/_pdl_ops_ext.py
deleted file mode 100644
index fc9de0b7f7db69c..000000000000000
--- a/mlir/python/mlir/dialects/_pdl_ops_ext.py
+++ /dev/null
@@ -1,271 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-    from ..ir import *
-    from ..dialects import pdl
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-from typing import Union, Optional, Sequence, Mapping
-from ._ods_common import (
-    get_op_result_or_value as _get_value,
-    get_op_results_or_values as _get_values,
-)
-
-
-class ApplyNativeConstraintOp:
-    """Specialization for PDL apply native constraint op class."""
-
-    def __init__(
-        self,
-        name: Union[str, StringAttr],
-        args: Optional[Sequence[Union[OpView, Operation, Value]]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        if args is None:
-            args = []
-        args = _get_values(args)
-        super().__init__(name, args, loc=loc, ip=ip)
-
-
-class ApplyNativeRewriteOp:
-    """Specialization for PDL apply native rewrite op class."""
-
-    def __init__(
-        self,
-        results: Sequence[Type],
-        name: Union[str, StringAttr],
-        args: Optional[Sequence[Union[OpView, Operation, Value]]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        if args is None:
-            args = []
-        args = _get_values(args)
-        super().__init__(results, name, args, loc=loc, ip=ip)
-
-
-class AttributeOp:
-    """Specialization for PDL attribute op class."""
-
-    def __init__(
-        self,
-        valueType: Optional[Union[OpView, Operation, Value]] = None,
-        value: Optional[Attribute] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        valueType = valueType if valueType is None else _get_value(valueType)
-        result = pdl.AttributeType.get()
-        super().__init__(result, valueType=valueType, value=value, loc=loc, ip=ip)
-
-
-class EraseOp:
-    """Specialization for PDL erase op class."""
-
-    def __init__(
-        self,
-        operation: Optional[Union[OpView, Operation, Value]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        operation = _get_value(operation)
-        super().__init__(operation, loc=loc, ip=ip)
-
-
-class OperandOp:
-    """Specialization for PDL operand op class."""
-
-    def __init__(
-        self,
-        type: Optional[Union[OpView, Operation, Value]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        type = type if type is None else _get_value(type)
-        result = pdl.ValueType.get()
-        super().__init__(result, valueType=type, loc=loc, ip=ip)
-
-
-class OperandsOp:
-    """Specialization for PDL operands op class."""
-
-    def __init__(
-        self,
-        types: Optional[Union[OpView, Operation, Value]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        types = types if types is None else _get_value(types)
-        result = pdl.RangeType.get(pdl.ValueType.get())
-        super().__init__(result, valueType=types, loc=loc, ip=ip)
-
-
-class OperationOp:
-    """Specialization for PDL operand op class."""
-
-    def __init__(
-        self,
-        name: Optional[Union[str, StringAttr]] = None,
-        args: Optional[Sequence[Union[OpView, Operation, Value]]] = None,
-        attributes: Optional[Mapping[str, Union[OpView, Operation, Value]]] = None,
-        types: Optional[Sequence[Union[OpView, Operation, Value]]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        if types is None:
-            types = []
-        if attributes is None:
-            attributes = {}
-        if args is None:
-            args = []
-        args = _get_values(args)
-        attrNames = []
-        attrValues = []
-        for attrName, attrValue in attributes.items():
-            attrNames.append(StringAttr.get(attrName))
-            attrValues.append(_get_value(attrValue))
-        attrNames = ArrayAttr.get(attrNames)
-        types = _get_values(types)
-        result = pdl.OperationType.get()
-        super().__init__(
-            result, args, attrValues, attrNames, types, opName=name, loc=loc, ip=ip
-        )
-
-
-class PatternOp:
-    """Specialization for PDL pattern op class."""
-
-    def __init__(
-        self,
-        benefit: Union[IntegerAttr, int],
-        name: Optional[Union[StringAttr, str]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        """Creates an PDL `pattern` operation."""
-        super().__init__(benefit, sym_name=name, loc=loc, ip=ip)
-        self.regions[0].blocks.append()
-
-    @property
-    def body(self):
-        """Return the body (block) of the pattern."""
-        return self.regions[0].blocks[0]
-
-
-class ReplaceOp:
-    """Specialization for PDL replace op class."""
-
-    def __init__(
-        self,
-        op: Union[OpView, Operation, Value],
-        *,
-        with_op: Optional[Union[OpView, Operation, Value]] = None,
-        with_values: Optional[Sequence[Union[OpView, Operation, Value]]] = None,
-        loc=None,
-        ip=None,
-    ):
-        if with_values is None:
-            with_values = []
-        op = _get_value(op)
-        with_op = with_op if with_op is None else _get_value(with_op)
-        with_values = _get_values(with_values)
-        super().__init__(op, with_values, replOperation=with_op, loc=loc, ip=ip)
-
-
-class ResultOp:
-    """Specialization for PDL result op class."""
-
-    def __init__(
-        self,
-        parent: Union[OpView, Operation, Value],
-        index: Union[IntegerAttr, int],
-        *,
-        loc=None,
-        ip=None,
-    ):
-        parent = _get_value(parent)
-        result = pdl.ValueType.get()
-        super().__init__(result, parent, index, loc=loc, ip=ip)
-
-
-class ResultsOp:
-    """Specialization for PDL results op class."""
-
-    def __init__(
-        self,
-        result: Type,
-        parent: Union[OpView, Operation, Value],
-        index: Optional[Union[IntegerAttr, int]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        parent = _get_value(parent)
-        super().__init__(result, parent, index=index, loc=loc, ip=ip)
-
-
-class RewriteOp:
-    """Specialization for PDL rewrite op class."""
-
-    def __init__(
-        self,
-        root: Optional[Union[OpView, Operation, Value]] = None,
-        name: Optional[Union[StringAttr, str]] = None,
-        args: Optional[Sequence[Union[OpView, Operation, Value]]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        if args is None:
-            args = []
-        root = root if root is None else _get_value(root)
-        args = _get_values(args)
-        super().__init__(args, root=root, name=name, loc=loc, ip=ip)
-
-    def add_body(self):
-        """Add body (block) to the rewrite."""
-        self.regions[0].blocks.append()
-        return self.body
-
-    @property
-    def body(self):
-        """Return the body (block) of the rewrite."""
-        return self.regions[0].blocks[0]
-
-
-class TypeOp:
-    """Specialization for PDL type op class."""
-
-    def __init__(
-        self, constantType: Optional[Union[TypeAttr, Type]] = None, *, loc=None, ip=None
-    ):
-        result = pdl.TypeType.get()
-        super().__init__(result, constantType=constantType, loc=loc, ip=ip)
-
-
-class TypesOp:
-    """Specialization for PDL types op class."""
-
-    def __init__(
-        self,
-        constantTypes: Optional[Sequence[Union[TypeAttr, Type]]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        if constantTypes is None:
-            constantTypes = []
-        result = pdl.RangeType.get(pdl.TypeType.get())
-        super().__init__(result, constantTypes=constantTypes, loc=loc, ip=ip)
diff --git a/mlir/python/mlir/dialects/_scf_ops_ext.py b/mlir/python/mlir/dialects/_scf_ops_ext.py
deleted file mode 100644
index 89cc8a19895c7b4..000000000000000
--- a/mlir/python/mlir/dialects/_scf_ops_ext.py
+++ /dev/null
@@ -1,107 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-    from ..ir import *
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-from typing import Optional, Sequence, Union
-
-from ._ods_common import (
-    get_op_result_or_value as _get_op_result_or_value,
-    get_op_results_or_values as _get_op_results_or_values,
-)
-
-
-class ForOp:
-    """Specialization for the SCF for op class."""
-
-    def __init__(
-        self,
-        lower_bound,
-        upper_bound,
-        step,
-        iter_args: Optional[Union[Operation, OpView, Sequence[Value]]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        """Creates an SCF `for` operation.
-
-        - `lower_bound` is the value to use as lower bound of the loop.
-        - `upper_bound` is the value to use as upper bound of the loop.
-        - `step` is the value to use as loop step.
-        - `iter_args` is a list of additional loop-carried arguments or an operation
-          producing them as results.
-        """
-        if iter_args is None:
-            iter_args = []
-        iter_args = _get_op_results_or_values(iter_args)
-
-        results = [arg.type for arg in iter_args]
-        super().__init__(
-            self.build_generic(
-                regions=1,
-                results=results,
-                operands=[
-                    _get_op_result_or_value(o) for o in [lower_bound, upper_bound, step]
-                ]
-                + list(iter_args),
-                loc=loc,
-                ip=ip,
-            )
-        )
-        self.regions[0].blocks.append(self.operands[0].type, *results)
-
-    @property
-    def body(self):
-        """Returns the body (block) of the loop."""
-        return self.regions[0].blocks[0]
-
-    @property
-    def induction_variable(self):
-        """Returns the induction variable of the loop."""
-        return self.body.arguments[0]
-
-    @property
-    def inner_iter_args(self):
-        """Returns the loop-carried arguments usable within the loop.
-
-        To obtain the loop-carried operands, use `iter_args`.
-        """
-        return self.body.arguments[1:]
-
-
-class IfOp:
-    """Specialization for the SCF if op class."""
-
-    def __init__(self, cond, results_=[], *, hasElse=False, loc=None, ip=None):
-        """Creates an SCF `if` operation.
-
-        - `cond` is a MLIR value of 'i1' type to determine which regions of code will be executed.
-        - `hasElse` determines whether the if operation has the else branch.
-        """
-        operands = []
-        operands.append(cond)
-        results = []
-        results.extend(results_)
-        super().__init__(
-            self.build_generic(
-                regions=2, results=results, operands=operands, loc=loc, ip=ip
-            )
-        )
-        self.regions[0].blocks.append(*[])
-        if hasElse:
-            self.regions[1].blocks.append(*[])
-
-    @property
-    def then_block(self):
-        """Returns the then block of the if operation."""
-        return self.regions[0].blocks[0]
-
-    @property
-    def else_block(self):
-        """Returns the else block of the if operation."""
-        return self.regions[1].blocks[0]
diff --git a/mlir/python/mlir/dialects/_structured_transform_ops_ext.py b/mlir/python/mlir/dialects/_structured_transform_ops_ext.py
deleted file mode 100644
index 3757a3d3b4cce85..000000000000000
--- a/mlir/python/mlir/dialects/_structured_transform_ops_ext.py
+++ /dev/null
@@ -1,759 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-    from ..ir import *
-    from ..dialects import transform
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-from typing import List, Optional, Sequence, Tuple, Union, overload
-
-StaticIntLike = Union[int, IntegerAttr]
-ValueLike = Union[Operation, OpView, Value]
-MixedInt = Union[StaticIntLike, ValueLike]
-
-IntOrAttrList = Sequence[Union[IntegerAttr, int]]
-OptionalIntList = Optional[Union[ArrayAttr, IntOrAttrList]]
-
-BoolOrAttrList = Sequence[Union[BoolAttr, bool]]
-OptionalBoolList = Optional[Union[ArrayAttr, BoolOrAttrList]]
-
-MixedValues = Union[Sequence[Union[StaticIntLike, ValueLike]], ArrayAttr, ValueLike]
-
-DynamicIndexList = Sequence[Union[MixedInt, Sequence[MixedInt]]]
-
-
-def _dispatch_dynamic_index_list(
-    indices: Union[DynamicIndexList, ArrayAttr],
-) -> Tuple[List[ValueLike], Union[List[int], ArrayAttr], List[bool]]:
-    """Dispatches a list of indices to the appropriate form.
-
-    This is similar to the custom `DynamicIndexList` directive upstream:
-    provided indices may be in the form of dynamic SSA values or static values,
-    and they may be scalable (i.e., as a singleton list) or not. This function
-    dispatches each index into its respective form. It also extracts the SSA
-    values and static indices from various similar structures, respectively.
-    """
-    dynamic_indices = []
-    static_indices = [ShapedType.get_dynamic_size()] * len(indices)
-    scalable_indices = [False] * len(indices)
-
-    # ArrayAttr: Extract index values.
-    if isinstance(indices, ArrayAttr):
-        indices = [idx for idx in indices]
-
-    def process_nonscalable_index(i, index):
-        """Processes any form of non-scalable index.
-
-        Returns False if the given index was scalable and thus remains
-        unprocessed; True otherwise.
-        """
-        if isinstance(index, int):
-            static_indices[i] = index
-        elif isinstance(index, IntegerAttr):
-            static_indices[i] = index.value  # pytype: disable=attribute-error
-        elif isinstance(index, (Operation, Value, OpView)):
-            dynamic_indices.append(index)
-        else:
-            return False
-        return True
-
-    # Process each index at a time.
-    for i, index in enumerate(indices):
-        if not process_nonscalable_index(i, index):
-            # If it wasn't processed, it must be a scalable index, which is
-            # provided as a Sequence of one value, so extract and process that.
-            scalable_indices[i] = True
-            assert len(index) == 1
-            ret = process_nonscalable_index(i, index[0])
-            assert ret
-
-    return dynamic_indices, static_indices, scalable_indices
-
-
-# Dispatches `MixedValues` that all represents integers in various forms into
-# the following three categories:
-#   - `dynamic_values`: a list of `Value`s, potentially from op results;
-#   - `packed_values`: a value handle, potentially from an op result, associated
-#                      to one or more payload operations of integer type;
-#   - `static_values`: an `ArrayAttr` of `i64`s with static values, from Python
-#                      `int`s, from `IntegerAttr`s, or from an `ArrayAttr`.
-# The input is in the form for `packed_values`, only that result is set and the
-# other two are empty. Otherwise, the input can be a mix of the other two forms,
-# and for each dynamic value, a special value is added to the `static_values`.
-def _dispatch_mixed_values(
-    values: MixedValues,
-) -> Tuple[List[Value], Union[Operation, Value, OpView], DenseI64ArrayAttr]:
-    dynamic_values = []
-    packed_values = None
-    static_values = None
-    if isinstance(values, ArrayAttr):
-        static_values = values
-    elif isinstance(values, (Operation, Value, OpView)):
-        packed_values = values
-    else:
-        static_values = []
-        for size in values or []:
-            if isinstance(size, int):
-                static_values.append(size)
-            else:
-                static_values.append(ShapedType.get_dynamic_size())
-                dynamic_values.append(size)
-        static_values = DenseI64ArrayAttr.get(static_values)
-
-    return (dynamic_values, packed_values, static_values)
-
-
-def _get_value_or_attribute_value(
-    value_or_attr: Union[any, Attribute, ArrayAttr]
-) -> any:
-    if isinstance(value_or_attr, Attribute) and hasattr(value_or_attr, "value"):
-        return value_or_attr.value
-    if isinstance(value_or_attr, ArrayAttr):
-        return _get_value_list(value_or_attr)
-    return value_or_attr
-
-
-def _get_value_list(
-    sequence_or_array_attr: Union[Sequence[any], ArrayAttr]
-) -> Sequence[any]:
-    return [_get_value_or_attribute_value(v) for v in sequence_or_array_attr]
-
-
-def _get_int_array_attr(values: Optional[Union[ArrayAttr, IntOrAttrList]]) -> ArrayAttr:
-    if values is None:
-        return None
-
-    # Turn into a Python list of Python ints.
-    values = _get_value_list(values)
-
-    # Make an ArrayAttr of IntegerAttrs out of it.
-    return ArrayAttr.get(
-        [IntegerAttr.get(IntegerType.get_signless(64), v) for v in values]
-    )
-
-
-def _get_int_array_array_attr(
-    values: Optional[Union[ArrayAttr, Sequence[Union[ArrayAttr, IntOrAttrList]]]]
-) -> ArrayAttr:
-    """Creates an ArrayAttr of ArrayAttrs of IntegerAttrs.
-
-    The input has to be a collection of collection of integers, where any
-    Python Sequence and ArrayAttr are admissible collections and Python ints and
-    any IntegerAttr are admissible integers. Both levels of collections are
-    turned into ArrayAttr; the inner level is turned into IntegerAttrs of i64s.
-    If the input is None, an empty ArrayAttr is returned.
-    """
-    if values is None:
-        return None
-
-    # Make sure the outer level is a list.
-    values = _get_value_list(values)
-
-    # The inner level is now either invalid or a mixed sequence of ArrayAttrs and
-    # Sequences. Make sure the nested values are all lists.
-    values = [_get_value_list(nested) for nested in values]
-
-    # Turn each nested list into an ArrayAttr.
-    values = [_get_int_array_attr(nested) for nested in values]
-
-    # Turn the outer list into an ArrayAttr.
-    return ArrayAttr.get(values)
-
-
-class BufferizeToAllocationOp:
-    """Specialization for BufferizeToAllocationOp class."""
-
-    def __init__(
-        self,
-        target: Union[Operation, OpView, Value],
-        *,
-        memory_space: Optional[Union[int, str, Attribute]] = None,
-        memcpy_op: Optional[str] = None,
-        alloc_op: Optional[str] = None,
-        bufferize_destination_only: Optional[bool] = None,
-        loc=None,
-        ip=None,
-    ):
-        # No other types are allowed, so hard-code those here.
-        allocated_buffer_type = transform.AnyValueType.get()
-        new_ops_type = transform.AnyOpType.get()
-
-        if isinstance(memory_space, int):
-            memory_space = str(memory_space)
-        if isinstance(memory_space, str):
-            memory_space = Attribute.parse(memory_space)
-
-        super().__init__(
-            allocated_buffer_type,
-            new_ops_type,
-            target,
-            memory_space=memory_space,
-            memcpy_op=memcpy_op,
-            alloc_op=alloc_op,
-            bufferize_destination_only=bufferize_destination_only,
-            loc=loc,
-            ip=ip,
-        )
-
-
-class DecomposeOp:
-    """Specialization for DecomposeOp class."""
-
-    def __init__(self, target: Union[Operation, Value], *, loc=None, ip=None):
-        transformed_type = transform.AnyOpType.get()
-        super().__init__(transformed_type, target, loc=loc, ip=ip)
-
-
-class FuseIntoContainingOp:
-    """Specialization for FuseIntoContainingOp class."""
-
-    @overload
-    def __init__(
-        self,
-        fused_op_type: Type,
-        new_containing_op_type: Type,
-        producer_op: Union[Operation, OpView, Value],
-        containing_op: Union[Operation, OpView, Value],
-        *,
-        loc=None,
-        ip=None,
-    ):
-        ...
-
-    @overload
-    def __init__(
-        self,
-        producer_op: Union[Operation, OpView, Value],
-        containing_op: Union[Operation, OpView, Value],
-        *,
-        loc=None,
-        ip=None,
-    ):
-        ...
-
-    def __init__(
-        self,
-        fused_op_type_or_producer_op: Union[Operation, OpView, Type, Value],
-        new_containing_op_type_or_containing_op: Union[Operation, OpView, Type, Value],
-        producer_op_or_none: Optional[Union[Operation, OpView, Value]] = None,
-        containing_op_or_none: Optional[Union[Operation, OpView, Value]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        if isinstance(fused_op_type_or_producer_op, Type):
-            if not isinstance(new_containing_op_type_or_containing_op, Type):
-                raise TypeError(
-                    "If 'fused_op_type_or_producer_op' is a type, then "
-                    "'new_containing_op_type_or_containing_op' is expected "
-                    "to be one as well."
-                )
-            fused_op_type = fused_op_type_or_producer_op
-            new_containing_op_type = new_containing_op_type_or_containing_op
-            producer_op = producer_op_or_none
-            containing_op = containing_op_or_none
-        else:
-            fused_op_type = transform.AnyOpType.get()
-            new_containing_op_type = transform.AnyOpType.get()
-            producer_op = fused_op_type_or_producer_op
-            containing_op = new_containing_op_type_or_containing_op
-
-        super().__init__(
-            fused_op_type,
-            new_containing_op_type,
-            producer_op,
-            containing_op,
-            loc=loc,
-            ip=ip,
-        )
-
-
-class GeneralizeOp:
-    """Specialization for GeneralizeOp class."""
-
-    def __init__(self, target: Union[Operation, Value], *, loc=None, ip=None):
-        transformed_type = transform.AnyOpType.get()
-        super().__init__(transformed_type, target, loc=loc, ip=ip)
-
-
-class InterchangeOp:
-    """Specialization for InterchangeOp class."""
-
-    def __init__(
-        self,
-        target: Union[Operation, Value],
-        *,
-        iterator_interchange: OptionalIntList = None,
-        loc=None,
-        ip=None,
-    ):
-        transformed_type = transform.AnyOpType.get()
-        super().__init__(
-            transformed_type,
-            target,
-            iterator_interchange=iterator_interchange,
-            loc=loc,
-            ip=ip,
-        )
-
-
-class MapCopyToThreadsOp:
-    """Specialization for MapCopyToThreadsOp class."""
-
-    @overload
-    def __init__(
-        self,
-        forall_op_type: Type,
-        tiled_op_type: Type,
-        target: Union[Operation, OpView, Value],
-        *,
-        total_num_threads: Union[int, IntegerAttr],
-        desired_bit_alignment: Union[int, IntegerAttr],
-        loc=None,
-        ip=None,
-    ):
-        ...
-
-    @overload
-    def __init__(
-        self,
-        target: Union[Operation, OpView, Value],
-        *,
-        total_num_threads: Union[int, IntegerAttr],
-        desired_bit_alignment: Union[int, IntegerAttr],
-        loc=None,
-        ip=None,
-    ):
-        ...
-
-    def __init__(
-        self,
-        forall_op_type_or_target: Union[Operation, OpView, Type, Value],
-        tiled_op_type_or_none: Optional[Type] = None,
-        target_or_none: Optional[Union[Operation, OpView, Value]] = None,
-        *,
-        total_num_threads: Union[int, IntegerAttr],
-        desired_bit_alignment: Union[int, IntegerAttr],
-        loc=None,
-        ip=None,
-    ):
-        if isinstance(forall_op_type_or_target, Type):
-            forall_op_type = forall_op_type_or_target
-            tiled_op_type = tiled_op_type_or_none
-            target = target_or_none
-        else:
-            forall_op_type = transform.AnyOpType.get()
-            tiled_op_type = transform.AnyOpType.get()
-            target = forall_op_type_or_target
-
-        super().__init__(
-            forall_op_type,
-            tiled_op_type,
-            target,
-            total_num_threads=total_num_threads,
-            desired_bit_alignment=desired_bit_alignment,
-            loc=loc,
-            ip=ip,
-        )
-
-
-class VectorizeOp:
-    """Specialization for VectorizeOp class."""
-
-    def __init__(
-        self,
-        target: Union[Operation, OpView, Value],
-        vector_sizes: Optional[Union[DynamicIndexList, ArrayAttr]] = None,
-        *,
-        vectorize_nd_extract: Optional[bool] = None,
-        scalable_sizes: OptionalBoolList = None,
-        static_vector_sizes: OptionalIntList = None,
-        loc=None,
-        ip=None,
-    ):
-        if (
-            scalable_sizes is None
-            and static_vector_sizes is None
-            and vector_sizes is None
-        ):
-            dynamic_vector_sizes = []
-        elif scalable_sizes is None and static_vector_sizes is None:
-            (
-                dynamic_vector_sizes,
-                static_vector_sizes,
-                scalable_sizes,
-            ) = _dispatch_dynamic_index_list(vector_sizes)
-        elif scalable_sizes is None or static_vector_sizes is None:
-            raise TypeError(
-                "'scalable_sizes' and 'static_vector_sizes' must either both "
-                "be given explicitly or both be given as part of 'vector_sizes'."
-            )
-        else:
-            dynamic_vector_sizes = vector_sizes
-
-        super().__init__(
-            target,
-            vector_sizes=dynamic_vector_sizes,
-            static_vector_sizes=static_vector_sizes,
-            scalable_sizes=scalable_sizes,
-            vectorize_nd_extract=vectorize_nd_extract,
-            loc=loc,
-            ip=ip,
-        )
-
-
-class MatchOp:
-    """Specialization for MatchOp class."""
-
-    @overload
-    @classmethod
-    def match_op_names(
-        cls,
-        target: Union[Operation, Value],
-        names: Union[str, Sequence[str]],
-        *,
-        loc=None,
-        ip=None,
-    ):
-        ...
-
-    @overload
-    @classmethod
-    def match_op_names(
-        cls,
-        result_type: Type,
-        target: Union[Operation, Value],
-        names: Union[str, Sequence[str]],
-        *,
-        loc=None,
-        ip=None,
-    ):
-        ...
-
-    @classmethod
-    def match_op_names(
-        cls,
-        result_type_or_target: Union[Type, Operation, Value],
-        target_or_names: Union[Operation, Value, Sequence[str], str],
-        names_or_none: Optional[Union[Sequence[str], str]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        if isinstance(result_type_or_target, Type):
-            result_type = result_type_or_target
-            target = target_or_names
-            names = names_or_none
-        else:
-            result_type = transform.AnyOpType.get()
-            target = result_type_or_target
-            names = target_or_names
-
-        if isinstance(names, str):
-            names = [names]
-
-        return cls(
-            result_type,
-            target,
-            ops=ArrayAttr.get(list(map(lambda s: StringAttr.get(s), names))),
-            loc=loc,
-            ip=ip,
-        )
-
-
-class MultiTileSizesOp:
-    """Specialization for MultiTileSizesOp class."""
-
-    def __init__(
-        self,
-        result_type: Type,
-        target: Union[Operation, Value],
-        *,
-        dimension: Union[int, IntegerAttr],
-        target_size: Union[int, IntegerAttr],
-        divisor: Optional[Optional[Union[int, IntegerAttr]]] = None,
-        loc=None,
-        ip=None,
-    ):
-        super().__init__(
-            result_type,
-            result_type,
-            result_type,
-            target,
-            dimension=dimension,
-            target_size=target_size,
-            divisor=divisor,
-            loc=loc,
-            ip=ip,
-        )
-
-
-class PadOp:
-    """Specialization for PadOp class."""
-
-    def __init__(
-        self,
-        target: Union[Operation, OpView, Value],
-        *,
-        padding_values: Optional[Union[ArrayAttr, Sequence[Attribute]]] = None,
-        padding_dimensions: OptionalIntList = None,
-        pad_to_multiple_of: OptionalIntList = None,
-        pack_paddings: OptionalIntList = None,
-        transpose_paddings: Optional[
-            Union[ArrayAttr, Sequence[Union[ArrayAttr, IntOrAttrList]]]
-        ] = None,
-        copy_back_op: Optional[Union[str, StringAttr]] = None,
-        loc=None,
-        ip=None,
-    ):
-        transpose_paddings = _get_int_array_array_attr(transpose_paddings)
-
-        any_op_type = transform.AnyOpType.get()
-        super().__init__(
-            any_op_type,
-            any_op_type,
-            any_op_type,
-            target,
-            padding_values=padding_values,
-            padding_dimensions=padding_dimensions,
-            pad_to_multiple_of=pad_to_multiple_of,
-            pack_paddings=pack_paddings,
-            transpose_paddings=transpose_paddings,
-            copy_back_op=copy_back_op,
-            loc=loc,
-            ip=ip,
-        )
-
-
-class ScalarizeOp:
-    """Specialization for ScalarizeOp class."""
-
-    def __init__(self, target: Union[Operation, Value], *, loc=None, ip=None):
-        result_type = transform.AnyOpType.get()
-        super().__init__(result_type, target, loc=loc, ip=ip)
-
-
-class SplitOp:
-    """Specialization for SplitOp class."""
-
-    def __init__(
-        self,
-        target: Union[Operation, Value],
-        dimension: Union[int, Attribute],
-        split_point: Union[int, Operation, Value, Attribute],
-        *,
-        loc=None,
-        ip=None,
-    ):
-        if isinstance(split_point, int):
-            static_split_point = split_point
-            dynamic_split_point = None
-        else:
-            static_split_point = ShapedType.get_dynamic_size()
-            dynamic_split_point = split_point
-
-        super().__init__(
-            target.type,
-            target.type,
-            target,
-            dimension=dimension,
-            static_split_point=static_split_point,
-            dynamic_split_point=dynamic_split_point,
-            loc=loc,
-            ip=ip,
-        )
-
-
-class TileUsingForOp:
-    """Specialization for TileUsingForOp class."""
-
-    @overload
-    def __init__(
-        self,
-        loop_types: Union[Type, List[Type]],
-        target: Union[Operation, Value],
-        *,
-        sizes: Optional[Union[DynamicIndexList, ArrayAttr]] = None,
-        interchange: OptionalIntList = None,
-        loc=None,
-        ip=None,
-    ):
-        ...
-
-    @overload
-    def __init__(
-        self,
-        target: Union[Operation, Value, OpView],
-        *,
-        sizes: Optional[Union[DynamicIndexList, ArrayAttr]] = None,
-        interchange: OptionalIntList = None,
-        loc=None,
-        ip=None,
-    ):
-        ...
-
-    def __init__(
-        self,
-        loop_types_or_target: Union[Type, List[Type], Operation, Value],
-        target_or_none: Optional[Union[Operation, Value, OpView]] = None,
-        *,
-        sizes: Optional[Union[DynamicIndexList, ArrayAttr]] = None,
-        interchange: OptionalIntList = None,
-        loc=None,
-        ip=None,
-    ):
-        (
-            dynamic_sizes,
-            static_sizes,
-            scalable_sizes,
-        ) = _dispatch_dynamic_index_list(sizes)
-
-        num_loops = sum(v if v == 0 else 1 for v in static_sizes)
-
-        if isinstance(loop_types_or_target, (Operation, Value, OpView)):
-            loop_types = [transform.AnyOpType.get()] * num_loops
-            target = loop_types_or_target
-            assert (
-                target_or_none is None
-            ), "Cannot construct TileUsingForOp with two targets."
-        else:
-            loop_types = (
-                ([loop_types_or_target] * num_loops)
-                if isinstance(loop_types_or_target, Type)
-                else loop_types_or_target
-            )
-            target = target_or_none
-
-        super().__init__(
-            target.type,
-            loop_types,
-            target,
-            dynamic_sizes=dynamic_sizes,
-            static_sizes=static_sizes,
-            interchange=interchange,
-            scalable_sizes=scalable_sizes,
-            loc=loc,
-            ip=ip,
-        )
-
-
-class TileUsingForallOp:
-    """Specialization for TileUsingForallOp class."""
-
-    @overload
-    def __init__(
-        self,
-        loops_type: Type,
-        tiled_op_type: Type,
-        target: Union[Operation, Value, OpView],
-        *,
-        num_threads: Optional[MixedValues] = None,
-        tile_sizes: MixedValues = None,
-        mapping=None,
-        loc=None,
-        ip=None,
-    ):
-        ...
-
-    @overload
-    def __init__(
-        self,
-        target: Union[Operation, Value, OpView],
-        *,
-        num_threads: Optional[MixedValues] = None,
-        tile_sizes: MixedValues = None,
-        mapping=None,
-        loc=None,
-        ip=None,
-    ):
-        ...
-
-    def __init__(
-        self,
-        loops_type_or_target: Union[
-            Type, Union[Operation, Value, OpView]  # loops_type
-        ],  # target
-        tiled_op_type_or_none: Optional[Type] = None,
-        target_or_none: Optional[Union[Operation, Value, OpView]] = None,
-        *,
-        num_threads: MixedValues = None,
-        tile_sizes: MixedValues = None,
-        mapping=None,
-        loc=None,
-        ip=None,
-    ):
-        # `Type` arguments in the front are optional: add default values to front.
-        if isinstance(loops_type_or_target, Type):
-            # First overload: type arguments provided.
-            if not isinstance(tiled_op_type_or_none, Type):
-                raise TypeError(
-                    "If 'loops_type_or_target' is a type, then "
-                    "'tiled_op_type_or_none' is expected to be one as well."
-                )
-            loops_type = loops_type_or_target
-            tiled_op_type = tiled_op_type_or_none
-            target = target_or_none
-        else:
-            # Last overload: type arguments missing.
-            loops_type = transform.AnyOpType.get()
-            tiled_op_type = transform.AnyOpType.get()
-            target = loops_type_or_target
-
-        # Unpack mixed num_threads.
-        (
-            dynamic_num_threads,
-            packed_num_threads,
-            num_threads_attr,
-        ) = _dispatch_mixed_values(num_threads)
-
-        # Unpack mixed tile_sizes.
-        (
-            dynamic_tile_sizes,
-            packed_tile_sizes,
-            tile_sizes_attr,
-        ) = _dispatch_mixed_values(tile_sizes)
-
-        super().__init__(
-            loops_type,
-            tiled_op_type,
-            target=target,
-            tile_sizes=dynamic_tile_sizes,
-            packed_tile_sizes=packed_tile_sizes,
-            static_tile_sizes=tile_sizes_attr,
-            num_threads=dynamic_num_threads,
-            packed_num_threads=packed_num_threads,
-            static_num_threads=num_threads_attr,
-            mapping=mapping,
-            loc=loc,
-            ip=ip,
-        )
-
-
-class VectorizeChildrenAndApplyPatternsOp:
-    """Specialization for VectorizeChildrenAndApplyPatternsOp class."""
-
-    def __init__(
-        self,
-        target: Union[Operation, Value],
-        *,
-        disable_multi_reduction_to_contract_patterns: bool = False,
-        disable_transfer_permutation_map_lowering_patterns: bool = False,
-        vectorize_nd_extract: bool = False,
-        vectorize_padding: bool = False,
-        loc=None,
-        ip=None,
-    ):
-        transformed_type = transform.AnyOpType.get()
-        super().__init__(
-            transformed_type,
-            target,
-            disable_multi_reduction_to_contract_patterns=disable_multi_reduction_to_contract_patterns,
-            disable_transfer_permutation_map_lowering_patterns=disable_transfer_permutation_map_lowering_patterns,
-            vectorize_nd_extract=vectorize_nd_extract,
-            vectorize_padding=vectorize_padding,
-            loc=loc,
-            ip=ip,
-        )
diff --git a/mlir/python/mlir/dialects/_tensor_ops_ext.py b/mlir/python/mlir/dialects/_tensor_ops_ext.py
deleted file mode 100644
index 09b9ec68db7d9c7..000000000000000
--- a/mlir/python/mlir/dialects/_tensor_ops_ext.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-    from ..ir import *
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-from typing import Any, Optional, Sequence, Union
-from ._ods_common import (
-    get_op_result_or_value as _get_op_result_or_value,
-    get_op_results_or_values as _get_op_results_or_values,
-)
-
-
-class EmptyOp:
-    """Extends the tensor.empty op."""
-
-    def __init__(
-        self,
-        sizes: Sequence[Union[int, Value]],
-        element_type: Type,
-        *,
-        loc=None,
-        ip=None
-    ):
-        """Constructs an `empty` with mixed static/dynamic sizes."""
-        # TODO: Refactor the EmptyOp to take an element type attribute and
-        # then use normal result type inference, unifying the Python and C++ side
-        # with a standard mechanism (versus stashing that in builders).
-        dynamic_sizes = []
-        static_sizes = []
-        for s in sizes:
-            if isinstance(s, int):
-                static_sizes.append(s)
-            else:
-                static_sizes.append(ShapedType.get_dynamic_size())
-                dynamic_sizes.append(s)
-        result_type = RankedTensorType.get(static_sizes, element_type)
-        op = self.build_generic(
-            results=[result_type], operands=dynamic_sizes, attributes={}, loc=loc, ip=ip
-        )
-        OpView.__init__(self, op)
diff --git a/mlir/python/mlir/dialects/_tensor_transform_ops_ext.py b/mlir/python/mlir/dialects/_tensor_transform_ops_ext.py
deleted file mode 100644
index 996093fbc913e8a..000000000000000
--- a/mlir/python/mlir/dialects/_tensor_transform_ops_ext.py
+++ /dev/null
@@ -1,64 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-    from ..ir import *
-    from ..dialects import transform
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-from typing import Optional, overload, Union
-
-
-class MakeLoopIndependentOp:
-    """Specialization for MakeLoopIndependentOp class."""
-
-    @overload
-    def __init__(
-        self,
-        transformed_type: Type,
-        target: Union[Operation, OpView, Value],
-        num_loops: Union[int, IntegerAttr],
-        *,
-        loc=None,
-        ip=None
-    ):
-        ...
-
-    @overload
-    def __init__(
-        self,
-        target: Union[Operation, OpView, Value],
-        num_loops: Union[int, IntegerAttr],
-        *,
-        loc=None,
-        ip=None
-    ):
-        ...
-
-    def __init__(
-        self,
-        transformed_type_or_target: Type,
-        target_or_num_loops: Union[int, IntegerAttr, Operation, OpView, Value] = None,
-        num_loops_or_none: Optional[Union[int, IntegerAttr]] = None,
-        *,
-        loc=None,
-        ip=None
-    ):
-        if isinstance(transformed_type_or_target, Type):
-            transformed_type = transformed_type_or_target
-            target = target_or_num_loops
-            num_loops = num_loops_or_none
-        else:
-            transformed_type = transform.AnyOpType.get()
-            target = transformed_type_or_target
-            num_loops = target_or_num_loops
-
-        super().__init__(
-            transformed_type,
-            target,
-            num_loops,
-            loc=loc,
-            ip=ip,
-        )
diff --git a/mlir/python/mlir/dialects/_transform_ops_ext.py b/mlir/python/mlir/dialects/_transform_ops_ext.py
deleted file mode 100644
index b1e7b892536f4a1..000000000000000
--- a/mlir/python/mlir/dialects/_transform_ops_ext.py
+++ /dev/null
@@ -1,176 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-    from ..ir import *
-    from ._ods_common import (
-        get_op_result_or_value as _get_op_result_or_value,
-        get_op_results_or_values as _get_op_results_or_values,
-    )
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-from typing import Optional, Sequence, Union
-
-
-class CastOp:
-    def __init__(
-        self,
-        result_type: Type,
-        target: Union[Operation, Value],
-        *,
-        loc=None,
-        ip=None,
-    ):
-        super().__init__(result_type, _get_op_result_or_value(target), loc=loc, ip=ip)
-
-
-class ApplyPatternsOp:
-    def __init__(
-        self,
-        target: Union[Operation, Value, OpView],
-        *,
-        loc=None,
-        ip=None,
-    ):
-        operands = []
-        operands.append(_get_op_result_or_value(target))
-        super().__init__(
-            self.build_generic(
-                attributes={},
-                results=[],
-                operands=operands,
-                successors=None,
-                regions=None,
-                loc=loc,
-                ip=ip,
-            )
-        )
-        self.regions[0].blocks.append()
-
-    @property
-    def patterns(self) -> Block:
-        return self.regions[0].blocks[0]
-
-
-class testGetParentOp:
-    def __init__(
-        self,
-        result_type: Type,
-        target: Union[Operation, Value],
-        *,
-        isolated_from_above: bool = False,
-        op_name: Optional[str] = None,
-        deduplicate: bool = False,
-        loc=None,
-        ip=None,
-    ):
-        super().__init__(
-            result_type,
-            _get_op_result_or_value(target),
-            isolated_from_above=isolated_from_above,
-            op_name=op_name,
-            deduplicate=deduplicate,
-            loc=loc,
-            ip=ip,
-        )
-
-
-class MergeHandlesOp:
-    def __init__(
-        self,
-        handles: Sequence[Union[Operation, Value]],
-        *,
-        deduplicate: bool = False,
-        loc=None,
-        ip=None,
-    ):
-        super().__init__(
-            [_get_op_result_or_value(h) for h in handles],
-            deduplicate=deduplicate,
-            loc=loc,
-            ip=ip,
-        )
-
-
-class ReplicateOp:
-    def __init__(
-        self,
-        pattern: Union[Operation, Value],
-        handles: Sequence[Union[Operation, Value]],
-        *,
-        loc=None,
-        ip=None,
-    ):
-        super().__init__(
-            [_get_op_result_or_value(h).type for h in handles],
-            _get_op_result_or_value(pattern),
-            [_get_op_result_or_value(h) for h in handles],
-            loc=loc,
-            ip=ip,
-        )
-
-
-class SequenceOp:
-    def __init__(
-        self,
-        failure_propagation_mode,
-        results: Sequence[Type],
-        target: Union[Operation, Value, Type],
-        extra_bindings: Optional[
-            Union[Sequence[Value], Sequence[Type], Operation, OpView]
-        ] = None,
-    ):
-        root = (
-            _get_op_result_or_value(target)
-            if isinstance(target, (Operation, Value))
-            else None
-        )
-        root_type = root.type if not isinstance(target, Type) else target
-
-        if extra_bindings is None:
-            extra_bindings = []
-        if isinstance(extra_bindings, (Operation, OpView)):
-            extra_bindings = _get_op_results_or_values(extra_bindings)
-
-        extra_binding_types = []
-        if len(extra_bindings) != 0:
-            if isinstance(extra_bindings[0], Type):
-                extra_binding_types = extra_bindings
-                extra_bindings = []
-            else:
-                extra_binding_types = [v.type for v in extra_bindings]
-
-        super().__init__(
-            results_=results,
-            failure_propagation_mode=failure_propagation_mode,
-            root=root,
-            extra_bindings=extra_bindings,
-        )
-        self.regions[0].blocks.append(*tuple([root_type] + extra_binding_types))
-
-    @property
-    def body(self) -> Block:
-        return self.regions[0].blocks[0]
-
-    @property
-    def bodyTarget(self) -> Value:
-        return self.body.arguments[0]
-
-    @property
-    def bodyExtraArgs(self) -> BlockArgumentList:
-        return self.body.arguments[1:]
-
-
-class YieldOp:
-    def __init__(
-        self,
-        operands: Optional[Union[Operation, Sequence[Value]]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        if operands is None:
-            operands = []
-        super().__init__(_get_op_results_or_values(operands), loc=loc, ip=ip)
diff --git a/mlir/python/mlir/dialects/_transform_pdl_extension_ops_ext.py b/mlir/python/mlir/dialects/_transform_pdl_extension_ops_ext.py
deleted file mode 100644
index c4e4b4b4254b038..000000000000000
--- a/mlir/python/mlir/dialects/_transform_pdl_extension_ops_ext.py
+++ /dev/null
@@ -1,55 +0,0 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-#  See https://llvm.org/LICENSE.txt for license information.
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-try:
-  from ..ir import *
-  from ._ods_common import (
-      get_op_result_or_value as _get_op_result_or_value,
-      get_op_results_or_values as _get_op_results_or_values,
-  )
-except ImportError as e:
-  raise RuntimeError("Error loading imports from extension module") from e
-
-from typing import Union
-
-class PDLMatchOp:
-
-  def __init__(
-      self,
-      result_type: Type,
-      target: Union[Operation, Value],
-      pattern_name: Union[Attribute, str],
-      *,
-      loc=None,
-      ip=None,
-  ):
-    super().__init__(
-        result_type,
-        _get_op_result_or_value(target),
-        pattern_name,
-        loc=loc,
-        ip=ip,
-    )
-
-
-class WithPDLPatternsOp:
-
-  def __init__(self,
-               target: Union[Operation, Value, Type],
-               *,
-               loc=None,
-               ip=None):
-    root = _get_op_result_or_value(target) if not isinstance(target,
-                                                             Type) else None
-    root_type = target if isinstance(target, Type) else root.type
-    super().__init__(root=root, loc=loc, ip=ip)
-    self.regions[0].blocks.append(root_type)
-
-  @property
-  def body(self) -> Block:
-    return self.regions[0].blocks[0]
-
-  @property
-  def bodyTarget(self) -> Value:
-    return self.body.arguments[0]
diff --git a/mlir/python/mlir/dialects/affine.py b/mlir/python/mlir/dialects/affine.py
index 8a2a64c7c40d190..1eaccfa73a85cbf 100644
--- a/mlir/python/mlir/dialects/affine.py
+++ b/mlir/python/mlir/dialects/affine.py
@@ -1,5 +1,50 @@
-#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.                                        
-#  See https://llvm.org/LICENSE.txt for license information.                                                            
-#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception    
+#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+#  See https://llvm.org/LICENSE.txt for license information.
+#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from ._affine_ops_gen import *
+from ._affine_ops_gen import _Dialect
+
+try:
+    from ..ir import *
+    from ._ods_common import (
+        get_op_result_or_value as _get_op_result_or_value,
+        get_op_results_or_values as _get_op_results_or_values,
+        _cext as _ods_cext,
+    )
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+from typing import Optional, Sequence, Union
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class AffineStoreOp(AffineStoreOp):
+    """Specialization for the Affine store operation."""
+
+    def __init__(
+        self,
+        value: Union[Operation, OpView, Value],
+        memref: Union[Operation, OpView, Value],
+        map: AffineMap = None,
+        *,
+        map_operands=None,
+        loc=None,
+        ip=None,
+    ):
+        """Creates an affine store operation.
+
+        - `value`: the value to store into the memref.
+        - `memref`: the buffer to store into.
+        - `map`: the affine map that maps the map_operands to the index of the
+          memref.
+        - `map_operands`: the list of arguments to substitute the dimensions,
+          then symbols in the affine map, in increasing order.
+        """
+        map = map if map is not None else []
+        map_operands = map_operands if map_operands is not None else []
+        indicies = [_get_op_result_or_value(op) for op in map_operands]
+        _ods_successors = None
+        super().__init__(
+            value, memref, indicies, AffineMapAttr.get(map), loc=loc, ip=ip
+        )
diff --git a/mlir/python/mlir/dialects/arith.py b/mlir/python/mlir/dialects/arith.py
index fb13beb63ca66c3..83aca0d58bf2cef 100644
--- a/mlir/python/mlir/dialects/arith.py
+++ b/mlir/python/mlir/dialects/arith.py
@@ -3,4 +3,75 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from ._arith_ops_gen import *
+from ._arith_ops_gen import _Dialect
 from ._arith_enum_gen import *
+
+try:
+    from ..ir import *
+    from ._ods_common import (
+        get_default_loc_context as _get_default_loc_context,
+        _cext as _ods_cext,
+    )
+
+    from typing import Any, List, Union
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+
+def _isa(obj: Any, cls: type):
+    try:
+        cls(obj)
+    except ValueError:
+        return False
+    return True
+
+
+def _is_any_of(obj: Any, classes: List[type]):
+    return any(_isa(obj, cls) for cls in classes)
+
+
+def _is_integer_like_type(type: Type):
+    return _is_any_of(type, [IntegerType, IndexType])
+
+
+def _is_float_type(type: Type):
+    return _is_any_of(type, [BF16Type, F16Type, F32Type, F64Type])
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class ConstantOp(ConstantOp):
+    """Specialization for the constant op class."""
+
+    def __init__(
+        self, result: Type, value: Union[int, float, Attribute], *, loc=None, ip=None
+    ):
+        if isinstance(value, int):
+            super().__init__(IntegerAttr.get(result, value), loc=loc, ip=ip)
+        elif isinstance(value, float):
+            super().__init__(FloatAttr.get(result, value), loc=loc, ip=ip)
+        else:
+            super().__init__(value, loc=loc, ip=ip)
+
+    @classmethod
+    def create_index(cls, value: int, *, loc=None, ip=None):
+        """Create an index-typed constant."""
+        return cls(
+            IndexType.get(context=_get_default_loc_context(loc)), value, loc=loc, ip=ip
+        )
+
+    @property
+    def type(self):
+        return self.results[0].type
+
+    @property
+    def value(self):
+        return Attribute(self.operation.attributes["value"])
+
+    @property
+    def literal_value(self) -> Union[int, float]:
+        if _is_integer_like_type(self.type):
+            return IntegerAttr(self.value).value
+        elif _is_float_type(self.type):
+            return FloatAttr(self.value).value
+        else:
+            raise ValueError("only integer and float constants have literal values")
diff --git a/mlir/python/mlir/dialects/bufferization.py b/mlir/python/mlir/dialects/bufferization.py
index 759b6aa24a9ff73..0ce5448ace4b14c 100644
--- a/mlir/python/mlir/dialects/bufferization.py
+++ b/mlir/python/mlir/dialects/bufferization.py
@@ -3,4 +3,40 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from ._bufferization_ops_gen import *
+from ._bufferization_ops_gen import _Dialect
 from ._bufferization_enum_gen import *
+
+try:
+    from typing import Sequence, Union
+    from ..ir import *
+    from ._ods_common import get_default_loc_context, _cext as _ods_cext
+
+    from typing import Any, List, Union
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class AllocTensorOp(AllocTensorOp):
+    """Extends the bufferization.alloc_tensor op."""
+
+    def __init__(
+        self,
+        tensor_type: Type,
+        dynamic_sizes: Sequence[Value],
+        copy: Value,
+        size_hint: Value,
+        escape: BoolAttr,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        """Constructs an `alloc_tensor` with static and/or dynamic sizes."""
+        super().__init__(
+            tensor_type,
+            dynamic_sizes,
+            copy=copy,
+            size_hint=size_hint,
+            loc=loc,
+            ip=ip,
+        )
diff --git a/mlir/python/mlir/dialects/builtin.py b/mlir/python/mlir/dialects/builtin.py
index 30279e1611f99aa..b71cc2466d464b3 100644
--- a/mlir/python/mlir/dialects/builtin.py
+++ b/mlir/python/mlir/dialects/builtin.py
@@ -3,3 +3,23 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from ._builtin_ops_gen import *
+from ._builtin_ops_gen import _Dialect
+
+try:
+    from ..ir import *
+    from ._ods_common import _cext as _ods_cext
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class ModuleOp(ModuleOp):
+    """Specialization for the module op class."""
+
+    def __init__(self, *, loc=None, ip=None):
+        super().__init__(loc=loc, ip=ip)
+        body = self.regions[0].blocks.append()
+
+    @property
+    def body(self):
+        return self.regions[0].blocks[0]
diff --git a/mlir/python/mlir/dialects/func.py b/mlir/python/mlir/dialects/func.py
index dc554c22173bc60..9c6c4c9092c7a88 100644
--- a/mlir/python/mlir/dialects/func.py
+++ b/mlir/python/mlir/dialects/func.py
@@ -3,3 +3,326 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from ._func_ops_gen import *
+from ._func_ops_gen import _Dialect
+
+try:
+    from ..ir import *
+    from ._ods_common import (
+        get_default_loc_context as _get_default_loc_context,
+        _cext as _ods_cext,
+    )
+
+    import inspect
+
+    from typing import Any, List, Optional, Sequence, Union
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+ARGUMENT_ATTRIBUTE_NAME = "arg_attrs"
+RESULT_ATTRIBUTE_NAME = "res_attrs"
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class ConstantOp(ConstantOp):
+    """Specialization for the constant op class."""
+
+    def __init__(self, result: Type, value: Attribute, *, loc=None, ip=None):
+        super().__init__(result, value, loc=loc, ip=ip)
+
+    @property
+    def type(self):
+        return self.results[0].type
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class FuncOp(FuncOp):
+    """Specialization for the func op class."""
+
+    def __init__(
+        self, name, type, *, visibility=None, body_builder=None, loc=None, ip=None
+    ):
+        """
+        Create a FuncOp with the provided `name`, `type`, and `visibility`.
+        - `name` is a string representing the function name.
+        - `type` is either a FunctionType or a pair of list describing inputs and
+          results.
+        - `visibility` is a string matching `public`, `private`, or `nested`. None
+          implies private visibility.
+        - `body_builder` is an optional callback, when provided a new entry block
+          is created and the callback is invoked with the new op as argument within
+          an InsertionPoint context already set for the block. The callback is
+          expected to insert a terminator in the block.
+        """
+        sym_name = StringAttr.get(str(name))
+
+        # If the type is passed as a tuple, build a FunctionType on the fly.
+        if isinstance(type, tuple):
+            type = FunctionType.get(inputs=type[0], results=type[1])
+
+        type = TypeAttr.get(type)
+        sym_visibility = (
+            StringAttr.get(str(visibility)) if visibility is not None else None
+        )
+        super().__init__(sym_name, type, sym_visibility=sym_visibility, loc=loc, ip=ip)
+        if body_builder:
+            entry_block = self.add_entry_block()
+            with InsertionPoint(entry_block):
+                body_builder(self)
+
+    @property
+    def is_external(self):
+        return len(self.regions[0].blocks) == 0
+
+    @property
+    def body(self):
+        return self.regions[0]
+
+    @property
+    def type(self):
+        return FunctionType(TypeAttr(self.attributes["function_type"]).value)
+
+    @property
+    def visibility(self):
+        return self.attributes["sym_visibility"]
+
+    @property
+    def name(self) -> StringAttr:
+        return StringAttr(self.attributes["sym_name"])
+
+    @property
+    def entry_block(self):
+        if self.is_external:
+            raise IndexError("External function does not have a body")
+        return self.regions[0].blocks[0]
+
+    def add_entry_block(self, arg_locs: Optional[Sequence[Location]] = None):
+        """
+        Add an entry block to the function body using the function signature to
+        infer block arguments.
+        Returns the newly created block
+        """
+        if not self.is_external:
+            raise IndexError("The function already has an entry block!")
+        self.body.blocks.append(*self.type.inputs, arg_locs=arg_locs)
+        return self.body.blocks[0]
+
+    @property
+    def arg_attrs(self):
+        return ArrayAttr(self.attributes[ARGUMENT_ATTRIBUTE_NAME])
+
+    @arg_attrs.setter
+    def arg_attrs(self, attribute: Union[ArrayAttr, list]):
+        if isinstance(attribute, ArrayAttr):
+            self.attributes[ARGUMENT_ATTRIBUTE_NAME] = attribute
+        else:
+            self.attributes[ARGUMENT_ATTRIBUTE_NAME] = ArrayAttr.get(
+                attribute, context=self.context
+            )
+
+    @property
+    def arguments(self):
+        return self.entry_block.arguments
+
+    @property
+    def result_attrs(self):
+        return self.attributes[RESULT_ATTRIBUTE_NAME]
+
+    @result_attrs.setter
+    def result_attrs(self, attribute: ArrayAttr):
+        self.attributes[RESULT_ATTRIBUTE_NAME] = attribute
+
+    @classmethod
+    def from_py_func(
+        FuncOp,
+        *inputs: Type,
+        results: Optional[Sequence[Type]] = None,
+        name: Optional[str] = None,
+    ):
+        """Decorator to define an MLIR FuncOp specified as a python function.
+
+        Requires that an `mlir.ir.InsertionPoint` and `mlir.ir.Location` are
+        active for the current thread (i.e. established in a `with` block).
+
+        When applied as a decorator to a Python function, an entry block will
+        be constructed for the FuncOp with types as specified in `*inputs`. The
+        block arguments will be passed positionally to the Python function. In
+        addition, if the Python function accepts keyword arguments generally or
+        has a corresponding keyword argument, the following will be passed:
+          * `func_op`: The `func` op being defined.
+
+        By default, the function name will be the Python function `__name__`. This
+        can be overriden by passing the `name` argument to the decorator.
+
+        If `results` is not specified, then the decorator will implicitly
+        insert a `ReturnOp` with the `Value`'s returned from the decorated
+        function. It will also set the `FuncOp` type with the actual return
+        value types. If `results` is specified, then the decorated function
+        must return `None` and no implicit `ReturnOp` is added (nor are the result
+        types updated). The implicit behavior is intended for simple, single-block
+        cases, and users should specify result types explicitly for any complicated
+        cases.
+
+        The decorated function can further be called from Python and will insert
+        a `CallOp` at the then-current insertion point, returning either None (
+        if no return values), a unary Value (for one result), or a list of Values).
+        This mechanism cannot be used to emit recursive calls (by construction).
+        """
+
+        def decorator(f):
+            from . import func
+
+            # Introspect the callable for optional features.
+            sig = inspect.signature(f)
+            has_arg_func_op = False
+            for param in sig.parameters.values():
+                if param.kind == param.VAR_KEYWORD:
+                    has_arg_func_op = True
+                if param.name == "func_op" and (
+                    param.kind == param.POSITIONAL_OR_KEYWORD
+                    or param.kind == param.KEYWORD_ONLY
+                ):
+                    has_arg_func_op = True
+
+            # Emit the FuncOp.
+            implicit_return = results is None
+            symbol_name = name or f.__name__
+            function_type = FunctionType.get(
+                inputs=inputs, results=[] if implicit_return else results
+            )
+            func_op = FuncOp(name=symbol_name, type=function_type)
+            with InsertionPoint(func_op.add_entry_block()):
+                func_args = func_op.entry_block.arguments
+                func_kwargs = {}
+                if has_arg_func_op:
+                    func_kwargs["func_op"] = func_op
+                return_values = f(*func_args, **func_kwargs)
+                if not implicit_return:
+                    return_types = list(results)
+                    assert return_values is None, (
+                        "Capturing a python function with explicit `results=` "
+                        "requires that the wrapped function returns None."
+                    )
+                else:
+                    # Coerce return values, add ReturnOp and rewrite func type.
+                    if return_values is None:
+                        return_values = []
+                    elif isinstance(return_values, tuple):
+                        return_values = list(return_values)
+                    elif isinstance(return_values, Value):
+                        # Returning a single value is fine, coerce it into a list.
+                        return_values = [return_values]
+                    elif isinstance(return_values, OpView):
+                        # Returning a single operation is fine, coerce its results to a list.
+                        return_values = return_values.operation.results
+                    elif isinstance(return_values, Operation):
+                        # Returning a single operation is fine, coerce its results to a list.
+                        return_values = return_values.results
+                    else:
+                        return_values = list(return_values)
+                    func.ReturnOp(return_values)
+                    # Recompute the function type.
+                    return_types = [v.type for v in return_values]
+                    function_type = FunctionType.get(
+                        inputs=inputs, results=return_types
+                    )
+                    func_op.attributes["function_type"] = TypeAttr.get(function_type)
+
+            def emit_call_op(*call_args):
+                call_op = func.CallOp(
+                    return_types, FlatSymbolRefAttr.get(symbol_name), call_args
+                )
+                if return_types is None:
+                    return None
+                elif len(return_types) == 1:
+                    return call_op.result
+                else:
+                    return call_op.results
+
+            wrapped = emit_call_op
+            wrapped.__name__ = f.__name__
+            wrapped.func_op = func_op
+            return wrapped
+
+        return decorator
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class CallOp(CallOp):
+    """Specialization for the call op class."""
+
+    def __init__(
+        self,
+        calleeOrResults: Union[FuncOp, List[Type]],
+        argumentsOrCallee: Union[List, FlatSymbolRefAttr, str],
+        arguments: Optional[List] = None,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        """Creates a call operation.
+
+        The constructor accepts three different forms:
+
+          1. A function op to be called followed by a list of arguments.
+          2. A list of result types, followed by the name of the function to be
+             called as a string, followed by a list of arguments.
+          3. A list of result types, followed by the name of the function to be
+             called as symbol reference attribute, followed by a list of arguments.
+
+        For example
+
+            f = func.FuncOp("foo", ...)
+            func.CallOp(f, [args])
+            func.CallOp([result_types], "foo", [args])
+
+        In all cases, the location and insertion point may be specified as keyword
+        arguments if not provided by the surrounding context managers.
+        """
+
+        # TODO: consider supporting constructor "overloads", e.g., through a custom
+        # or pybind-provided metaclass.
+        if isinstance(calleeOrResults, FuncOp):
+            if not isinstance(argumentsOrCallee, list):
+                raise ValueError(
+                    "when constructing a call to a function, expected "
+                    + "the second argument to be a list of call arguments, "
+                    + f"got {type(argumentsOrCallee)}"
+                )
+            if arguments is not None:
+                raise ValueError(
+                    "unexpected third argument when constructing a call "
+                    + "to a function"
+                )
+
+            super().__init__(
+                calleeOrResults.type.results,
+                FlatSymbolRefAttr.get(
+                    calleeOrResults.name.value, context=_get_default_loc_context(loc)
+                ),
+                argumentsOrCallee,
+                loc=loc,
+                ip=ip,
+            )
+            return
+
+        if isinstance(argumentsOrCallee, list):
+            raise ValueError(
+                "when constructing a call to a function by name, "
+                + "expected the second argument to be a string or a "
+                + f"FlatSymbolRefAttr, got {type(argumentsOrCallee)}"
+            )
+
+        if isinstance(argumentsOrCallee, FlatSymbolRefAttr):
+            super().__init__(
+                calleeOrResults, argumentsOrCallee, arguments, loc=loc, ip=ip
+            )
+        elif isinstance(argumentsOrCallee, str):
+            super().__init__(
+                calleeOrResults,
+                FlatSymbolRefAttr.get(
+                    argumentsOrCallee, context=_get_default_loc_context(loc)
+                ),
+                arguments,
+                loc=loc,
+                ip=ip,
+            )
diff --git a/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py b/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py
index 6f9d72164429eea..f91fc8b7160089b 100644
--- a/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py
+++ b/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py
@@ -310,7 +310,7 @@ def emit_named_structured_op(
         )
 
     # Set the index attributes used to compute the indexing maps.
-    named_op = getattr(linalg, op_class_name)(ins, outs, result_types)
+    named_op = getattr(linalg, op_class_name)(result_types, ins, outs)
     for name, value in index_attrs.items():
         named_op.operation.attributes[name] = value
 
diff --git a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py
index a8f8f8e0fbd68b4..19734a80a107bfe 100644
--- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py
+++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py
@@ -296,35 +296,39 @@ def quantized_matmul(
 
 
 @linalg_structured_op
-def matmul_transpose_a(A=TensorDef(T1, S.K, S.N),
-                       B=TensorDef(T2, S.K, S.M),
-                       C=TensorDef(U, S.M, S.N, output=True),
-                       cast=TypeFnAttrDef(default=TypeFn.cast_signed)):
-  """Performs a matrix multiplication of two 2D inputs with lhs operand
-  transposed.
+def matmul_transpose_a(
+    A=TensorDef(T1, S.K, S.N),
+    B=TensorDef(T2, S.K, S.M),
+    C=TensorDef(U, S.M, S.N, output=True),
+    cast=TypeFnAttrDef(default=TypeFn.cast_signed),
+):
+    """Performs a matrix multiplication of two 2D inputs with lhs operand
+    transposed.
 
-  Numeric casting is performed on the operands to the inner multiply, promoting
-  them to the same data type as the accumulator/output.
-  """
-  domain(D.m, D.n, D.k)
-  implements(ContractionOpInterface)
-  C[D.m, D.n] += cast(U, A[D.k, D.m]) * cast(U, B[D.k, D.n])
+    Numeric casting is performed on the operands to the inner multiply, promoting
+    them to the same data type as the accumulator/output.
+    """
+    domain(D.m, D.n, D.k)
+    implements(ContractionOpInterface)
+    C[D.m, D.n] += cast(U, A[D.k, D.m]) * cast(U, B[D.k, D.n])
 
 
 @linalg_structured_op
-def matmul_transpose_b(A=TensorDef(T1, S.M, S.K),
-                       B=TensorDef(T2, S.N, S.K),
-                       C=TensorDef(U, S.M, S.N, output=True),
-                       cast=TypeFnAttrDef(default=TypeFn.cast_signed)):
-  """Performs a matrix multiplication of two 2D inputs with rhs operand
-  transposed.
+def matmul_transpose_b(
+    A=TensorDef(T1, S.M, S.K),
+    B=TensorDef(T2, S.N, S.K),
+    C=TensorDef(U, S.M, S.N, output=True),
+    cast=TypeFnAttrDef(default=TypeFn.cast_signed),
+):
+    """Performs a matrix multiplication of two 2D inputs with rhs operand
+    transposed.
 
-  Numeric casting is performed on the operands to the inner multiply, promoting
-  them to the same data type as the accumulator/output.
-  """
-  domain(D.m, D.n, D.k)
-  implements(ContractionOpInterface)
-  C[D.m, D.n] += cast(U, A[D.m, D.k]) * cast(U, B[D.n, D.k])
+    Numeric casting is performed on the operands to the inner multiply, promoting
+    them to the same data type as the accumulator/output.
+    """
+    domain(D.m, D.n, D.k)
+    implements(ContractionOpInterface)
+    C[D.m, D.n] += cast(U, A[D.m, D.k]) * cast(U, B[D.n, D.k])
 
 
 @linalg_structured_op
@@ -390,36 +394,41 @@ def batch_matmul(
 
 
 @linalg_structured_op
-def batch_matmul_transpose_a(A=TensorDef(T1, Batch, S.K, S.M),
-                             B=TensorDef(T2, Batch, S.K, S.N),
-                             C=TensorDef(U, Batch, S.M, S.N, output=True)):
-  """Performs a batched matrix multiplication of two 3D inputs where lhs operand
-  has its non-batch dimensions transposed.
+def batch_matmul_transpose_a(
+    A=TensorDef(T1, Batch, S.K, S.M),
+    B=TensorDef(T2, Batch, S.K, S.N),
+    C=TensorDef(U, Batch, S.M, S.N, output=True),
+):
+    """Performs a batched matrix multiplication of two 3D inputs where lhs operand
+    has its non-batch dimensions transposed.
 
-  Numeric casting is performed on the operands to the inner multiply, promoting
-  them to the same data type as the accumulator/output.
-  """
-  domain(D.b, D.m, D.n, D.k)
-  implements(ContractionOpInterface)
-  C[D.b, D.m, D.n] += TypeFn.cast_signed(U, A[D.b, D.k, D.m]) \
-                    * TypeFn.cast_signed(U, B[D.b, D.k, D.n])
+    Numeric casting is performed on the operands to the inner multiply, promoting
+    them to the same data type as the accumulator/output.
+    """
+    domain(D.b, D.m, D.n, D.k)
+    implements(ContractionOpInterface)
+    C[D.b, D.m, D.n] += TypeFn.cast_signed(U, A[D.b, D.k, D.m]) * TypeFn.cast_signed(
+        U, B[D.b, D.k, D.n]
+    )
 
 
 @linalg_structured_op
-def batch_matmul_transpose_b(A=TensorDef(T1, Batch, S.M, S.K),
-                             B=TensorDef(T2, Batch, S.N, S.K),
-                             C=TensorDef(U, Batch, S.M, S.N, output=True)):
-  """Performs a batched matrix multiplication of two 3D inputs where rhs operand
-  has its non-batch dimensions transposed.
+def batch_matmul_transpose_b(
+    A=TensorDef(T1, Batch, S.M, S.K),
+    B=TensorDef(T2, Batch, S.N, S.K),
+    C=TensorDef(U, Batch, S.M, S.N, output=True),
+):
+    """Performs a batched matrix multiplication of two 3D inputs where rhs operand
+    has its non-batch dimensions transposed.
 
-  Numeric casting is performed on the operands to the inner multiply, promoting
-  them to the same data type as the accumulator/output.
-  """
-  domain(D.b, D.m, D.n, D.k)
-  implements(ContractionOpInterface)
-  C[D.b, D.m,
-    D.n] += TypeFn.cast_signed(U, A[D.b, D.m, D.k]) * TypeFn.cast_signed(
-        U, B[D.b, D.n, D.k])
+    Numeric casting is performed on the operands to the inner multiply, promoting
+    them to the same data type as the accumulator/output.
+    """
+    domain(D.b, D.m, D.n, D.k)
+    implements(ContractionOpInterface)
+    C[D.b, D.m, D.n] += TypeFn.cast_signed(U, A[D.b, D.m, D.k]) * TypeFn.cast_signed(
+        U, B[D.b, D.n, D.k]
+    )
 
 
 @linalg_structured_op
diff --git a/mlir/python/mlir/dialects/memref.py b/mlir/python/mlir/dialects/memref.py
index 3afb6a70cb9e0db..111ad2178703d28 100644
--- a/mlir/python/mlir/dialects/memref.py
+++ b/mlir/python/mlir/dialects/memref.py
@@ -3,3 +3,41 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from ._memref_ops_gen import *
+from ._memref_ops_gen import _Dialect
+
+try:
+    from ..ir import *
+    from ._ods_common import (
+        get_op_result_or_value as _get_op_result_or_value,
+        get_op_results_or_values as _get_op_results_or_values,
+        _cext as _ods_cext,
+    )
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+from typing import Optional, Sequence, Union
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class LoadOp(LoadOp):
+    """Specialization for the MemRef load operation."""
+
+    def __init__(
+        self,
+        memref: Union[Operation, OpView, Value],
+        indices: Optional[Union[Operation, OpView, Sequence[Value]]] = None,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        """Creates a memref load operation.
+
+        Args:
+          memref: the buffer to load from.
+          indices: the list of subscripts, may be empty for zero-dimensional
+            buffers.
+          loc: user-visible location of the operation.
+          ip: insertion point.
+        """
+        indices_resolved = [] if indices is None else _get_op_results_or_values(indices)
+        super().__init__(memref, indices_resolved, loc=loc, ip=ip)
diff --git a/mlir/python/mlir/dialects/ml_program.py b/mlir/python/mlir/dialects/ml_program.py
index a654529b4bb8843..dfb6d7f2c03b1cf 100644
--- a/mlir/python/mlir/dialects/ml_program.py
+++ b/mlir/python/mlir/dialects/ml_program.py
@@ -2,4 +2,118 @@
 #  See https://llvm.org/LICENSE.txt for license information.
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+from typing import Union
+
 from ._ml_program_ops_gen import *
+from ._ml_program_ops_gen import _Dialect
+
+try:
+    from ..ir import *
+    from ._ods_common import (
+        get_default_loc_context as _get_default_loc_context,
+        _cext as _ods_cext,
+    )
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+
+ARGUMENT_ATTRIBUTE_NAME = "arg_attrs"
+RESULT_ATTRIBUTE_NAME = "res_attrs"
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class FuncOp(FuncOp):
+    """Specialization for the func op class."""
+
+    def __init__(
+        self, name, type, *, visibility=None, body_builder=None, loc=None, ip=None
+    ):
+        """
+        Create a FuncOp with the provided `name`, `type`, and `visibility`.
+        - `name` is a string representing the function name.
+        - `type` is either a FunctionType or a pair of list describing inputs and
+          results.
+        - `visibility` is a string matching `public`, `private`, or `nested`. None
+          implies private visibility.
+        - `body_builder` is an optional callback, when provided a new entry block
+          is created and the callback is invoked with the new op as argument within
+          an InsertionPoint context already set for the block. The callback is
+          expected to insert a terminator in the block.
+        """
+        sym_name = StringAttr.get(str(name))
+
+        # If the type is passed as a tuple, build a FunctionType on the fly.
+        if isinstance(type, tuple):
+            type = FunctionType.get(inputs=type[0], results=type[1])
+
+        type = TypeAttr.get(type)
+        sym_visibility = (
+            StringAttr.get(str(visibility)) if visibility is not None else None
+        )
+        super().__init__(sym_name, type, sym_visibility=sym_visibility, loc=loc, ip=ip)
+        if body_builder:
+            entry_block = self.add_entry_block()
+            with InsertionPoint(entry_block):
+                body_builder(self)
+
+    @property
+    def is_external(self):
+        return len(self.regions[0].blocks) == 0
+
+    @property
+    def body(self):
+        return self.regions[0]
+
+    @property
+    def type(self):
+        return FunctionType(TypeAttr(self.attributes["function_type"]).value)
+
+    @property
+    def visibility(self):
+        return self.attributes["sym_visibility"]
+
+    @property
+    def name(self) -> StringAttr:
+        return StringAttr(self.attributes["sym_name"])
+
+    @property
+    def entry_block(self):
+        if self.is_external:
+            raise IndexError("External function does not have a body")
+        return self.regions[0].blocks[0]
+
+    def add_entry_block(self):
+        """
+        Add an entry block to the function body using the function signature to
+        infer block arguments.
+        Returns the newly created block
+        """
+        if not self.is_external:
+            raise IndexError("The function already has an entry block!")
+        self.body.blocks.append(*self.type.inputs)
+        return self.body.blocks[0]
+
+    @property
+    def arg_attrs(self):
+        return ArrayAttr(self.attributes[ARGUMENT_ATTRIBUTE_NAME])
+
+    @arg_attrs.setter
+    def arg_attrs(self, attribute: Union[ArrayAttr, list]):
+        if isinstance(attribute, ArrayAttr):
+            self.attributes[ARGUMENT_ATTRIBUTE_NAME] = attribute
+        else:
+            self.attributes[ARGUMENT_ATTRIBUTE_NAME] = ArrayAttr.get(
+                attribute, context=self.context
+            )
+
+    @property
+    def arguments(self):
+        return self.entry_block.arguments
+
+    @property
+    def result_attrs(self):
+        return self.attributes[RESULT_ATTRIBUTE_NAME]
+
+    @result_attrs.setter
+    def result_attrs(self, attribute: ArrayAttr):
+        self.attributes[RESULT_ATTRIBUTE_NAME] = attribute
diff --git a/mlir/python/mlir/dialects/pdl.py b/mlir/python/mlir/dialects/pdl.py
index dda2b7d6521965f..a8d9c56f4233d9e 100644
--- a/mlir/python/mlir/dialects/pdl.py
+++ b/mlir/python/mlir/dialects/pdl.py
@@ -3,4 +3,289 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from ._pdl_ops_gen import *
+from ._pdl_ops_gen import _Dialect
 from .._mlir_libs._mlirDialectsPDL import *
+
+
+try:
+    from ..ir import *
+    from ..dialects import pdl
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+from typing import Union, Optional, Sequence, Mapping
+from ._ods_common import (
+    get_op_result_or_value as _get_value,
+    get_op_results_or_values as _get_values,
+    _cext as _ods_cext,
+)
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class ApplyNativeConstraintOp(ApplyNativeConstraintOp):
+    """Specialization for PDL apply native constraint op class."""
+
+    def __init__(
+        self,
+        name: Union[str, StringAttr],
+        args: Optional[Sequence[Union[OpView, Operation, Value]]] = None,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        if args is None:
+            args = []
+        args = _get_values(args)
+        super().__init__(name, args, loc=loc, ip=ip)
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class ApplyNativeRewriteOp(ApplyNativeRewriteOp):
+    """Specialization for PDL apply native rewrite op class."""
+
+    def __init__(
+        self,
+        results: Sequence[Type],
+        name: Union[str, StringAttr],
+        args: Optional[Sequence[Union[OpView, Operation, Value]]] = None,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        if args is None:
+            args = []
+        args = _get_values(args)
+        super().__init__(results, name, args, loc=loc, ip=ip)
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class AttributeOp(AttributeOp):
+    """Specialization for PDL attribute op class."""
+
+    def __init__(
+        self,
+        valueType: Optional[Union[OpView, Operation, Value]] = None,
+        value: Optional[Attribute] = None,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        valueType = valueType if valueType is None else _get_value(valueType)
+        result = pdl.AttributeType.get()
+        super().__init__(result, valueType=valueType, value=value, loc=loc, ip=ip)
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class EraseOp(EraseOp):
+    """Specialization for PDL erase op class."""
+
+    def __init__(
+        self,
+        operation: Optional[Union[OpView, Operation, Value]] = None,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        operation = _get_value(operation)
+        super().__init__(operation, loc=loc, ip=ip)
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class OperandOp(OperandOp):
+    """Specialization for PDL operand op class."""
+
+    def __init__(
+        self,
+        type: Optional[Union[OpView, Operation, Value]] = None,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        type = type if type is None else _get_value(type)
+        result = pdl.ValueType.get()
+        super().__init__(result, valueType=type, loc=loc, ip=ip)
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class OperandsOp(OperandsOp):
+    """Specialization for PDL operands op class."""
+
+    def __init__(
+        self,
+        types: Optional[Union[OpView, Operation, Value]] = None,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        types = types if types is None else _get_value(types)
+        result = pdl.RangeType.get(pdl.ValueType.get())
+        super().__init__(result, valueType=types, loc=loc, ip=ip)
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class OperationOp(OperationOp):
+    """Specialization for PDL operation op class."""
+
+    def __init__(
+        self,
+        name: Optional[Union[str, StringAttr]] = None,
+        args: Optional[Sequence[Union[OpView, Operation, Value]]] = None,
+        attributes: Optional[Mapping[str, Union[OpView, Operation, Value]]] = None,
+        types: Optional[Sequence[Union[OpView, Operation, Value]]] = None,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        if types is None:
+            types = []
+        if attributes is None:
+            attributes = {}
+        if args is None:
+            args = []
+        args = _get_values(args)
+        attrNames = []
+        attrValues = []
+        for attrName, attrValue in attributes.items():
+            attrNames.append(StringAttr.get(attrName))
+            attrValues.append(_get_value(attrValue))
+        attrNames = ArrayAttr.get(attrNames)
+        types = _get_values(types)
+        result = pdl.OperationType.get()
+        super().__init__(
+            result, args, attrValues, attrNames, types, opName=name, loc=loc, ip=ip
+        )
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class PatternOp(PatternOp):
+    """Specialization for PDL pattern op class."""
+
+    def __init__(
+        self,
+        benefit: Union[IntegerAttr, int],
+        name: Optional[Union[StringAttr, str]] = None,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        """Creates a PDL `pattern` operation."""
+        super().__init__(benefit, sym_name=name, loc=loc, ip=ip)
+        self.regions[0].blocks.append()
+
+    @property
+    def body(self):
+        """Return the body (block) of the pattern."""
+        return self.regions[0].blocks[0]
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class ReplaceOp(ReplaceOp):
+    """Specialization for PDL replace op class."""
+
+    def __init__(
+        self,
+        op: Union[OpView, Operation, Value],
+        *,
+        with_op: Optional[Union[OpView, Operation, Value]] = None,
+        with_values: Optional[Sequence[Union[OpView, Operation, Value]]] = None,
+        loc=None,
+        ip=None,
+    ):
+        if with_values is None:
+            with_values = []
+        op = _get_value(op)
+        with_op = with_op if with_op is None else _get_value(with_op)
+        with_values = _get_values(with_values)
+        super().__init__(op, with_values, replOperation=with_op, loc=loc, ip=ip)
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class ResultOp(ResultOp):
+    """Specialization for PDL result op class."""
+
+    def __init__(
+        self,
+        parent: Union[OpView, Operation, Value],
+        index: Union[IntegerAttr, int],
+        *,
+        loc=None,
+        ip=None,
+    ):
+        parent = _get_value(parent)
+        result = pdl.ValueType.get()
+        super().__init__(result, parent, index, loc=loc, ip=ip)
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class ResultsOp(ResultsOp):
+    """Specialization for PDL results op class."""
+
+    def __init__(
+        self,
+        result: Type,
+        parent: Union[OpView, Operation, Value],
+        index: Optional[Union[IntegerAttr, int]] = None,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        parent = _get_value(parent)
+        super().__init__(result, parent, index=index, loc=loc, ip=ip)
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class RewriteOp(RewriteOp):
+    """Specialization for PDL rewrite op class."""
+
+    def __init__(
+        self,
+        root: Optional[Union[OpView, Operation, Value]] = None,
+        name: Optional[Union[StringAttr, str]] = None,
+        args: Optional[Sequence[Union[OpView, Operation, Value]]] = None,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        if args is None:
+            args = []
+        root = root if root is None else _get_value(root)
+        args = _get_values(args)
+        super().__init__(args, root=root, name=name, loc=loc, ip=ip)
+
+    def add_body(self):
+        """Add body (block) to the rewrite."""
+        self.regions[0].blocks.append()
+        return self.body
+
+    @property
+    def body(self):
+        """Return the body (block) of the rewrite."""
+        return self.regions[0].blocks[0]
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class TypeOp(TypeOp):
+    """Specialization for PDL type op class."""
+
+    def __init__(
+        self, constantType: Optional[Union[TypeAttr, Type]] = None, *, loc=None, ip=None
+    ):
+        result = pdl.TypeType.get()
+        super().__init__(result, constantType=constantType, loc=loc, ip=ip)
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class TypesOp(TypesOp):
+    """Specialization for PDL types op class."""
+
+    def __init__(
+        self,
+        constantTypes: Optional[Sequence[Union[TypeAttr, Type]]] = None,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        if constantTypes is None:
+            constantTypes = []
+        result = pdl.RangeType.get(pdl.TypeType.get())
+        super().__init__(result, constantTypes=constantTypes, loc=loc, ip=ip)
diff --git a/mlir/python/mlir/dialects/python_test.py b/mlir/python/mlir/dialects/python_test.py
index 8465af048a28056..6579e02d8549efa 100644
--- a/mlir/python/mlir/dialects/python_test.py
+++ b/mlir/python/mlir/dialects/python_test.py
@@ -3,7 +3,12 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from ._python_test_ops_gen import *
-from .._mlir_libs._mlirPythonTest import TestAttr, TestType, TestTensorValue, TestIntegerRankedTensorType
+from .._mlir_libs._mlirPythonTest import (
+    TestAttr,
+    TestType,
+    TestTensorValue,
+    TestIntegerRankedTensorType,
+)
 
 
 def register_python_test_dialect(context, load=True):
diff --git a/mlir/python/mlir/dialects/scf.py b/mlir/python/mlir/dialects/scf.py
index 49685ca2271fc61..43ad9f4e2d65f51 100644
--- a/mlir/python/mlir/dialects/scf.py
+++ b/mlir/python/mlir/dialects/scf.py
@@ -2,11 +2,122 @@
 #  See https://llvm.org/LICENSE.txt for license information.
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-from typing import Optional, Sequence
 
 from ._scf_ops_gen import *
+from ._scf_ops_gen import _Dialect
 from .arith import constant
-from ..ir import *
+
+try:
+    from ..ir import *
+    from ._ods_common import (
+        get_op_result_or_value as _get_op_result_or_value,
+        get_op_results_or_values as _get_op_results_or_values,
+        _cext as _ods_cext,
+    )
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+from typing import Optional, Sequence, Union
+
+
+_ForOp = ForOp
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class ForOp(_ForOp):
+    """Specialization for the SCF for op class."""
+
+    def __init__(
+        self,
+        lower_bound,
+        upper_bound,
+        step,
+        iter_args: Optional[Union[Operation, OpView, Sequence[Value]]] = None,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        """Creates an SCF `for` operation.
+
+        - `lower_bound` is the value to use as lower bound of the loop.
+        - `upper_bound` is the value to use as upper bound of the loop.
+        - `step` is the value to use as loop step.
+        - `iter_args` is a list of additional loop-carried arguments or an operation
+          producing them as results.
+        """
+        if iter_args is None:
+            iter_args = []
+        iter_args = _get_op_results_or_values(iter_args)
+
+        results = [arg.type for arg in iter_args]
+        super(_ForOp, self).__init__(
+            self.build_generic(
+                regions=1,
+                results=results,
+                operands=[
+                    _get_op_result_or_value(o) for o in [lower_bound, upper_bound, step]
+                ]
+                + list(iter_args),
+                loc=loc,
+                ip=ip,
+            )
+        )
+        self.regions[0].blocks.append(self.operands[0].type, *results)
+
+    @property
+    def body(self):
+        """Returns the body (block) of the loop."""
+        return self.regions[0].blocks[0]
+
+    @property
+    def induction_variable(self):
+        """Returns the induction variable of the loop."""
+        return self.body.arguments[0]
+
+    @property
+    def inner_iter_args(self):
+        """Returns the loop-carried arguments usable within the loop.
+
+        To obtain the loop-carried operands, use `iter_args`.
+        """
+        return self.body.arguments[1:]
+
+
+_IfOp = IfOp
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class IfOp(_IfOp):
+    """Specialization for the SCF if op class."""
+
+    def __init__(self, cond, results_=[], *, hasElse=False, loc=None, ip=None):
+        """Creates an SCF `if` operation.
+
+        - `cond` is a MLIR value of 'i1' type to determine which regions of code will be executed.
+        - `hasElse` determines whether the if operation has the else branch.
+        """
+        operands = []
+        operands.append(cond)
+        results = []
+        results.extend(results_)
+        super(_IfOp, self).__init__(
+            self.build_generic(
+                regions=2, results=results, operands=operands, loc=loc, ip=ip
+            )
+        )
+        self.regions[0].blocks.append(*[])
+        if hasElse:
+            self.regions[1].blocks.append(*[])
+
+    @property
+    def then_block(self):
+        """Returns the then block of the if operation."""
+        return self.regions[0].blocks[0]
+
+    @property
+    def else_block(self):
+        """Returns the else block of the if operation."""
+        return self.regions[1].blocks[0]
 
 
 def for_(
diff --git a/mlir/python/mlir/dialects/tensor.py b/mlir/python/mlir/dialects/tensor.py
index 26edf6b6436dad5..67248748eaf3ada 100644
--- a/mlir/python/mlir/dialects/tensor.py
+++ b/mlir/python/mlir/dialects/tensor.py
@@ -3,3 +3,40 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from ._tensor_ops_gen import *
+from ._tensor_ops_gen import _Dialect
+
+try:
+    from ..ir import *
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+from typing import Sequence, Union
+from ._ods_common import _cext as _ods_cext
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class EmptyOp(EmptyOp):
+    """Extends the tensor.empty op."""
+
+    def __init__(
+        self,
+        sizes: Sequence[Union[int, Value]],
+        element_type: Type,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        """Constructs an `empty` with mixed static/dynamic sizes."""
+        # TODO: Refactor the EmptyOp to take an element type attribute and
+        # then use normal result type inference, unifying the Python and C++ side
+        # with a standard mechanism (versus stashing that in builders).
+        dynamic_sizes = []
+        static_sizes = []
+        for s in sizes:
+            if isinstance(s, int):
+                static_sizes.append(s)
+            else:
+                static_sizes.append(ShapedType.get_dynamic_size())
+                dynamic_sizes.append(s)
+        result_type = RankedTensorType.get(static_sizes, element_type)
+        super().__init__(result_type, dynamic_sizes, loc=loc, ip=ip)
diff --git a/mlir/python/mlir/dialects/transform/__init__.py b/mlir/python/mlir/dialects/transform/__init__.py
index b020ad35fcf062f..f7a2026e800aeb0 100644
--- a/mlir/python/mlir/dialects/transform/__init__.py
+++ b/mlir/python/mlir/dialects/transform/__init__.py
@@ -4,4 +4,174 @@
 
 from .._transform_enum_gen import *
 from .._transform_ops_gen import *
+from .._transform_ops_gen import _Dialect
 from ..._mlir_libs._mlirDialectsTransform import *
+
+try:
+    from ...ir import *
+    from .._ods_common import (
+        get_op_result_or_value as _get_op_result_or_value,
+        get_op_results_or_values as _get_op_results_or_values,
+        _cext as _ods_cext,
+    )
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+from typing import Optional, Sequence, Union
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class CastOp(CastOp):
+    def __init__(
+        self,
+        result_type: Type,
+        target: Union[Operation, Value],
+        *,
+        loc=None,
+        ip=None,
+    ):
+        super().__init__(result_type, _get_op_result_or_value(target), loc=loc, ip=ip)
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class ApplyPatternsOp(ApplyPatternsOp):
+    def __init__(
+        self,
+        target: Union[Operation, Value, OpView],
+        *,
+        loc=None,
+        ip=None,
+    ):
+        super().__init__(target, loc=loc, ip=ip)
+        self.regions[0].blocks.append()
+
+    @property
+    def patterns(self) -> Block:
+        return self.regions[0].blocks[0]
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class GetParentOp(GetParentOp):
+    def __init__(
+        self,
+        result_type: Type,
+        target: Union[Operation, Value],
+        *,
+        isolated_from_above: bool = False,
+        op_name: Optional[str] = None,
+        deduplicate: bool = False,
+        loc=None,
+        ip=None,
+    ):
+        super().__init__(
+            result_type,
+            _get_op_result_or_value(target),
+            isolated_from_above=isolated_from_above,
+            op_name=op_name,
+            deduplicate=deduplicate,
+            loc=loc,
+            ip=ip,
+        )
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class MergeHandlesOp(MergeHandlesOp):
+    def __init__(
+        self,
+        handles: Sequence[Union[Operation, Value]],
+        *,
+        deduplicate: bool = False,
+        loc=None,
+        ip=None,
+    ):
+        super().__init__(
+            [_get_op_result_or_value(h) for h in handles],
+            deduplicate=deduplicate,
+            loc=loc,
+            ip=ip,
+        )
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class ReplicateOp(ReplicateOp):
+    def __init__(
+        self,
+        pattern: Union[Operation, Value],
+        handles: Sequence[Union[Operation, Value]],
+        *,
+        loc=None,
+        ip=None,
+    ):
+        super().__init__(
+            [_get_op_result_or_value(h).type for h in handles],
+            _get_op_result_or_value(pattern),
+            [_get_op_result_or_value(h) for h in handles],
+            loc=loc,
+            ip=ip,
+        )
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class SequenceOp(SequenceOp):
+    def __init__(
+        self,
+        failure_propagation_mode,
+        results: Sequence[Type],
+        target: Union[Operation, Value, Type],
+        extra_bindings: Optional[
+            Union[Sequence[Value], Sequence[Type], Operation, OpView]
+        ] = None,
+    ):
+        root = (
+            _get_op_result_or_value(target)
+            if isinstance(target, (Operation, Value))
+            else None
+        )
+        root_type = root.type if not isinstance(target, Type) else target
+
+        if extra_bindings is None:
+            extra_bindings = []
+        if isinstance(extra_bindings, (Operation, OpView)):
+            extra_bindings = _get_op_results_or_values(extra_bindings)
+
+        extra_binding_types = []
+        if len(extra_bindings) != 0:
+            if isinstance(extra_bindings[0], Type):
+                extra_binding_types = extra_bindings
+                extra_bindings = []
+            else:
+                extra_binding_types = [v.type for v in extra_bindings]
+
+        super().__init__(
+            results_=results,
+            failure_propagation_mode=failure_propagation_mode,
+            root=root,
+            extra_bindings=extra_bindings,
+        )
+        self.regions[0].blocks.append(*tuple([root_type] + extra_binding_types))
+
+    @property
+    def body(self) -> Block:
+        return self.regions[0].blocks[0]
+
+    @property
+    def bodyTarget(self) -> Value:
+        return self.body.arguments[0]
+
+    @property
+    def bodyExtraArgs(self) -> BlockArgumentList:
+        return self.body.arguments[1:]
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class YieldOp(YieldOp):
+    def __init__(
+        self,
+        operands: Optional[Union[Operation, Sequence[Value]]] = None,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        if operands is None:
+            operands = []
+        super().__init__(_get_op_results_or_values(operands), loc=loc, ip=ip)
diff --git a/mlir/python/mlir/dialects/transform/bufferization.py b/mlir/python/mlir/dialects/transform/bufferization.py
index eb77b746cf864fa..485a8a36b6305e9 100644
--- a/mlir/python/mlir/dialects/transform/bufferization.py
+++ b/mlir/python/mlir/dialects/transform/bufferization.py
@@ -3,3 +3,132 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from .._bufferization_transform_ops_gen import *
+from .._bufferization_transform_ops_gen import _Dialect
+
+try:
+    from ...ir import *
+    from ...dialects import transform
+    from .._ods_common import _cext as _ods_cext
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+from enum import Enum
+from typing import Optional, overload, Union
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class EmptyTensorToAllocTensorOp(EmptyTensorToAllocTensorOp):
+    """Specialization for EmptyTensorToAllocTensorOp class."""
+
+    @overload
+    def __init__(
+        self,
+        transformed_type: Type,
+        target: Union[Operation, OpView, Value],
+        *,
+        loc=None,
+        ip=None,
+    ):
+        ...
+
+    @overload
+    def __init__(self, target: Union[Operation, OpView, Value], *, loc=None, ip=None):
+        ...
+
+    def __init__(
+        self,
+        transformed_type_or_target: Type,
+        target_or_none: Optional[Union[Operation, OpView, Value]] = None,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        if isinstance(transformed_type_or_target, Type):
+            transformed_type = transformed_type_or_target
+            target = target_or_none
+        else:
+            transformed_type = transform.OperationType.get("bufferization.alloc_tensor")
+            target = transformed_type_or_target
+
+        super().__init__(
+            transformed_type,
+            target,
+            loc=loc,
+            ip=ip,
+        )
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class OneShotBufferizeOp(OneShotBufferizeOp):
+    """Specialization for OneShotBufferizeOp class."""
+
+    @overload
+    def __init__(
+        self,
+        transformed_type: Type,
+        target: Union[Operation, OpView, Value],
+        *,
+        allow_return_allocs_from_loops: Optional[bool] = None,
+        allow_unknown_ops: Optional[bool] = None,
+        bufferize_function_boundaries: Optional[bool] = None,
+        function_boundary_type_conversion: Optional[Enum] = None,
+        memcpy_op: Optional[str] = None,
+        print_conflicts: Optional[bool] = None,
+        test_analysis_only: Optional[bool] = None,
+        loc=None,
+        ip=None,
+    ):
+        ...
+
+    @overload
+    def __init__(
+        self,
+        target: Union[Operation, OpView, Value],
+        *,
+        allow_return_allocs_from_loops: Optional[bool] = None,
+        allow_unknown_ops: Optional[bool] = None,
+        bufferize_function_boundaries: Optional[bool] = None,
+        function_boundary_type_conversion: Optional[Enum] = None,
+        memcpy_op: Optional[str] = None,
+        print_conflicts: Optional[bool] = None,
+        test_analysis_only: Optional[bool] = None,
+        loc=None,
+        ip=None,
+    ):
+        ...
+
+    def __init__(
+        self,
+        transformed_type_or_target: Type,
+        target_or_none: Optional[Union[Operation, OpView, Value]] = None,
+        *,
+        allow_return_allocs_from_loops: Optional[bool] = None,
+        allow_unknown_ops: Optional[bool] = None,
+        bufferize_function_boundaries: Optional[bool] = None,
+        function_boundary_type_conversion: Optional[Enum] = None,
+        memcpy_op: Optional[str] = None,
+        print_conflicts: Optional[bool] = None,
+        test_analysis_only: Optional[bool] = None,
+        loc=None,
+        ip=None,
+    ):
+        if isinstance(transformed_type_or_target, Type):
+            transformed_type = transformed_type_or_target
+            target = target_or_none
+        else:
+            transformed_type = transform.AnyOpType.get()
+            target = transformed_type_or_target
+
+        super().__init__(
+            transformed_type,
+            target,
+            allow_return_allocs_from_loops=allow_return_allocs_from_loops,
+            allow_unknown_ops=allow_unknown_ops,
+            bufferize_function_boundaries=bufferize_function_boundaries,
+            function_boundary_type_conversion=function_boundary_type_conversion,
+            memcpy_op=memcpy_op,
+            print_conflicts=print_conflicts,
+            test_analysis_only=test_analysis_only,
+            loc=loc,
+            ip=ip,
+        )
diff --git a/mlir/python/mlir/dialects/transform/gpu.py b/mlir/python/mlir/dialects/transform/gpu.py
index 8c3de0de7ea3f19..00cf0840eeae9e1 100644
--- a/mlir/python/mlir/dialects/transform/gpu.py
+++ b/mlir/python/mlir/dialects/transform/gpu.py
@@ -3,3 +3,128 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from .._gpu_transform_ops_gen import *
+from .._gpu_transform_ops_gen import _Dialect
+
+try:
+    from ...ir import *
+    from ...dialects import transform
+    from .._ods_common import _cext as _ods_cext
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+from typing import Optional, Sequence, Union, overload
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class MapForallToBlocks(MapForallToBlocks):
+    """Specialization for MapForallToBlocks class."""
+
+    @overload
+    def __init__(
+        self,
+        result_type: Type,
+        target: Union[Operation, OpView, Value],
+        *,
+        grid_dims: Optional[Union[Sequence[int], Attribute]] = None,
+        generate_gpu_launch: Optional[Union[bool, Attribute]] = None,
+        loc=None,
+        ip=None,
+    ):
+        ...
+
+    @overload
+    def __init__(
+        self,
+        target: Union[Operation, OpView, Value],
+        *,
+        grid_dims: Optional[Union[Sequence[int], Attribute]] = None,
+        generate_gpu_launch: Optional[Union[bool, Attribute]] = None,
+        loc=None,
+        ip=None,
+    ):
+        ...
+
+    def __init__(
+        self,
+        result_type_or_target: Union[Operation, OpView, Type, Value],
+        target_or_none: Optional[Union[Operation, OpView, Value]] = None,
+        *,
+        grid_dims: Optional[Union[Sequence[int], Attribute]] = None,
+        generate_gpu_launch: Optional[Union[bool, Attribute]] = None,
+        loc=None,
+        ip=None,
+    ):
+        if isinstance(result_type_or_target, Type):
+            result_type = result_type_or_target
+            target = target_or_none
+        else:
+            result_type = transform.AnyOpType.get()
+            target = result_type_or_target
+
+        super().__init__(
+            result_type,
+            target,
+            grid_dims=grid_dims,
+            generate_gpu_launch=generate_gpu_launch,
+            loc=loc,
+            ip=ip,
+        )
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class MapNestedForallToThreads(MapNestedForallToThreads):
+    """Specialization for MapNestedForallToThreads class."""
+
+    @overload
+    def __init__(
+        self,
+        result_type: Type,
+        target: Union[Operation, OpView, Value],
+        *,
+        block_dims: Optional[Sequence[int]] = None,
+        warp_size: Optional[Sequence[int]] = None,
+        sync_after_distribute: Optional[bool] = None,
+        loc=None,
+        ip=None,
+    ):
+        ...
+
+    @overload
+    def __init__(
+        self,
+        target: Union[Operation, OpView, Value],
+        *,
+        block_dims: Optional[Sequence[int]] = None,
+        warp_size: Optional[Sequence[int]] = None,
+        sync_after_distribute: Optional[bool] = None,
+        loc=None,
+        ip=None,
+    ):
+        ...
+
+    def __init__(
+        self,
+        result_type_or_target: Union[Operation, OpView, Value, Type],
+        target_or_none: Optional[Union[Operation, OpView, Value]] = None,
+        *,
+        block_dims: Optional[Union[Sequence[int], Attribute]] = None,
+        warp_size: Optional[Union[Sequence[int], Attribute]] = None,
+        sync_after_distribute: Optional[bool] = None,
+        loc=None,
+        ip=None,
+    ):
+        if isinstance(result_type_or_target, Type):
+            result_type = result_type_or_target
+            target = target_or_none
+        else:
+            result_type = result_type_or_target.type
+            target = result_type_or_target
+        super().__init__(
+            result_type,
+            target,
+            block_dims=block_dims,
+            warp_size=warp_size,
+            sync_after_distribute=sync_after_distribute,
+            loc=loc,
+            ip=ip,
+        )
diff --git a/mlir/python/mlir/dialects/transform/loop.py b/mlir/python/mlir/dialects/transform/loop.py
index 86f72788d86c369..6c89025f413839e 100644
--- a/mlir/python/mlir/dialects/transform/loop.py
+++ b/mlir/python/mlir/dialects/transform/loop.py
@@ -3,3 +3,143 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from .._loop_transform_ops_gen import *
+from .._loop_transform_ops_gen import _Dialect
+
+try:
+    from ...ir import *
+    from .._ods_common import (
+        get_op_result_or_value as _get_op_result_or_value,
+        _cext as _ods_cext,
+    )
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+from typing import Optional, Union
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class GetParentForOp(GetParentForOp):
+    """Extension for GetParentForOp."""
+
+    def __init__(
+        self,
+        result_type: Type,
+        target: Union[Operation, Value],
+        *,
+        num_loops: Optional[int] = None,
+        ip=None,
+        loc=None,
+    ):
+        if num_loops is None:
+            num_loops = 1
+        super().__init__(
+            result_type,
+            _get_op_result_or_value(target),
+            num_loops=num_loops,
+            ip=ip,
+            loc=loc,
+        )
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class LoopOutlineOp(LoopOutlineOp):
+    """Extension for LoopOutlineOp."""
+
+    def __init__(
+        self,
+        function_type: Type,
+        call_type: Type,
+        target: Union[Operation, Value],
+        *,
+        func_name: Union[str, StringAttr],
+        ip=None,
+        loc=None,
+    ):
+        super().__init__(
+            function_type,
+            call_type,
+            _get_op_result_or_value(target),
+            func_name=(
+                func_name
+                if isinstance(func_name, StringAttr)
+                else StringAttr.get(func_name)
+            ),
+            ip=ip,
+            loc=loc,
+        )
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class LoopPeelOp(LoopPeelOp):
+    """Extension for LoopPeelOp."""
+
+    def __init__(
+        self,
+        main_loop_type: Type,
+        remainder_loop_type: Type,
+        target: Union[Operation, Value],
+        *,
+        fail_if_already_divisible: Union[bool, BoolAttr] = False,
+        ip=None,
+        loc=None,
+    ):
+        super().__init__(
+            main_loop_type,
+            remainder_loop_type,
+            _get_op_result_or_value(target),
+            fail_if_already_divisible=(
+                fail_if_already_divisible
+                if isinstance(fail_if_already_divisible, BoolAttr)
+                else BoolAttr.get(fail_if_already_divisible)
+            ),
+            ip=ip,
+            loc=loc,
+        )
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class LoopPipelineOp(LoopPipelineOp):
+    """Extension for LoopPipelineOp."""
+
+    def __init__(
+        self,
+        result_type: Type,
+        target: Union[Operation, Value],
+        *,
+        iteration_interval: Optional[Union[int, IntegerAttr]] = None,
+        read_latency: Optional[Union[int, IntegerAttr]] = None,
+        ip=None,
+        loc=None,
+    ):
+        if iteration_interval is None:
+            iteration_interval = 1
+        if read_latency is None:
+            read_latency = 10
+        super().__init__(
+            result_type,
+            _get_op_result_or_value(target),
+            iteration_interval=iteration_interval,
+            read_latency=read_latency,
+            ip=ip,
+            loc=loc,
+        )
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class LoopUnrollOp(LoopUnrollOp):
+    """Extension for LoopUnrollOp."""
+
+    def __init__(
+        self,
+        target: Union[Operation, Value],
+        *,
+        factor: Union[int, IntegerAttr],
+        ip=None,
+        loc=None,
+    ):
+        super().__init__(
+            _get_op_result_or_value(target),
+            factor=factor,
+            ip=ip,
+            loc=loc,
+        )
diff --git a/mlir/python/mlir/dialects/transform/memref.py b/mlir/python/mlir/dialects/transform/memref.py
index 1ff04ef6a60a180..56ea61eb817f89c 100644
--- a/mlir/python/mlir/dialects/transform/memref.py
+++ b/mlir/python/mlir/dialects/transform/memref.py
@@ -3,3 +3,118 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from .._memref_transform_ops_gen import *
+from .._memref_transform_ops_gen import _Dialect
+
+try:
+    from ...ir import *
+    from ...dialects import transform
+    from .._ods_common import _cext as _ods_cext
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+from typing import Optional, overload, Union
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class MemRefAllocaToGlobalOp(MemRefAllocaToGlobalOp):
+    """Specialization for MemRefAllocaToGlobalOp class."""
+
+    @overload
+    def __init__(
+        self,
+        get_global_type: Type,
+        global_type: Type,
+        alloca: Union[Operation, OpView, Value],
+        *,
+        loc=None,
+        ip=None,
+    ):
+        ...
+
+    @overload
+    def __init__(self, alloca: Union[Operation, OpView, Value], *, loc=None, ip=None):
+        ...
+
+    def __init__(
+        self,
+        get_global_type_or_alloca: Union[Operation, OpView, Type, Value],
+        global_type_or_none: Optional[Type] = None,
+        alloca_or_none: Optional[Union[Operation, OpView, Value]] = None,
+        *,
+        loc=None,
+        ip=None,
+    ):
+        if isinstance(get_global_type_or_alloca, Type):
+            get_global_type = get_global_type_or_alloca
+            global_type = global_type_or_none
+            alloca = alloca_or_none
+        else:
+            get_global_type = transform.AnyOpType.get()
+            global_type = transform.AnyOpType.get()
+            alloca = get_global_type_or_alloca
+
+        super().__init__(
+            get_global_type,
+            global_type,
+            alloca,
+            loc=loc,
+            ip=ip,
+        )
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class MemRefMultiBufferOp(MemRefMultiBufferOp):
+    """Specialization for MemRefMultiBufferOp class."""
+
+    @overload
+    def __init__(
+        self,
+        transformed_type: Type,
+        target: Union[Operation, OpView, Value],
+        factor: Union[int, IntegerAttr],
+        *,
+        skip_analysis: Optional[bool] = None,
+        loc=None,
+        ip=None,
+    ):
+        ...
+
+    @overload
+    def __init__(
+        self,
+        target: Union[Operation, OpView, Value],
+        factor: Union[int, IntegerAttr],
+        *,
+        skip_analysis: Optional[bool] = None,
+        loc=None,
+        ip=None,
+    ):
+        ...
+
+    def __init__(
+        self,
+        transformed_type_or_target: Type,
+        target_or_factor: Union[int, IntegerAttr, Operation, OpView, Value] = None,
+        factor_or_none: Optional[Union[int, IntegerAttr]] = None,
+        *,
+        skip_analysis: Optional[bool] = None,
+        loc=None,
+        ip=None,
+    ):
+        if isinstance(transformed_type_or_target, Type):
+            transformed_type = transformed_type_or_target
+            target = target_or_factor
+            factor = factor_or_none
+        else:
+            transformed_type = transform.AnyOpType.get()
+            target = transformed_type_or_target
+            factor = target_or_factor
+
+        super().__init__(
+            transformed_type,
+            target,
+            factor,
+            skip_analysis=skip_analysis,
+            loc=loc,
+            ip=ip,
+        )
diff --git a/mlir/python/mlir/dialects/transform/pdl.py b/mlir/python/mlir/dialects/transform/pdl.py
index b1515287a3f1ff0..bb5fa7ffd306583 100644
--- a/mlir/python/mlir/dialects/transform/pdl.py
+++ b/mlir/python/mlir/dialects/transform/pdl.py
@@ -3,3 +3,53 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from .._transform_pdl_extension_ops_gen import *
+from .._transform_pdl_extension_ops_gen import _Dialect
+
+try:
+    from ...ir import *
+    from .._ods_common import (
+        get_op_result_or_value as _get_op_result_or_value,
+        get_op_results_or_values as _get_op_results_or_values,
+        _cext as _ods_cext,
+    )
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+from typing import Union
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class PDLMatchOp(PDLMatchOp):
+    def __init__(
+        self,
+        result_type: Type,
+        target: Union[Operation, Value],
+        pattern_name: Union[Attribute, str],
+        *,
+        loc=None,
+        ip=None,
+    ):
+        super().__init__(
+            result_type,
+            _get_op_result_or_value(target),
+            pattern_name,
+            loc=loc,
+            ip=ip,
+        )
+
+
+ at _ods_cext.register_operation(_Dialect, replace=True)
+class WithPDLPatternsOp(WithPDLPatternsOp):
+    def __init__(self, target: Union[Operation, Value, Type], *, loc=None, ip=None):
+        root = _get_op_result_or_value(target) if not isinstance(target, Type) else None
+        root_type = target if isinstance(target, Type) else root.type
+        super().__init__(root=root, loc=loc, ip=ip)
+        self.regions[0].blocks.append(root_type)
+
+    @property
+    def body(self) -> Block:
+        return self.regions[0].blocks[0]
+
+    @property
+    def bodyTarget(self) -> Value:
+        return self.body.arguments[0]
diff --git a/mlir/python/mlir/dialects/transform/structured.py b/mlir/python/mlir/dialects/transform/structured.py
index cb3812301dbd4b5..284c93823acbd34 100644
--- a/mlir/python/mlir/dialects/transform/structured.py
+++ b/mlir/python/mlir/dialects/transform/structured.py
@@ -3,4 +3,777 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from .._structured_transform_ops_gen import *
+from .._structured_transform_ops_gen import _Dialect
 from .._structured_transform_enum_gen import *
+
+try:
+    from ...ir import *
+    from ...dialects import transform
+    from .._ods_common import _cext as _ods_cext
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
from typing import Any, List, Optional, Sequence, Tuple, Union, overload
+
+StaticIntLike = Union[int, IntegerAttr]
+ValueLike = Union[Operation, OpView, Value]
+MixedInt = Union[StaticIntLike, ValueLike]
+
+IntOrAttrList = Sequence[Union[IntegerAttr, int]]
+OptionalIntList = Optional[Union[ArrayAttr, IntOrAttrList]]
+
+BoolOrAttrList = Sequence[Union[BoolAttr, bool]]
+OptionalBoolList = Optional[Union[ArrayAttr, BoolOrAttrList]]
+
+MixedValues = Union[Sequence[Union[StaticIntLike, ValueLike]], ArrayAttr, ValueLike]
+
+DynamicIndexList = Sequence[Union[MixedInt, Sequence[MixedInt]]]
+
+
+def _dispatch_dynamic_index_list(
+    indices: Union[DynamicIndexList, ArrayAttr],
+) -> Tuple[List[ValueLike], Union[List[int], ArrayAttr], List[bool]]:
+    """Dispatches a list of indices to the appropriate form.
+
+    This is similar to the custom `DynamicIndexList` directive upstream:
+    provided indices may be in the form of dynamic SSA values or static values,
+    and they may be scalable (i.e., as a singleton list) or not. This function
+    dispatches each index into its respective form. It also extracts the SSA
+    values and static indices from various similar structures, respectively.
+    """
+    dynamic_indices = []
+    static_indices = [ShapedType.get_dynamic_size()] * len(indices)
+    scalable_indices = [False] * len(indices)
+
+    # ArrayAttr: Extract index values.
+    if isinstance(indices, ArrayAttr):
+        indices = [idx for idx in indices]
+
+    def process_nonscalable_index(i, index):
+        """Processes any form of non-scalable index.
+
+        Returns False if the given index was scalable and thus remains
+        unprocessed; True otherwise.
+        """
+        if isinstance(index, int):
+            static_indices[i] = index
+        elif isinstance(index, IntegerAttr):
+            static_indices[i] = index.value  # pytype: disable=attribute-error
+        elif isinstance(index, (Operation, Value, OpView)):
+            dynamic_indices.append(index)
+        else:
+            return False
+        return True
+
+    # Process each index at a time.
+    for i, index in enumerate(indices):
+        if not process_nonscalable_index(i, index):
+            # If it wasn't processed, it must be a scalable index, which is
+            # provided as a Sequence of one value, so extract and process that.
+            scalable_indices[i] = True
+            assert len(index) == 1
+            ret = process_nonscalable_index(i, index[0])
+            assert ret
+
+    return dynamic_indices, static_indices, scalable_indices
+
+
+# Dispatches `MixedValues` that all represents integers in various forms into
+# the following three categories:
+#   - `dynamic_values`: a list of `Value`s, potentially from op results;
+#   - `packed_values`: a value handle, potentially from an op result, associated
+#                      to one or more payload operations of integer type;
+#   - `static_values`: an `ArrayAttr` of `i64`s with static values, from Python
+#                      `int`s, from `IntegerAttr`s, or from an `ArrayAttr`.
+# The input is in the form for `packed_values`, only that result is set and the
+# other two are empty. Otherwise, the input can be a mix of the other two forms,
+# and for each dynamic value, a special value is added to the `static_values`.
+def _dispatch_mixed_values(
+    values: MixedValues,
+) -> Tuple[List[Value], Union[Operation, Value, OpView], DenseI64ArrayAttr]:
+    dynamic_values = []
+    packed_values = None
+    static_values = None
+    if isinstance(values, ArrayAttr):
+        static_values = values
+    elif isinstance(values, (Operation, Value, OpView)):
+        packed_values = values
+    else:
+        static_values = []
+        for size in values or []:
+            if isinstance(size, int):
+                static_values.append(size)
+            else:
+                static_values.append(ShapedType.get_dynamic_size())
+                dynamic_values.append(size)
+        static_values = DenseI64ArrayAttr.get(static_values)
+
+    return (dynamic_values, packed_values, static_values)
+
+
+def _get_value_or_attribute_value(
+    value_or_attr: Union[any, Attribute, ArrayAttr]
+) -> any:
+    if isinstance(value_or_attr, Attribute) and hasattr(value_or_attr, "value"):
+        return value_or_attr.value
+    if isinstance(value_or_attr, ArrayAttr):
+        return _get_value_list(value_or_attr)
+    return value_or_attr
+
+
+def _get_value_list(
+    sequence_or_array_attr: Union[Sequence[any], ArrayAttr]
+) -> Sequence[any]:
+    return [_get_value_or_attribute_value(v) for v in sequence_or_array_attr]
+
+
+def _get_int_array_attr(values: Optional[Union[ArrayAttr, IntOrAttrList]]) -> ArrayAttr:
+    if values is None:
+        return None
+
+    # Turn into a Python list of Python ints.
+    values = _get_value_list(values)
+
+    # Make an ArrayAttr of IntegerAttrs out of it.
+    return ArrayAttr.get(
+        [IntegerAttr.get(IntegerType.get_signless(64), v) for v in values]
+    )
+
+
def _get_int_array_array_attr(
    values: Optional[Union[ArrayAttr, Sequence[Union[ArrayAttr, IntOrAttrList]]]]
) -> Optional[ArrayAttr]:
    """Creates an ArrayAttr of ArrayAttrs of IntegerAttrs.

    The input has to be a collection of collections of integers, where any
    Python Sequence and ArrayAttr are admissible collections and Python ints and
    any IntegerAttr are admissible integers. Both levels of collections are
    turned into ArrayAttr; the inner level is turned into IntegerAttrs of i64s.
    If the input is None, None is returned, so the corresponding optional op
    attribute can stay unset.
    """
    if values is None:
        return None

    # Make sure the outer level is a list.
    values = _get_value_list(values)

    # The inner level is now either invalid or a mixed sequence of ArrayAttrs and
    # Sequences. Make sure the nested values are all lists.
    values = [_get_value_list(nested) for nested in values]

    # Turn each nested list into an ArrayAttr.
    values = [_get_int_array_attr(nested) for nested in values]

    # Turn the outer list into an ArrayAttr.
    return ArrayAttr.get(values)
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class BufferizeToAllocationOp(BufferizeToAllocationOp):
    """Specialization for BufferizeToAllocationOp class."""

    def __init__(
        self,
        target: Union[Operation, OpView, Value],
        *,
        memory_space: Optional[Union[int, str, Attribute]] = None,
        memcpy_op: Optional[str] = None,
        alloc_op: Optional[str] = None,
        bufferize_destination_only: Optional[bool] = None,
        loc=None,
        ip=None,
    ):
        # Normalize ints and strings to a parsed Attribute; an Attribute or
        # None passes through untouched.
        if memory_space is not None and not isinstance(memory_space, Attribute):
            memory_space = Attribute.parse(str(memory_space))

        # The op only ever produces these two result types, so hard-code them.
        super().__init__(
            transform.AnyValueType.get(),
            transform.AnyOpType.get(),
            target,
            memory_space=memory_space,
            memcpy_op=memcpy_op,
            alloc_op=alloc_op,
            bufferize_destination_only=bufferize_destination_only,
            loc=loc,
            ip=ip,
        )
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class DecomposeOp(DecomposeOp):
    """Specialization for DecomposeOp class."""

    def __init__(self, target: Union[Operation, Value], *, loc=None, ip=None):
        # The decomposed op is always returned as a generic any-op handle.
        super().__init__(transform.AnyOpType.get(), target, loc=loc, ip=ip)
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class FuseIntoContainingOp(FuseIntoContainingOp):
    """Specialization for FuseIntoContainingOp class."""

    @overload
    def __init__(
        self,
        fused_op_type: Type,
        new_containing_op_type: Type,
        producer_op: Union[Operation, OpView, Value],
        containing_op: Union[Operation, OpView, Value],
        *,
        loc=None,
        ip=None,
    ):
        ...

    @overload
    def __init__(
        self,
        producer_op: Union[Operation, OpView, Value],
        containing_op: Union[Operation, OpView, Value],
        *,
        loc=None,
        ip=None,
    ):
        ...

    def __init__(
        self,
        fused_op_type_or_producer_op: Union[Operation, OpView, Type, Value],
        new_containing_op_type_or_containing_op: Union[Operation, OpView, Type, Value],
        producer_op_or_none: Optional[Union[Operation, OpView, Value]] = None,
        containing_op_or_none: Optional[Union[Operation, OpView, Value]] = None,
        *,
        loc=None,
        ip=None,
    ):
        # Dispatch on whether the first overload (explicit result types) or the
        # second one (any-op defaults) was used.
        types_given = isinstance(fused_op_type_or_producer_op, Type)
        if types_given and not isinstance(
            new_containing_op_type_or_containing_op, Type
        ):
            raise TypeError(
                "If 'fused_op_type_or_producer_op' is a type, then "
                "'new_containing_op_type_or_containing_op' is expected "
                "to be one as well."
            )

        if types_given:
            fused_op_type = fused_op_type_or_producer_op
            new_containing_op_type = new_containing_op_type_or_containing_op
            producer_op = producer_op_or_none
            containing_op = containing_op_or_none
        else:
            fused_op_type = transform.AnyOpType.get()
            new_containing_op_type = transform.AnyOpType.get()
            producer_op = fused_op_type_or_producer_op
            containing_op = new_containing_op_type_or_containing_op

        super().__init__(
            fused_op_type,
            new_containing_op_type,
            producer_op,
            containing_op,
            loc=loc,
            ip=ip,
        )
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class GeneralizeOp(GeneralizeOp):
    """Specialization for GeneralizeOp class."""

    def __init__(self, target: Union[Operation, Value], *, loc=None, ip=None):
        # The generalized op is always returned as a generic any-op handle.
        super().__init__(transform.AnyOpType.get(), target, loc=loc, ip=ip)
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class InterchangeOp(InterchangeOp):
    """Specialization for InterchangeOp class."""

    def __init__(
        self,
        target: Union[Operation, Value],
        *,
        iterator_interchange: OptionalIntList = None,
        loc=None,
        ip=None,
    ):
        # The interchanged op is always returned as a generic any-op handle.
        super().__init__(
            transform.AnyOpType.get(),
            target,
            iterator_interchange=iterator_interchange,
            loc=loc,
            ip=ip,
        )
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class MapCopyToThreadsOp(MapCopyToThreadsOp):
    """Specialization for MapCopyToThreadsOp class."""

    @overload
    def __init__(
        self,
        forall_op_type: Type,
        tiled_op_type: Type,
        target: Union[Operation, OpView, Value],
        *,
        total_num_threads: Union[int, IntegerAttr],
        desired_bit_alignment: Union[int, IntegerAttr],
        loc=None,
        ip=None,
    ):
        ...

    @overload
    def __init__(
        self,
        target: Union[Operation, OpView, Value],
        *,
        total_num_threads: Union[int, IntegerAttr],
        desired_bit_alignment: Union[int, IntegerAttr],
        loc=None,
        ip=None,
    ):
        ...

    def __init__(
        self,
        forall_op_type_or_target: Union[Operation, OpView, Type, Value],
        tiled_op_type_or_none: Optional[Type] = None,
        target_or_none: Optional[Union[Operation, OpView, Value]] = None,
        *,
        total_num_threads: Union[int, IntegerAttr],
        desired_bit_alignment: Union[int, IntegerAttr],
        loc=None,
        ip=None,
    ):
        # First overload passes (forall_op_type, tiled_op_type, target);
        # second passes only the target and defaults both result types to
        # !transform.any_op.
        if isinstance(forall_op_type_or_target, Type):
            result_types = (forall_op_type_or_target, tiled_op_type_or_none)
            target = target_or_none
        else:
            result_types = (transform.AnyOpType.get(), transform.AnyOpType.get())
            target = forall_op_type_or_target

        super().__init__(
            *result_types,
            target,
            total_num_threads=total_num_threads,
            desired_bit_alignment=desired_bit_alignment,
            loc=loc,
            ip=ip,
        )
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class VectorizeOp(VectorizeOp):
    """Specialization for VectorizeOp class."""

    def __init__(
        self,
        target: Union[Operation, OpView, Value],
        vector_sizes: Optional[Union[DynamicIndexList, ArrayAttr]] = None,
        *,
        vectorize_nd_extract: Optional[bool] = None,
        scalable_sizes: OptionalBoolList = None,
        static_vector_sizes: OptionalIntList = None,
        loc=None,
        ip=None,
    ):
        # Vector sizes can be supplied in two mutually exclusive forms:
        #  1. packed into `vector_sizes` (a mixed static/dynamic list later
        #     split by `_dispatch_dynamic_index_list`), or
        #  2. explicitly, with `vector_sizes` carrying only the dynamic part
        #     and `static_vector_sizes`/`scalable_sizes` given separately.
        if (
            scalable_sizes is None
            and static_vector_sizes is None
            and vector_sizes is None
        ):
            # No sizes at all: dynamic part is empty, the other two stay None.
            dynamic_vector_sizes = []
        elif scalable_sizes is None and static_vector_sizes is None:
            # Packed form: unpack into dynamic/static/scalable components.
            (
                dynamic_vector_sizes,
                static_vector_sizes,
                scalable_sizes,
            ) = _dispatch_dynamic_index_list(vector_sizes)
        elif scalable_sizes is None or static_vector_sizes is None:
            # Giving only one of the two explicit lists is ambiguous.
            raise TypeError(
                "'scalable_sizes' and 'static_vector_sizes' must either both "
                "be given explicitly or both be given as part of 'vector_sizes'."
            )
        else:
            # Explicit form: `vector_sizes` holds only the dynamic values.
            dynamic_vector_sizes = vector_sizes

        super().__init__(
            target,
            vector_sizes=dynamic_vector_sizes,
            static_vector_sizes=static_vector_sizes,
            scalable_sizes=scalable_sizes,
            vectorize_nd_extract=vectorize_nd_extract,
            loc=loc,
            ip=ip,
        )
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class MatchOp(MatchOp):
    """Specialization for MatchOp class."""

    @overload
    @classmethod
    def match_op_names(
        cls,
        target: Union[Operation, Value],
        names: Union[str, Sequence[str]],
        *,
        loc=None,
        ip=None,
    ):
        ...

    @overload
    @classmethod
    def match_op_names(
        cls,
        result_type: Type,
        target: Union[Operation, Value],
        names: Union[str, Sequence[str]],
        *,
        loc=None,
        ip=None,
    ):
        ...

    @classmethod
    def match_op_names(
        cls,
        result_type_or_target: Union[Type, Operation, Value],
        target_or_names: Union[Operation, Value, Sequence[str], str],
        names_or_none: Optional[Union[Sequence[str], str]] = None,
        *,
        loc=None,
        ip=None,
    ):
        """Creates a MatchOp matching payload ops by name.

        The result type is optional and defaults to !transform.any_op.
        """
        if isinstance(result_type_or_target, Type):
            result_type, target, names = (
                result_type_or_target,
                target_or_names,
                names_or_none,
            )
        else:
            result_type = transform.AnyOpType.get()
            target, names = result_type_or_target, target_or_names

        # Accept a single name as shorthand for a one-element list.
        if isinstance(names, str):
            names = [names]

        return cls(
            result_type,
            target,
            ops=ArrayAttr.get([StringAttr.get(name) for name in names]),
            loc=loc,
            ip=ip,
        )
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class MultiTileSizesOp(MultiTileSizesOp):
    """Specialization for MultiTileSizesOp class."""

    def __init__(
        self,
        result_type: Type,
        target: Union[Operation, Value],
        *,
        dimension: Union[int, IntegerAttr],
        target_size: Union[int, IntegerAttr],
        divisor: Optional[Union[int, IntegerAttr]] = None,
        loc=None,
        ip=None,
    ):
        # The op has three results, all sharing the caller-provided handle
        # type. NOTE(review): presumably these are the low/high tile sizes and
        # the split point — confirm against the op's ODS definition.
        super().__init__(
            result_type,
            result_type,
            result_type,
            target,
            dimension=dimension,
            target_size=target_size,
            divisor=divisor,
            loc=loc,
            ip=ip,
        )
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class PadOp(PadOp):
    """Specialization for PadOp class."""

    def __init__(
        self,
        target: Union[Operation, OpView, Value],
        *,
        padding_values: Optional[Union[ArrayAttr, Sequence[Attribute]]] = None,
        padding_dimensions: OptionalIntList = None,
        pad_to_multiple_of: OptionalIntList = None,
        pack_paddings: OptionalIntList = None,
        transpose_paddings: Optional[
            Union[ArrayAttr, Sequence[Union[ArrayAttr, IntOrAttrList]]]
        ] = None,
        copy_back_op: Optional[Union[str, StringAttr]] = None,
        loc=None,
        ip=None,
    ):
        # All three results are generic any-op handles; the nested transpose
        # permutations are normalized into an ArrayAttr of i64 ArrayAttrs.
        result_type = transform.AnyOpType.get()
        super().__init__(
            result_type,
            result_type,
            result_type,
            target,
            padding_values=padding_values,
            padding_dimensions=padding_dimensions,
            pad_to_multiple_of=pad_to_multiple_of,
            pack_paddings=pack_paddings,
            transpose_paddings=_get_int_array_array_attr(transpose_paddings),
            copy_back_op=copy_back_op,
            loc=loc,
            ip=ip,
        )
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class ScalarizeOp(ScalarizeOp):
    """Specialization for ScalarizeOp class."""

    def __init__(self, target: Union[Operation, Value], *, loc=None, ip=None):
        # The scalarized op is always returned as a generic any-op handle.
        super().__init__(transform.AnyOpType.get(), target, loc=loc, ip=ip)
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class SplitOp(SplitOp):
    """Specialization for SplitOp class."""

    def __init__(
        self,
        target: Union[Operation, Value],
        dimension: Union[int, Attribute],
        split_point: Union[int, Operation, Value, Attribute],
        *,
        loc=None,
        ip=None,
    ):
        # A literal int becomes the static split point; any other value is
        # passed dynamically, with the static attribute set to the "dynamic"
        # sentinel from ShapedType.
        is_static = isinstance(split_point, int)
        dynamic_split_point = None if is_static else split_point
        static_split_point = (
            split_point if is_static else ShapedType.get_dynamic_size()
        )

        super().__init__(
            target.type,
            target.type,
            target,
            dimension=dimension,
            static_split_point=static_split_point,
            dynamic_split_point=dynamic_split_point,
            loc=loc,
            ip=ip,
        )
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class TileUsingForOp(TileUsingForOp):
    """Specialization for TileUsingForOp class."""

    @overload
    def __init__(
        self,
        loop_types: Union[Type, List[Type]],
        target: Union[Operation, Value],
        *,
        sizes: Optional[Union[DynamicIndexList, ArrayAttr]] = None,
        interchange: OptionalIntList = None,
        loc=None,
        ip=None,
    ):
        ...

    @overload
    def __init__(
        self,
        target: Union[Operation, Value, OpView],
        *,
        sizes: Optional[Union[DynamicIndexList, ArrayAttr]] = None,
        interchange: OptionalIntList = None,
        loc=None,
        ip=None,
    ):
        ...

    def __init__(
        self,
        loop_types_or_target: Union[Type, List[Type], Operation, Value],
        target_or_none: Optional[Union[Operation, Value, OpView]] = None,
        *,
        sizes: Optional[Union[DynamicIndexList, ArrayAttr]] = None,
        interchange: OptionalIntList = None,
        loc=None,
        ip=None,
    ):
        # Split the mixed static/dynamic tile sizes.
        (
            dynamic_sizes,
            static_sizes,
            scalable_sizes,
        ) = _dispatch_dynamic_index_list(sizes)

        # One loop is produced for every non-zero tile size.
        num_loops = sum(1 for size in static_sizes if size != 0)

        if isinstance(loop_types_or_target, (Operation, Value, OpView)):
            # Second overload: no loop types given, default to any-op handles.
            target = loop_types_or_target
            loop_types = [transform.AnyOpType.get()] * num_loops
            assert (
                target_or_none is None
            ), "Cannot construct TileUsingForOp with two targets."
        else:
            # First overload: a single Type is replicated once per loop, a
            # list of types is used as-is.
            target = target_or_none
            if isinstance(loop_types_or_target, Type):
                loop_types = [loop_types_or_target] * num_loops
            else:
                loop_types = loop_types_or_target

        super().__init__(
            target.type,
            loop_types,
            target,
            dynamic_sizes=dynamic_sizes,
            static_sizes=static_sizes,
            interchange=interchange,
            scalable_sizes=scalable_sizes,
            loc=loc,
            ip=ip,
        )
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class TileUsingForallOp(TileUsingForallOp):
    """Specialization for TileUsingForallOp class."""

    @overload
    def __init__(
        self,
        loops_type: Type,
        tiled_op_type: Type,
        target: Union[Operation, Value, OpView],
        *,
        num_threads: Optional[MixedValues] = None,
        tile_sizes: MixedValues = None,
        mapping=None,
        loc=None,
        ip=None,
    ):
        ...

    @overload
    def __init__(
        self,
        target: Union[Operation, Value, OpView],
        *,
        num_threads: Optional[MixedValues] = None,
        tile_sizes: MixedValues = None,
        mapping=None,
        loc=None,
        ip=None,
    ):
        ...

    def __init__(
        self,
        loops_type_or_target: Union[
            Type, Union[Operation, Value, OpView]  # loops_type
        ],  # target
        tiled_op_type_or_none: Optional[Type] = None,
        target_or_none: Optional[Union[Operation, Value, OpView]] = None,
        *,
        num_threads: MixedValues = None,
        tile_sizes: MixedValues = None,
        mapping=None,
        loc=None,
        ip=None,
    ):
        # Dispatch between the two overloads: explicit result types up front,
        # or target-only with !transform.any_op defaults.
        if isinstance(loops_type_or_target, Type):
            if not isinstance(tiled_op_type_or_none, Type):
                raise TypeError(
                    "If 'loops_type_or_target' is a type, then "
                    "'tiled_op_type_or_none' is expected to be one as well."
                )
            loops_type = loops_type_or_target
            tiled_op_type = tiled_op_type_or_none
            target = target_or_none
        else:
            loops_type = transform.AnyOpType.get()
            tiled_op_type = transform.AnyOpType.get()
            target = loops_type_or_target

        # Split each mixed value list into its dynamic values, packed handle,
        # and static attribute components.
        dynamic_num_threads, packed_num_threads, num_threads_attr = (
            _dispatch_mixed_values(num_threads)
        )
        dynamic_tile_sizes, packed_tile_sizes, tile_sizes_attr = (
            _dispatch_mixed_values(tile_sizes)
        )

        super().__init__(
            loops_type,
            tiled_op_type,
            target=target,
            tile_sizes=dynamic_tile_sizes,
            packed_tile_sizes=packed_tile_sizes,
            static_tile_sizes=tile_sizes_attr,
            num_threads=dynamic_num_threads,
            packed_num_threads=packed_num_threads,
            static_num_threads=num_threads_attr,
            mapping=mapping,
            loc=loc,
            ip=ip,
        )
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class VectorizeChildrenAndApplyPatternsOp(VectorizeChildrenAndApplyPatternsOp):
    """Specialization for VectorizeChildrenAndApplyPatternsOp class."""

    def __init__(
        self,
        target: Union[Operation, Value],
        *,
        disable_multi_reduction_to_contract_patterns: bool = False,
        disable_transfer_permutation_map_lowering_patterns: bool = False,
        vectorize_nd_extract: bool = False,
        vectorize_padding: bool = False,
        loc=None,
        ip=None,
    ):
        # The transformed payload is returned as a generic any-op handle.
        super().__init__(
            transform.AnyOpType.get(),
            target,
            disable_multi_reduction_to_contract_patterns=disable_multi_reduction_to_contract_patterns,
            disable_transfer_permutation_map_lowering_patterns=disable_transfer_permutation_map_lowering_patterns,
            vectorize_nd_extract=vectorize_nd_extract,
            vectorize_padding=vectorize_padding,
            loc=loc,
            ip=ip,
        )
diff --git a/mlir/python/mlir/dialects/transform/tensor.py b/mlir/python/mlir/dialects/transform/tensor.py
index bf52255b3df7145..4eb30398f087212 100644
--- a/mlir/python/mlir/dialects/transform/tensor.py
+++ b/mlir/python/mlir/dialects/transform/tensor.py
@@ -3,3 +3,67 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from .._tensor_transform_ops_gen import *
+from .._tensor_transform_ops_gen import _Dialect
+
+try:
+    from ...ir import *
+    from ...dialects import transform
+    from .._ods_common import _cext as _ods_cext
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+from typing import Optional, overload, Union
+
+
@_ods_cext.register_operation(_Dialect, replace=True)
class MakeLoopIndependentOp(MakeLoopIndependentOp):
    """Specialization for MakeLoopIndependentOp class."""

    @overload
    def __init__(
        self,
        transformed_type: Type,
        target: Union[Operation, OpView, Value],
        num_loops: Union[int, IntegerAttr],
        *,
        loc=None,
        ip=None,
    ):
        ...

    @overload
    def __init__(
        self,
        target: Union[Operation, OpView, Value],
        num_loops: Union[int, IntegerAttr],
        *,
        loc=None,
        ip=None,
    ):
        ...

    def __init__(
        self,
        transformed_type_or_target: Union[Type, Operation, OpView, Value],
        target_or_num_loops: Optional[
            Union[int, IntegerAttr, Operation, OpView, Value]
        ] = None,
        num_loops_or_none: Optional[Union[int, IntegerAttr]] = None,
        *,
        loc=None,
        ip=None,
    ):
        # Dispatch between the two overloads: if the first argument is a Type,
        # the caller provided an explicit result type; otherwise default it to
        # !transform.any_op and shift the remaining arguments left.
        if isinstance(transformed_type_or_target, Type):
            transformed_type = transformed_type_or_target
            target = target_or_num_loops
            num_loops = num_loops_or_none
        else:
            transformed_type = transform.AnyOpType.get()
            target = transformed_type_or_target
            num_loops = target_or_num_loops

        super().__init__(
            transformed_type,
            target,
            num_loops,
            loc=loc,
            ip=ip,
        )
diff --git a/mlir/python/mlir/runtime/np_to_memref.py b/mlir/python/mlir/runtime/np_to_memref.py
index 0a3b411041b2f4d..f6b706f9bc8ae24 100644
--- a/mlir/python/mlir/runtime/np_to_memref.py
+++ b/mlir/python/mlir/runtime/np_to_memref.py
@@ -114,6 +114,7 @@ def get_unranked_memref_descriptor(nparray):
     d.descriptor = ctypes.cast(ctypes.pointer(x), ctypes.c_void_p)
     return d
 
+
 def move_aligned_ptr_by_offset(aligned_ptr, offset):
     """Moves the supplied ctypes pointer ahead by `offset` elements."""
     aligned_addr = ctypes.addressof(aligned_ptr.contents)
@@ -122,6 +123,7 @@ def move_aligned_ptr_by_offset(aligned_ptr, offset):
     content_ptr = ctypes.cast(aligned_addr + shift, type(aligned_ptr))
     return content_ptr
 
+
 def unranked_memref_to_numpy(unranked_memref, np_dtype):
     """Converts unranked memrefs to numpy arrays."""
     ctp = as_ctype(np_dtype)
@@ -139,10 +141,10 @@ def unranked_memref_to_numpy(unranked_memref, np_dtype):
 
 def ranked_memref_to_numpy(ranked_memref):
     """Converts ranked memrefs to numpy arrays."""
-    content_ptr = move_aligned_ptr_by_offset(ranked_memref[0].aligned, ranked_memref[0].offset)
-    np_arr = np.ctypeslib.as_array(
-        content_ptr, shape=ranked_memref[0].shape
+    content_ptr = move_aligned_ptr_by_offset(
+        ranked_memref[0].aligned, ranked_memref[0].offset
     )
+    np_arr = np.ctypeslib.as_array(content_ptr, shape=ranked_memref[0].shape)
     strided_arr = np.lib.stride_tricks.as_strided(
         np_arr,
         np.ctypeslib.as_array(ranked_memref[0].shape),
diff --git a/mlir/test/python/dialects/arith_dialect.py b/mlir/test/python/dialects/arith_dialect.py
index f4a793aee4aa14c..6d1c5eab7589847 100644
--- a/mlir/test/python/dialects/arith_dialect.py
+++ b/mlir/test/python/dialects/arith_dialect.py
@@ -33,3 +33,16 @@ def testFastMathFlags():
             )
             # CHECK: %0 = arith.addf %cst, %cst fastmath<nnan,ninf> : f32
             print(r)
+
+
# CHECK-LABEL: TEST: testArithValueBuilder
@run
def testArithValueBuilder():
    # Smoke-tests the generated value-builder form `arith.constant(...)`;
    # printing the result shows the defining constant op.
    with Context() as ctx, Location.unknown():
        module = Module.create()
        f32_t = F32Type.get()

        with InsertionPoint(module.body):
            a = arith.constant(value=FloatAttr.get(f32_t, 42.42))
            # CHECK: %cst = arith.constant 4.242000e+01 : f32
            print(a)
diff --git a/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp b/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp
index 49f3a951426d0ee..c8ef84721090ab9 100644
--- a/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp
@@ -30,14 +30,9 @@ constexpr const char *fileHeader = R"Py(
 # Autogenerated by mlir-tblgen; don't manually edit.
 
 from ._ods_common import _cext as _ods_cext
-from ._ods_common import extend_opview_class as _ods_extend_opview_class, segmented_accessor as _ods_segmented_accessor, equally_sized_accessor as _ods_equally_sized_accessor, get_default_loc_context as _ods_get_default_loc_context, get_op_result_or_value as _get_op_result_or_value, get_op_results_or_values as _get_op_results_or_values, get_op_result_or_op_results as _get_op_result_or_op_results
+from ._ods_common import segmented_accessor as _ods_segmented_accessor, equally_sized_accessor as _ods_equally_sized_accessor, get_default_loc_context as _ods_get_default_loc_context, get_op_result_or_value as _get_op_result_or_value, get_op_results_or_values as _get_op_results_or_values, get_op_result_or_op_results as _get_op_result_or_op_results
 _ods_ir = _ods_cext.ir
 
-try:
-  from . import _{0}_ops_ext as _ods_ext_module
-except ImportError:
-  _ods_ext_module = None
-
 import builtins
 from typing import Sequence as _Sequence, Union as _Union
 
@@ -62,7 +57,6 @@ from ._{0}_ops_gen import _Dialect
 ///   {1} is the operation name.
 constexpr const char *opClassTemplate = R"Py(
 @_ods_cext.register_operation(_Dialect)
- at _ods_extend_opview_class(_ods_ext_module)
 class {0}(_ods_ir.OpView):
   OPERATION_NAME = "{1}"
 )Py";
@@ -301,17 +295,17 @@ static bool isODSReserved(StringRef str) {
 /// (does not change the `name` if it already is suitable) and returns the
 /// modified version.
 static std::string sanitizeName(StringRef name) {
-  std::string processed_str = name.str();
+  std::string processedStr = name.str();
   std::replace_if(
-      processed_str.begin(), processed_str.end(),
+      processedStr.begin(), processedStr.end(),
       [](char c) { return !llvm::isAlnum(c); }, '_');
 
-  if (llvm::isDigit(*processed_str.begin()))
-    return "_" + processed_str;
+  if (llvm::isDigit(*processedStr.begin()))
+    return "_" + processedStr;
 
-  if (isPythonReserved(processed_str) || isODSReserved(processed_str))
-    return processed_str + "_";
-  return processed_str;
+  if (isPythonReserved(processedStr) || isODSReserved(processedStr))
+    return processedStr + "_";
+  return processedStr;
 }
 
 static std::string attrSizedTraitForKind(const char *kind) {
@@ -853,10 +847,6 @@ populateBuilderRegions(const Operator &op,
 /// rebuild anew).
 static llvm::SmallVector<std::string> emitDefaultOpBuilder(const Operator &op,
                                                            raw_ostream &os) {
-  // If we are asked to skip default builders, comply.
-  if (op.skipDefaultBuilders())
-    return {};
-
   llvm::SmallVector<std::string> builderArgs;
   llvm::SmallVector<std::string> builderLines;
   llvm::SmallVector<std::string> operandArgNames;
@@ -989,9 +979,6 @@ static void emitRegionAccessors(const Operator &op, raw_ostream &os) {
 static void emitValueBuilder(const Operator &op,
                              llvm::SmallVector<std::string> functionArgs,
                              raw_ostream &os) {
-  // If we are asked to skip default builders, comply.
-  if (op.skipDefaultBuilders())
-    return;
   // Params with (possibly) default args.
   auto valueBuilderParams =
       llvm::map_range(functionArgs, [](const std::string &argAndMaybeDefault) {
@@ -1010,9 +997,9 @@ static void emitValueBuilder(const Operator &op,
         auto lhs = *llvm::split(arg, "=").begin();
         return (lhs + "=" + llvm::convertToSnakeFromCamelCase(lhs)).str();
       });
-  std::string name_without_dialect =
+  std::string nameWithoutDialect =
       op.getOperationName().substr(op.getOperationName().find('.') + 1);
-  os << llvm::formatv(valueBuilderTemplate, sanitizeName(name_without_dialect),
+  os << llvm::formatv(valueBuilderTemplate, sanitizeName(nameWithoutDialect),
                       op.getCppClassName(),
                       llvm::join(valueBuilderParams, ", "),
                       llvm::join(opBuilderArgs, ", "),
@@ -1051,11 +1038,8 @@ static bool emitAllOps(const llvm::RecordKeeper &records, raw_ostream &os) {
   if (clDialectName.empty())
     llvm::PrintFatalError("dialect name not provided");
 
-  bool isExtension = !clDialectExtensionName.empty();
-  os << llvm::formatv(fileHeader, isExtension
-                                      ? clDialectExtensionName.getValue()
-                                      : clDialectName.getValue());
-  if (isExtension)
+  os << fileHeader;
+  if (!clDialectExtensionName.empty())
     os << llvm::formatv(dialectExtensionTemplate, clDialectName.getValue());
   else
     os << llvm::formatv(dialectClassTemplate, clDialectName.getValue());

>From fb5047f5244d81aa89f68210a9cd34ddddcc8af4 Mon Sep 17 00:00:00 2001
From: Yinying Li <107574043+yinying-lisa-li at users.noreply.github.com>
Date: Thu, 19 Oct 2023 17:33:28 -0400
Subject: [PATCH 60/76] [mlir][sparse] Remove old syntax (#69624)

---
 .../SparseTensor/IR/SparseTensorDialect.cpp   | 109 +++---------------
 1 file changed, 18 insertions(+), 91 deletions(-)

diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
index 170cd00821ea6ba..b03a9140a9f1994 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -415,28 +415,6 @@ SparseTensorEncodingAttr::getStaticLvlSliceStride(Level lvl) const {
   return getStaticDimSliceStride(toOrigDim(*this, lvl));
 }
 
-const static DimLevelType validDLTs[] = {DimLevelType::Dense,
-                                         DimLevelType::TwoOutOfFour,
-                                         DimLevelType::Compressed,
-                                         DimLevelType::CompressedNu,
-                                         DimLevelType::CompressedNo,
-                                         DimLevelType::CompressedNuNo,
-                                         DimLevelType::Singleton,
-                                         DimLevelType::SingletonNu,
-                                         DimLevelType::SingletonNo,
-                                         DimLevelType::SingletonNuNo,
-                                         DimLevelType::LooseCompressed,
-                                         DimLevelType::LooseCompressedNu,
-                                         DimLevelType::LooseCompressedNo,
-                                         DimLevelType::LooseCompressedNuNo};
-
-static std::optional<DimLevelType> parseDLT(StringRef str) {
-  for (DimLevelType dlt : validDLTs)
-    if (str == toMLIRString(dlt))
-      return dlt;
-  return std::nullopt;
-}
-
 Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) {
 #define RETURN_ON_FAIL(stmt)                                                   \
   if (failed(stmt)) {                                                          \
@@ -459,8 +437,7 @@ Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) {
   unsigned posWidth = 0;
   unsigned crdWidth = 0;
   StringRef attrName;
-  SmallVector<StringRef, 6> keys = {"lvlTypes", "dimToLvl",  "posWidth",
-                                    "crdWidth", "dimSlices", "map"};
+  SmallVector<StringRef, 3> keys = {"map", "posWidth", "crdWidth"};
   while (succeeded(parser.parseOptionalKeyword(&attrName))) {
     // Detect admissible keyword.
     auto *it = find(keys, attrName);
@@ -473,81 +450,16 @@ Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) {
     RETURN_ON_FAIL(parser.parseEqual())
     // Dispatch on keyword.
     switch (keyWordIndex) {
-    case 0: { // lvlTypes
-      Attribute attr;
-      RETURN_ON_FAIL(parser.parseAttribute(attr));
-      auto arrayAttr = llvm::dyn_cast<ArrayAttr>(attr);
-      ERROR_IF(!arrayAttr, "expected an array for lvlTypes")
-      for (auto i : arrayAttr) {
-        auto strAttr = llvm::dyn_cast<StringAttr>(i);
-        ERROR_IF(!strAttr, "expected a string value in lvlTypes")
-        auto strVal = strAttr.getValue();
-        if (auto optDLT = parseDLT(strVal)) {
-          lvlTypes.push_back(optDLT.value());
-        } else {
-          parser.emitError(parser.getNameLoc(), "unexpected level-type: ")
-              << strVal;
-          return {};
-        }
-      }
-      break;
-    }
-    case 1: { // dimToLvl
-      Attribute attr;
-      RETURN_ON_FAIL(parser.parseAttribute(attr))
-      auto affineAttr = llvm::dyn_cast<AffineMapAttr>(attr);
-      ERROR_IF(!affineAttr, "expected an affine map for dimToLvl")
-      dimToLvl = affineAttr.getValue();
-      break;
-    }
-    case 2: { // posWidth
-      Attribute attr;
-      RETURN_ON_FAIL(parser.parseAttribute(attr))
-      auto intAttr = llvm::dyn_cast<IntegerAttr>(attr);
-      ERROR_IF(!intAttr, "expected an integral position bitwidth")
-      posWidth = intAttr.getInt();
-      break;
-    }
-    case 3: { // crdWidth
-      Attribute attr;
-      RETURN_ON_FAIL(parser.parseAttribute(attr))
-      auto intAttr = llvm::dyn_cast<IntegerAttr>(attr);
-      ERROR_IF(!intAttr, "expected an integral index bitwidth")
-      crdWidth = intAttr.getInt();
-      break;
-    }
-    case 4: { // dimSlices
-      RETURN_ON_FAIL(parser.parseLSquare())
-      // Dispatches to DimSliceAttr to skip mnemonic
-      bool finished = false;
-      while (auto attr = SparseTensorDimSliceAttr::parse(parser, nullptr)) {
-        auto sliceAttr = llvm::cast<SparseTensorDimSliceAttr>(attr);
-        dimSlices.push_back(sliceAttr);
-        if (parser.parseOptionalComma().failed()) {
-          finished = true;
-          break;
-        }
-      }
-      // Wrong when parsing slices
-      if (!finished)
-        return {};
-      RETURN_ON_FAIL(parser.parseRSquare())
-      break;
-    }
-    case 5: { // map (new STEA surface syntax)
+    case 0: { // map
       ir_detail::DimLvlMapParser cParser(parser);
       auto res = cParser.parseDimLvlMap();
       RETURN_ON_FAIL(res);
-      // TODO: use DimLvlMap directly as storage representation, rather
-      // than converting things over.
       const auto &dlm = *res;
 
-      ERROR_IF(!lvlTypes.empty(), "Cannot mix `lvlTypes` with `map`")
       const Level lvlRank = dlm.getLvlRank();
       for (Level lvl = 0; lvl < lvlRank; lvl++)
         lvlTypes.push_back(dlm.getLvlType(lvl));
 
-      ERROR_IF(!dimSlices.empty(), "Cannot mix `dimSlices` with `map`")
       const Dimension dimRank = dlm.getDimRank();
       for (Dimension dim = 0; dim < dimRank; dim++)
         dimSlices.push_back(dlm.getDimSlice(dim));
@@ -567,11 +479,26 @@ Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) {
         dimSlices.clear();
       }
 
-      ERROR_IF(dimToLvl, "Cannot mix `dimToLvl` with `map`")
       dimToLvl = dlm.getDimToLvlMap(parser.getContext());
       lvlToDim = dlm.getLvlToDimMap(parser.getContext());
       break;
     }
+    case 1: { // posWidth
+      Attribute attr;
+      RETURN_ON_FAIL(parser.parseAttribute(attr))
+      auto intAttr = llvm::dyn_cast<IntegerAttr>(attr);
+      ERROR_IF(!intAttr, "expected an integral position bitwidth")
+      posWidth = intAttr.getInt();
+      break;
+    }
+    case 2: { // crdWidth
+      Attribute attr;
+      RETURN_ON_FAIL(parser.parseAttribute(attr))
+      auto intAttr = llvm::dyn_cast<IntegerAttr>(attr);
+      ERROR_IF(!intAttr, "expected an integral index bitwidth")
+      crdWidth = intAttr.getInt();
+      break;
+    }
     } // switch
     // Only last item can omit the comma.
     if (parser.parseOptionalComma().failed())

>From 1d8985fc14457429c8120b09ae842250bdd225b1 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn at google.com>
Date: Thu, 19 Oct 2023 13:55:30 -0700
Subject: [PATCH 61/76] [Fuchsia] Add lldb-dap to LLDB distribution

---
 clang/cmake/caches/Fuchsia-stage2.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake
index 0dcc35ee1495f55..13bb5ad3546e98f 100644
--- a/clang/cmake/caches/Fuchsia-stage2.cmake
+++ b/clang/cmake/caches/Fuchsia-stage2.cmake
@@ -368,6 +368,7 @@ if(FUCHSIA_ENABLE_LLDB)
     liblldb
     lldb-server
     lldb-argdumper
+    lldb-dap
   )
   if(LLDB_ENABLE_PYTHON)
     list(APPEND _FUCHSIA_LLDB_COMPONENTS lldb-python-scripts)

>From e353cd8173db939af22a6fd90705e35fbadb01a7 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Thu, 19 Oct 2023 14:55:33 -0700
Subject: [PATCH 62/76] [RISCV] Apply `IsSignExtendingOpW = 1` on `fcvtmod.w.d`
 (#69633)

Such that RISCVOptWInstrs can eliminate the redundant sign extend.
---
 llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td |  1 +
 llvm/test/CodeGen/RISCV/opt-w-instrs.mir   | 30 ++++++++++++++++++++++
 2 files changed, 31 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/opt-w-instrs.mir

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
index 5d6e8821b85931a..6f88ff7f7ac19af 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
@@ -112,6 +112,7 @@ def FROUND_D : FPUnaryOp_r_frm<0b0100001, 0b00100, FPR64, FPR64, "fround.d">,
 def FROUNDNX_D : FPUnaryOp_r_frm<0b0100001, 0b00101, FPR64, FPR64, "froundnx.d">,
                  Sched<[WriteFRoundF64, ReadFRoundF64]>;
 
+let IsSignExtendingOpW = 1 in
 def FCVTMOD_W_D
     : FPUnaryOp_r_rtz<0b1100001, 0b01000, GPR, FPR64, "fcvtmod.w.d">,
       Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>;
diff --git a/llvm/test/CodeGen/RISCV/opt-w-instrs.mir b/llvm/test/CodeGen/RISCV/opt-w-instrs.mir
new file mode 100644
index 000000000000000..0ecf8fd6bef33a2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/opt-w-instrs.mir
@@ -0,0 +1,30 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -mtriple=riscv64 -mattr='+d,+zfa' -verify-machineinstrs -run-pass=riscv-opt-w-instrs %s -o - | FileCheck %s --check-prefix=CHECK-ZFA
+
+---
+name:            fcvtmod_w_d
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $x11
+
+    ; CHECK-ZFA-LABEL: name: fcvtmod_w_d
+    ; CHECK-ZFA: liveins: $x10, $x11
+    ; CHECK-ZFA-NEXT: {{  $}}
+    ; CHECK-ZFA-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $x10
+    ; CHECK-ZFA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+    ; CHECK-ZFA-NEXT: [[FCVTMOD_W_D:%[0-9]+]]:gpr = nofpexcept FCVTMOD_W_D [[COPY]], 1
+    ; CHECK-ZFA-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[COPY1]], [[FCVTMOD_W_D]]
+    ; CHECK-ZFA-NEXT: $x10 = COPY [[ADD]]
+    ; CHECK-ZFA-NEXT: $x11 = COPY [[FCVTMOD_W_D]]
+    ; CHECK-ZFA-NEXT: PseudoRET
+    %0:fpr64 = COPY $x10
+    %1:gpr = COPY $x11
+
+    %2:gpr = nofpexcept FCVTMOD_W_D %0, 1
+    %3:gpr = ADD %1, %2
+    %4:gpr = ADDIW %2, 0
+    $x10 = COPY %3
+    $x11 = COPY %4
+    PseudoRET
+...

>From 969ba9ff1477d9dc350aa1b3f5fd23b6f6af76cd Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Thu, 19 Oct 2023 13:22:43 -0700
Subject: [PATCH 63/76] [lldb] Remove FileSpecList::GetFilesMatchingPartialPath
 (NFC)

This function is unused and unimplemented.
---
 lldb/include/lldb/Utility/FileSpecList.h | 3 ---
 lldb/source/Utility/FileSpecList.cpp     | 6 ------
 2 files changed, 9 deletions(-)

diff --git a/lldb/include/lldb/Utility/FileSpecList.h b/lldb/include/lldb/Utility/FileSpecList.h
index 14e8069f5ebd6d2..77587aa917916b8 100644
--- a/lldb/include/lldb/Utility/FileSpecList.h
+++ b/lldb/include/lldb/Utility/FileSpecList.h
@@ -200,9 +200,6 @@ class FileSpecList {
     return false;
   }
 
-  static size_t GetFilesMatchingPartialPath(const char *path, bool dir_okay,
-                                            FileSpecList &matches);
-
   const_iterator begin() const { return m_files.begin(); }
   const_iterator end() const { return m_files.end(); }
 
diff --git a/lldb/source/Utility/FileSpecList.cpp b/lldb/source/Utility/FileSpecList.cpp
index 35486fdc7eff16e..d5369ac4bbe5160 100644
--- a/lldb/source/Utility/FileSpecList.cpp
+++ b/lldb/source/Utility/FileSpecList.cpp
@@ -156,9 +156,3 @@ size_t FileSpecList::MemorySize() const {
 
 // Return the number of files in the file spec list.
 size_t FileSpecList::GetSize() const { return m_files.size(); }
-
-size_t FileSpecList::GetFilesMatchingPartialPath(const char *path,
-                                                 bool dir_okay,
-                                                 FileSpecList &matches) {
-  return 0;
-}

>From f6818528f71bc11dce82764ca0c5127f50dd15d3 Mon Sep 17 00:00:00 2001
From: Emilio Cota <ecg at google.com>
Date: Thu, 19 Oct 2023 18:35:13 -0400
Subject: [PATCH 64/76] [bazel][mlir] fixes for a2288a89

---
 .../mlir/python/BUILD.bazel                   | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel
index 159957360c91bfb..c83e4cc7ada23e3 100644
--- a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel
@@ -119,7 +119,6 @@ gentbl_filegroup(
 filegroup(
     name = "AffineOpsPyFiles",
     srcs = [
-        "mlir/dialects/_affine_ops_ext.py",
         "mlir/dialects/affine.py",
         ":AffineOpsPyGen",
     ],
@@ -163,7 +162,6 @@ gentbl_filegroup(
 filegroup(
     name = "BuiltinOpsPyFiles",
     srcs = [
-        "mlir/dialects/_builtin_ops_ext.py",
         "mlir/dialects/builtin.py",
         ":BuiltinOpsPyGen",
     ],
@@ -260,7 +258,6 @@ gentbl_filegroup(
 filegroup(
     name = "LinalgOpsPyFiles",
     srcs = [
-        "mlir/dialects/_linalg_ops_ext.py",
         ":LinalgOpsPyGen",
     ],
 )
@@ -361,7 +358,6 @@ gentbl_filegroup(
 filegroup(
     name = "ArithOpsPyFiles",
     srcs = [
-        "mlir/dialects/_arith_ops_ext.py",
         "mlir/dialects/arith.py",
         ":ArithOpsPyGen",
     ],
@@ -424,7 +420,6 @@ gentbl_filegroup(
 filegroup(
     name = "BufferizationOpsPyFiles",
     srcs = [
-        "mlir/dialects/_bufferization_ops_ext.py",
         "mlir/dialects/bufferization.py",
         ":BufferizationEnumPyGen",
         ":BufferizationOpsPyGen",
@@ -550,7 +545,6 @@ gentbl_filegroup(
 filegroup(
     name = "MemRefOpsPyFiles",
     srcs = [
-        "mlir/dialects/_memref_ops_ext.py",
         "mlir/dialects/memref.py",
         ":MemRefOpsPyGen",
     ],
@@ -582,7 +576,6 @@ gentbl_filegroup(
 filegroup(
     name = "MLProgramOpsPyFiles",
     srcs = [
-        "mlir/dialects/_ml_program_ops_ext.py",
         "mlir/dialects/ml_program.py",
         ":MLProgramOpsPyGen",
     ],
@@ -652,7 +645,6 @@ filegroup(
 filegroup(
     name = "PDLPyFiles",
     srcs = [
-        "mlir/dialects/_pdl_ops_ext.py",
         "mlir/dialects/pdl.py",
         ":PDLPyGen",
     ],
@@ -867,7 +859,6 @@ gentbl_filegroup(
 filegroup(
     name = "SCFPyFiles",
     srcs = [
-        "mlir/dialects/_scf_ops_ext.py",
         "mlir/dialects/scf.py",
         ":SCFPyGen",
     ],
@@ -940,7 +931,6 @@ gentbl_filegroup(
 filegroup(
     name = "FuncPyFiles",
     srcs = [
-        "mlir/dialects/_func_ops_ext.py",
         "mlir/dialects/func.py",
         ":FuncPyGen",
     ],
@@ -1040,7 +1030,6 @@ gentbl_filegroup(
 filegroup(
     name = "TensorOpsPyFiles",
     srcs = [
-        "mlir/dialects/_tensor_ops_ext.py",
         "mlir/dialects/tensor.py",
         ":TensorOpsPyGen",
     ],
@@ -1378,14 +1367,6 @@ gentbl_filegroup(
 filegroup(
     name = "TransformOpsPyFiles",
     srcs = [
-        "mlir/dialects/_bufferization_transform_ops_ext.py",
-        "mlir/dialects/_gpu_transform_ops_ext.py",
-        "mlir/dialects/_loop_transform_ops_ext.py",
-        "mlir/dialects/_memref_transform_ops_ext.py",
-        "mlir/dialects/_structured_transform_ops_ext.py",
-        "mlir/dialects/_tensor_transform_ops_ext.py",
-        "mlir/dialects/_transform_ops_ext.py",
-        "mlir/dialects/_transform_pdl_extension_ops_ext.py",
         ":BufferizationTransformOpsPyGen",
         ":GPUTransformOpsPyGen",
         ":LoopTransformOpsPyGen",

>From ff21a90e51ac3ad954df4f13adcf030a24c2a6a3 Mon Sep 17 00:00:00 2001
From: Peiming Liu <36770114+PeimingLiu at users.noreply.github.com>
Date: Thu, 19 Oct 2023 15:42:09 -0700
Subject: [PATCH 65/76] [mlir][sparse] introduce sparse_tensor.crd_translate
 operation (#69630)

---
 .../SparseTensor/IR/SparseTensorAttrDefs.td   | 22 ++++++++
 .../SparseTensor/IR/SparseTensorOps.td        | 26 +++++++++
 .../SparseTensor/IR/SparseTensorDialect.cpp   | 55 +++++++++++++++++++
 mlir/test/Dialect/SparseTensor/fold.mlir      | 18 ++++++
 mlir/test/Dialect/SparseTensor/invalid.mlir   | 34 ++++++++++++
 mlir/test/Dialect/SparseTensor/roundtrip.mlir | 22 ++++++++
 6 files changed, 177 insertions(+)

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
index 47fd18a689d5a8d..b0fbbd747b76604 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
@@ -541,4 +541,26 @@ def SparseTensorSortKindAttr
                "SparseTensorSortAlgorithm"> {
 }
 
+
+//===----------------------------------------------------------------------===//
+// Sparse Tensor Coordinate Translation Direction Attribute.
+//===----------------------------------------------------------------------===//
+
+// The C++ enum for the sparse tensor coordinate translation direction.
+def SparseTensorCrdTransDirectionEnum
+    : I32EnumAttr<"CrdTransDirectionKind", "sparse tensor coordinate translation direction", [
+        I32EnumAttrCase<"dim2lvl", 0, "dim_to_lvl">,
+        I32EnumAttrCase<"lvl2dim", 1, "lvl_to_dim">,
+      ]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = SparseTensor_Dialect.cppNamespace;
+}
+
+// The attribute wrapping the sparse tensor coordinate translation direction enum.
+def SparseTensorCrdTransDirectionAttr
+    : EnumAttr<SparseTensor_Dialect, SparseTensorCrdTransDirectionEnum,
+               "CrdTransDirection"> {
+}
+
+
 #endif // SPARSETENSOR_ATTRDEFS
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
index c446b84c5d34103..7209f2ef8488bcc 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
@@ -520,6 +520,32 @@ def SparseTensor_SetStorageSpecifierOp : SparseTensor_Op<"storage_specifier.set"
   let hasVerifier = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// Sparse Tensor Coordinate Translation Operation.
+//===----------------------------------------------------------------------===//
+
+def SparseTensor_CrdTranslateOp : SparseTensor_Op<"crd_translate", [Pure]>,
+    Arguments<(ins Variadic<Index>:$in_crds,
+                   SparseTensorCrdTransDirectionAttr:$direction,
+                   SparseTensorEncodingAttr:$encoder)>,
+    Results<(outs Variadic<Index>:$out_crds)> {
+  string summary = "Performs coordinate translation between level and dimension coordinate space.";
+  string description = [{
+    Performs coordinate translation between level and dimension coordinate space according
+    to the affine maps defined by $encoder.
+
+    Example:
+
+    ```mlir
+    %l0, %l1, %l2, %l3 = sparse_tensor.crd_translate dim_to_lvl [%d0, %d1] as #BSR
+                       : index, index, index, index
+    ```
+  }];
+  let assemblyFormat = "$direction `[` $in_crds `]` `as` $encoder attr-dict `:` type($out_crds)";
+  let hasVerifier = 1;
+  let hasFolder = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // Sparse Tensor Management Operations. These operations are "impure" in the
 // sense that some behavior is defined by side-effects. These operations provide
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
index b03a9140a9f1994..c6e7bfaf47d04d3 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -1140,6 +1140,61 @@ bool ConvertOp::needsExtraSort() {
   return true;
 }
 
+LogicalResult CrdTranslateOp::verify() {
+  uint64_t inRank = getEncoder().getLvlRank();
+  uint64_t outRank = getEncoder().getDimRank();
+
+  if (getDirection() == CrdTransDirectionKind::dim2lvl)
+    std::swap(inRank, outRank);
+
+  if (inRank != getInCrds().size() || outRank != getOutCrds().size())
+    return emitError("Coordinate rank mismatch with encoding");
+
+  return success();
+}
+
+LogicalResult CrdTranslateOp::fold(FoldAdaptor adaptor,
+                                   SmallVectorImpl<OpFoldResult> &results) {
+  if (getEncoder().isPermutation()) {
+    AffineMap perm = getDirection() == CrdTransDirectionKind::dim2lvl
+                         ? getEncoder().getDimToLvl()
+                         : getEncoder().getLvlToDim();
+    for (AffineExpr exp : perm.getResults())
+      results.push_back(getInCrds()[exp.cast<AffineDimExpr>().getPosition()]);
+    return success();
+  }
+
+  // Fuse dim2lvl/lvl2dim pairs.
+  auto def = getInCrds()[0].getDefiningOp<CrdTranslateOp>();
+  bool sameDef = def && llvm::all_of(getInCrds(), [def](Value v) {
+                   return v.getDefiningOp() == def;
+                 });
+  if (!sameDef)
+    return failure();
+
+  bool oppositeDir = def.getDirection() != getDirection();
+  bool sameOracle =
+      def.getEncoder().getDimToLvl() == getEncoder().getDimToLvl();
+  bool sameCount = def.getNumResults() == getInCrds().size();
+  if (!oppositeDir || !sameOracle || !sameCount)
+    return failure();
+
+  // The definition produces the coordinates in the same order as the input
+  // coordinates.
+  bool sameOrder = llvm::all_of(llvm::zip_equal(def.getOutCrds(), getInCrds()),
+                                [](auto valuePair) {
+                                  auto [lhs, rhs] = valuePair;
+                                  return lhs == rhs;
+                                });
+
+  if (!sameOrder)
+    return failure();
+  // l1 = dim2lvl (lvl2dim l0)
+  // ==> l0
+  results.append(def.getInCrds().begin(), def.getInCrds().end());
+  return success();
+}
+
 LogicalResult ToPositionsOp::verify() {
   auto e = getSparseTensorEncoding(getTensor().getType());
   if (failed(lvlIsInBounds(getLevel(), getTensor())))
diff --git a/mlir/test/Dialect/SparseTensor/fold.mlir b/mlir/test/Dialect/SparseTensor/fold.mlir
index 3dd1a629c129fff..3428f6d4ae5a117 100644
--- a/mlir/test/Dialect/SparseTensor/fold.mlir
+++ b/mlir/test/Dialect/SparseTensor/fold.mlir
@@ -75,3 +75,21 @@ func.func @sparse_reorder_coo(%arg0 : tensor<?x?xf32, #COO>) -> tensor<?x?xf32,
   %ret = sparse_tensor.reorder_coo quick_sort %arg0 : tensor<?x?xf32, #COO> to tensor<?x?xf32, #COO>
   return %ret : tensor<?x?xf32, #COO>
 }
+
+
+#BSR = #sparse_tensor.encoding<{
+  map = ( i, j ) ->
+  ( i floordiv 2 : dense,
+    j floordiv 3 : compressed,
+    i mod 2      : dense,
+    j mod 3      : dense
+  )
+}>
+
+// CHECK-LABEL: func @sparse_crd_translate(
+//   CHECK-NOT:   sparse_tensor.crd_translate
+func.func @sparse_crd_translate(%arg0: index, %arg1: index) -> (index, index) {
+  %l0, %l1, %l2, %l3 = sparse_tensor.crd_translate dim_to_lvl [%arg0, %arg1] as #BSR : index, index, index, index
+  %d0, %d1 = sparse_tensor.crd_translate lvl_to_dim [%l0, %l1, %l2, %l3] as #BSR : index, index
+  return  %d0, %d1 : index, index
+}
diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir
index 805f3d161921c13..1ab1bac6b592ee7 100644
--- a/mlir/test/Dialect/SparseTensor/invalid.mlir
+++ b/mlir/test/Dialect/SparseTensor/invalid.mlir
@@ -861,3 +861,37 @@ func.func @sparse_permuted_reorder_coo(%arg0 : tensor<?x?xf32, #UnorderedCOO>) -
   %ret = sparse_tensor.reorder_coo quick_sort %arg0 : tensor<?x?xf32, #UnorderedCOO> to tensor<?x?xf64, #OrderedCOO>
   return %ret : tensor<?x?xf64, #OrderedCOO>
 }
+
+// -----
+
+#BSR = #sparse_tensor.encoding<{
+  map = ( i, j ) ->
+  ( i floordiv 2 : dense,
+    j floordiv 3 : compressed,
+    i mod 2      : dense,
+    j mod 3      : dense
+  )
+}>
+
+func.func @sparse_crd_translate(%arg0: index, %arg1: index) -> (index, index, index) {
+  // expected-error at +1 {{Coordinate rank mismatch with encoding}}
+  %l0, %l1, %l2 = sparse_tensor.crd_translate dim_to_lvl [%arg0, %arg1] as #BSR : index, index, index
+  return  %l0, %l1, %l2 : index, index, index
+}
+
+// -----
+
+#BSR = #sparse_tensor.encoding<{
+  map = ( i, j ) ->
+  ( i floordiv 2 : dense,
+    j floordiv 3 : compressed,
+    i mod 2      : dense,
+    j mod 3      : dense
+  )
+}>
+
+func.func @sparse_crd_translate(%arg0: index, %arg1: index, %arg2: index) -> (index, index, index, index) {
+  // expected-error at +1 {{Coordinate rank mismatch with encoding}}
+  %l0, %l1, %l2, %l3 = sparse_tensor.crd_translate dim_to_lvl [%arg0, %arg1, %arg2] as #BSR : index, index, index, index
+  return  %l0, %l1, %l2, %l3 : index, index, index, index
+}
diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
index cbc3bb824924cdb..af9618ebe380d80 100644
--- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir
+++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
@@ -647,3 +647,25 @@ func.func @sparse_reorder_coo(%arg0 : tensor<?x?xf32, #UnorderedCOO>) -> tensor<
   %ret = sparse_tensor.reorder_coo quick_sort %arg0 : tensor<?x?xf32, #UnorderedCOO> to tensor<?x?xf32, #OrderedCOO>
   return %ret : tensor<?x?xf32, #OrderedCOO>
 }
+
+
+// -----
+
+#BSR = #sparse_tensor.encoding<{
+  map = ( i, j ) ->
+  ( i floordiv 2 : dense,
+    j floordiv 3 : compressed,
+    i mod 2      : dense,
+    j mod 3      : dense
+  )
+}>
+
+// CHECK-LABEL:   func.func @sparse_crd_translate(
+// CHECK-SAME:      %[[VAL_0:.*]]: index,
+// CHECK-SAME:      %[[VAL_1:.*]]: index)
+// CHECK:           %[[VAL_2:.*]]:4 = sparse_tensor.crd_translate  dim_to_lvl{{\[}}%[[VAL_0]], %[[VAL_1]]]
+// CHECK:           return %[[VAL_2]]#0, %[[VAL_2]]#1, %[[VAL_2]]#2, %[[VAL_2]]#3
+func.func @sparse_crd_translate(%arg0: index, %arg1: index) -> (index, index, index, index) {
+  %l0, %l1, %l2, %l3 = sparse_tensor.crd_translate dim_to_lvl [%arg0, %arg1] as #BSR : index, index, index, index
+  return  %l0, %l1, %l2, %l3 : index, index, index, index
+}

>From ab17ecd107670cd843a9074e4d0bc33810abcf5e Mon Sep 17 00:00:00 2001
From: ChiaHungDuan <chiahungduan at google.com>
Date: Thu, 19 Oct 2023 15:47:18 -0700
Subject: [PATCH 66/76] [scudo] Add ConditionVariable in SizeClassAllocator64
 (#69031)

This may reduce the time spent waiting on `Region->MMLock` while trying
to refill the freelist. Instead of always waiting for the completion of
`populateFreeListAndPopBatch()` or `releaseToOSMaybe()`, `pushBlocks()`
also refills the freelist. This increases the chance of an earlier
return from `popBatches()`.

The support of condition variable hasn't been done for all platforms.
Therefore, add another `popBatchWithCV()` and it can be configured in
the allocator configuration by setting `Primary::UseConditionVariable`
and the desired `ConditionVariableT`.

Reviewed By: cferris

Differential Revision: https://reviews.llvm.org/D156146
---
 .../lib/scudo/standalone/CMakeLists.txt       |   4 +
 .../lib/scudo/standalone/allocator_config.h   |   9 ++
 .../lib/scudo/standalone/condition_variable.h |  60 ++++++++
 .../standalone/condition_variable_base.h      |  56 ++++++++
 .../standalone/condition_variable_linux.cpp   |  52 +++++++
 .../standalone/condition_variable_linux.h     |  38 +++++
 compiler-rt/lib/scudo/standalone/primary64.h  | 135 +++++++++++++++---
 .../lib/scudo/standalone/tests/CMakeLists.txt |   1 +
 .../scudo/standalone/tests/combined_test.cpp  |  51 ++++++-
 .../tests/condition_variable_test.cpp         |  59 ++++++++
 .../scudo/standalone/tests/primary_test.cpp   |  32 ++++-
 11 files changed, 476 insertions(+), 21 deletions(-)
 create mode 100644 compiler-rt/lib/scudo/standalone/condition_variable.h
 create mode 100644 compiler-rt/lib/scudo/standalone/condition_variable_base.h
 create mode 100644 compiler-rt/lib/scudo/standalone/condition_variable_linux.cpp
 create mode 100644 compiler-rt/lib/scudo/standalone/condition_variable_linux.h
 create mode 100644 compiler-rt/lib/scudo/standalone/tests/condition_variable_test.cpp

diff --git a/compiler-rt/lib/scudo/standalone/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/CMakeLists.txt
index ba699f6a67c670a..60092005cc33bbf 100644
--- a/compiler-rt/lib/scudo/standalone/CMakeLists.txt
+++ b/compiler-rt/lib/scudo/standalone/CMakeLists.txt
@@ -62,6 +62,9 @@ set(SCUDO_HEADERS
   bytemap.h
   checksum.h
   chunk.h
+  condition_variable.h
+  condition_variable_base.h
+  condition_variable_linux.h
   combined.h
   common.h
   flags_parser.h
@@ -104,6 +107,7 @@ set(SCUDO_HEADERS
 set(SCUDO_SOURCES
   checksum.cpp
   common.cpp
+  condition_variable_linux.cpp
   crc32_hw.cpp
   flags_parser.cpp
   flags.cpp
diff --git a/compiler-rt/lib/scudo/standalone/allocator_config.h b/compiler-rt/lib/scudo/standalone/allocator_config.h
index 44c1ac5f74a2f2f..3c6aa3acb0e45b4 100644
--- a/compiler-rt/lib/scudo/standalone/allocator_config.h
+++ b/compiler-rt/lib/scudo/standalone/allocator_config.h
@@ -11,6 +11,7 @@
 
 #include "combined.h"
 #include "common.h"
+#include "condition_variable.h"
 #include "flags.h"
 #include "primary32.h"
 #include "primary64.h"
@@ -82,6 +83,14 @@ namespace scudo {
 //     // Defines the minimal & maximal release interval that can be set.
 //     static const s32 MinReleaseToOsIntervalMs = INT32_MIN;
 //     static const s32 MaxReleaseToOsIntervalMs = INT32_MAX;
+//
+//     // Use a condition variable to shorten the waiting time when refilling
+//     // the freelist. Note that this depends on each platform's
+//     // implementation of the condition variable and performance may vary,
+//     // so a performance benefit is not guaranteed.
+//     // Note that both variables have to be defined to enable it.
+//     static const bool UseConditionVariable = true;
+//     using ConditionVariableT = ConditionVariableLinux;
 //   };
 //   // Defines the type of Primary allocator to use.
 //   template <typename Config> using PrimaryT = SizeClassAllocator64<Config>;
diff --git a/compiler-rt/lib/scudo/standalone/condition_variable.h b/compiler-rt/lib/scudo/standalone/condition_variable.h
new file mode 100644
index 000000000000000..549f6e9f787bad9
--- /dev/null
+++ b/compiler-rt/lib/scudo/standalone/condition_variable.h
@@ -0,0 +1,60 @@
+//===-- condition_variable.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_CONDITION_VARIABLE_H_
+#define SCUDO_CONDITION_VARIABLE_H_
+
+#include "condition_variable_base.h"
+
+#include "common.h"
+#include "platform.h"
+
+#include "condition_variable_linux.h"
+
+namespace scudo {
+
+// A default implementation of the condition variable. It doesn't do a real
+// `wait`; instead it only spins for a short amount of time.
+class ConditionVariableDummy
+    : public ConditionVariableBase<ConditionVariableDummy> {
+public:
+  void notifyAllImpl(UNUSED HybridMutex &M) REQUIRES(M) {}
+
+  void waitImpl(UNUSED HybridMutex &M) REQUIRES(M) {
+    M.unlock();
+
+    constexpr u32 SpinTimes = 64;
+    volatile u32 V = 0;
+    for (u32 I = 0; I < SpinTimes; ++I) {
+      u32 Tmp = V + 1;
+      V = Tmp;
+    }
+
+    M.lock();
+  }
+};
+
+template <typename Config, typename = const bool>
+struct ConditionVariableState {
+  static constexpr bool enabled() { return false; }
+  // This is only used for compilation purpose so that we won't end up having
+  // many conditional compilations. If you want to use `ConditionVariableDummy`,
+  // define `ConditionVariableT` in your allocator configuration. See
+  // allocator_config.h for more details.
+  using ConditionVariableT = ConditionVariableDummy;
+};
+
+template <typename Config>
+struct ConditionVariableState<Config, decltype(Config::UseConditionVariable)> {
+  static constexpr bool enabled() { return true; }
+  using ConditionVariableT = typename Config::ConditionVariableT;
+};
+
+} // namespace scudo
+
+#endif // SCUDO_CONDITION_VARIABLE_H_
diff --git a/compiler-rt/lib/scudo/standalone/condition_variable_base.h b/compiler-rt/lib/scudo/standalone/condition_variable_base.h
new file mode 100644
index 000000000000000..416c327fed49ef8
--- /dev/null
+++ b/compiler-rt/lib/scudo/standalone/condition_variable_base.h
@@ -0,0 +1,56 @@
+//===-- condition_variable_base.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_CONDITION_VARIABLE_BASE_H_
+#define SCUDO_CONDITION_VARIABLE_BASE_H_
+
+#include "mutex.h"
+#include "thread_annotations.h"
+
+namespace scudo {
+
+template <typename Derived> class ConditionVariableBase {
+public:
+  constexpr ConditionVariableBase() = default;
+
+  void bindTestOnly(HybridMutex &Mutex) {
+#if SCUDO_DEBUG
+    boundMutex = &Mutex;
+#else
+    (void)Mutex;
+#endif
+  }
+
+  void notifyAll(HybridMutex &M) REQUIRES(M) {
+#if SCUDO_DEBUG
+    CHECK_EQ(&M, boundMutex);
+#endif
+    getDerived()->notifyAllImpl(M);
+  }
+
+  void wait(HybridMutex &M) REQUIRES(M) {
+#if SCUDO_DEBUG
+    CHECK_EQ(&M, boundMutex);
+#endif
+    getDerived()->waitImpl(M);
+  }
+
+protected:
+  Derived *getDerived() { return static_cast<Derived *>(this); }
+
+#if SCUDO_DEBUG
+  // Because thread-safety analysis doesn't support pointer aliasing, we are
+  // not able to mark the proper annotations without false positives. Instead,
+  // we pass the lock and do the same-lock check separately.
+  HybridMutex *boundMutex = nullptr;
+#endif
+};
+
+} // namespace scudo
+
+#endif // SCUDO_CONDITION_VARIABLE_BASE_H_
diff --git a/compiler-rt/lib/scudo/standalone/condition_variable_linux.cpp b/compiler-rt/lib/scudo/standalone/condition_variable_linux.cpp
new file mode 100644
index 000000000000000..e6d9bd1771a4b8b
--- /dev/null
+++ b/compiler-rt/lib/scudo/standalone/condition_variable_linux.cpp
@@ -0,0 +1,52 @@
+//===-- condition_variable_linux.cpp ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "platform.h"
+
+#if SCUDO_LINUX
+
+#include "condition_variable_linux.h"
+
+#include "atomic_helpers.h"
+
+#include <limits.h>
+#include <linux/futex.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+namespace scudo {
+
+void ConditionVariableLinux::notifyAllImpl(UNUSED HybridMutex &M) {
+  const u32 V = atomic_load_relaxed(&Counter);
+  atomic_store_relaxed(&Counter, V + 1);
+
+  // TODO(chiahungduan): Move the waiters from the futex waiting queue
+  // `Counter` to futex waiting queue `M` so that the awoken threads won't be
+  // blocked again due to locked `M` by current thread.
+  if (LastNotifyAll != V) {
+    syscall(SYS_futex, reinterpret_cast<uptr>(&Counter), FUTEX_WAKE_PRIVATE,
+            INT_MAX, nullptr, nullptr, 0);
+  }
+
+  LastNotifyAll = V + 1;
+}
+
+void ConditionVariableLinux::waitImpl(HybridMutex &M) {
+  const u32 V = atomic_load_relaxed(&Counter) + 1;
+  atomic_store_relaxed(&Counter, V);
+
+  // TODO: Use ScopedUnlock when it's supported.
+  M.unlock();
+  syscall(SYS_futex, reinterpret_cast<uptr>(&Counter), FUTEX_WAIT_PRIVATE, V,
+          nullptr, nullptr, 0);
+  M.lock();
+}
+
+} // namespace scudo
+
+#endif // SCUDO_LINUX
diff --git a/compiler-rt/lib/scudo/standalone/condition_variable_linux.h b/compiler-rt/lib/scudo/standalone/condition_variable_linux.h
new file mode 100644
index 000000000000000..cd073287326d9be
--- /dev/null
+++ b/compiler-rt/lib/scudo/standalone/condition_variable_linux.h
@@ -0,0 +1,38 @@
+//===-- condition_variable_linux.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_CONDITION_VARIABLE_LINUX_H_
+#define SCUDO_CONDITION_VARIABLE_LINUX_H_
+
+#include "platform.h"
+
+#if SCUDO_LINUX
+
+#include "atomic_helpers.h"
+#include "condition_variable_base.h"
+#include "thread_annotations.h"
+
+namespace scudo {
+
+class ConditionVariableLinux
+    : public ConditionVariableBase<ConditionVariableLinux> {
+public:
+  void notifyAllImpl(HybridMutex &M) REQUIRES(M);
+
+  void waitImpl(HybridMutex &M) REQUIRES(M);
+
+private:
+  u32 LastNotifyAll = 0;
+  atomic_u32 Counter = {};
+};
+
+} // namespace scudo
+
+#endif // SCUDO_LINUX
+
+#endif // SCUDO_CONDITION_VARIABLE_LINUX_H_
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index 6e5ab7e3ba7965d..0f4ba88ee1f4b9d 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -22,6 +22,8 @@
 #include "string_utils.h"
 #include "thread_annotations.h"
 
+#include "condition_variable.h"
+
 namespace scudo {
 
 // SizeClassAllocator64 is an allocator tuned for 64-bit address space.
@@ -48,6 +50,8 @@ template <typename Config> class SizeClassAllocator64 {
 public:
   typedef typename Config::Primary::CompactPtrT CompactPtrT;
   typedef typename Config::Primary::SizeClassMap SizeClassMap;
+  typedef typename ConditionVariableState<
+      typename Config::Primary>::ConditionVariableT ConditionVariableT;
   static const uptr CompactPtrScale = Config::Primary::CompactPtrScale;
   static const uptr RegionSizeLog = Config::Primary::RegionSizeLog;
   static const uptr GroupSizeLog = Config::Primary::GroupSizeLog;
@@ -70,6 +74,10 @@ template <typename Config> class SizeClassAllocator64 {
 
   static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; }
 
+  static bool conditionVariableEnabled() {
+    return ConditionVariableState<typename Config::Primary>::enabled();
+  }
+
   void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS {
     DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT)));
 
@@ -124,6 +132,7 @@ template <typename Config> class SizeClassAllocator64 {
 
     for (uptr I = 0; I < NumClasses; I++) {
       RegionInfo *Region = getRegionInfo(I);
+
       // The actual start of a region is offset by a random number of pages
       // when PrimaryEnableRandomOffset is set.
       Region->RegionBeg = (PrimaryBase + (I << RegionSizeLog)) +
@@ -145,6 +154,11 @@ template <typename Config> class SizeClassAllocator64 {
     }
     shuffle(RegionInfoArray, NumClasses, &Seed);
 
+    // The binding should be done after region shuffling so that it won't bind
+    // the FLLock from the wrong region.
+    for (uptr I = 0; I < NumClasses; I++)
+      getRegionInfo(I)->FLLockCV.bindTestOnly(getRegionInfo(I)->FLLock);
+
     setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
   }
 
@@ -236,26 +250,26 @@ template <typename Config> class SizeClassAllocator64 {
     bool ReportRegionExhausted = false;
     TransferBatchT *B = nullptr;
 
-    while (true) {
-      // When two threads compete for `Region->MMLock`, we only want one of them
-      // does the populateFreeListAndPopBatch(). To avoid both of them doing
-      // that, always check the freelist before mapping new pages.
-      //
-      // TODO(chiahungduan): Use a condition variable so that we don't need to
-      // hold `Region->MMLock` here.
-      ScopedLock ML(Region->MMLock);
-      {
-        ScopedLock FL(Region->FLLock);
-        B = popBatchImpl(C, ClassId, Region);
-        if (LIKELY(B))
-          return B;
-      }
+    if (conditionVariableEnabled()) {
+      B = popBatchWithCV(C, ClassId, Region, ReportRegionExhausted);
+    } else {
+      while (true) {
+        // When two threads compete for `Region->MMLock`, we only want one of
+        // them to call populateFreeListAndPopBatch(). To avoid both of them
+        // doing that, always check the freelist before mapping new pages.
+        ScopedLock ML(Region->MMLock);
+        {
+          ScopedLock FL(Region->FLLock);
+          if ((B = popBatchImpl(C, ClassId, Region)))
+            break;
+        }
 
-      const bool RegionIsExhausted = Region->Exhausted;
-      if (!RegionIsExhausted)
-        B = populateFreeListAndPopBatch(C, ClassId, Region);
-      ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted;
-      break;
+        const bool RegionIsExhausted = Region->Exhausted;
+        if (!RegionIsExhausted)
+          B = populateFreeListAndPopBatch(C, ClassId, Region);
+        ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted;
+        break;
+      }
     }
 
     if (UNLIKELY(ReportRegionExhausted)) {
@@ -280,6 +294,8 @@ template <typename Config> class SizeClassAllocator64 {
     if (ClassId == SizeClassMap::BatchClassId) {
       ScopedLock L(Region->FLLock);
       pushBatchClassBlocks(Region, Array, Size);
+      if (conditionVariableEnabled())
+        Region->FLLockCV.notifyAll(Region->FLLock);
       return;
     }
 
@@ -306,6 +322,8 @@ template <typename Config> class SizeClassAllocator64 {
     {
       ScopedLock L(Region->FLLock);
       pushBlocksImpl(C, ClassId, Region, Array, Size, SameGroup);
+      if (conditionVariableEnabled())
+        Region->FLLockCV.notifyAll(Region->FLLock);
     }
   }
 
@@ -538,6 +556,7 @@ template <typename Config> class SizeClassAllocator64 {
   struct UnpaddedRegionInfo {
     // Mutex for operations on freelist
     HybridMutex FLLock;
+    ConditionVariableT FLLockCV GUARDED_BY(FLLock);
     // Mutex for memmap operations
     HybridMutex MMLock ACQUIRED_BEFORE(FLLock);
     // `RegionBeg` is initialized before thread creation and won't be changed.
@@ -549,6 +568,7 @@ template <typename Config> class SizeClassAllocator64 {
     uptr TryReleaseThreshold GUARDED_BY(MMLock) = 0;
     ReleaseToOsInfo ReleaseInfo GUARDED_BY(MMLock) = {};
     bool Exhausted GUARDED_BY(MMLock) = false;
+    bool isPopulatingFreeList GUARDED_BY(FLLock) = false;
   };
   struct RegionInfo : UnpaddedRegionInfo {
     char Padding[SCUDO_CACHE_LINE_SIZE -
@@ -831,6 +851,76 @@ template <typename Config> class SizeClassAllocator64 {
     InsertBlocks(Cur, Array + Size - Count, Count);
   }
 
+  TransferBatchT *popBatchWithCV(CacheT *C, uptr ClassId, RegionInfo *Region,
+                                 bool &ReportRegionExhausted) {
+    TransferBatchT *B = nullptr;
+
+    while (true) {
+      // We only expect one thread doing the freelist refillment and other
+      // threads will be waiting for either the completion of the
+      // `populateFreeListAndPopBatch()` or `pushBlocks()` called by other
+      // threads.
+      bool PopulateFreeList = false;
+      {
+        ScopedLock FL(Region->FLLock);
+        if (!Region->isPopulatingFreeList) {
+          Region->isPopulatingFreeList = true;
+          PopulateFreeList = true;
+        }
+      }
+
+      if (PopulateFreeList) {
+        ScopedLock ML(Region->MMLock);
+
+        const bool RegionIsExhausted = Region->Exhausted;
+        if (!RegionIsExhausted)
+          B = populateFreeListAndPopBatch(C, ClassId, Region);
+        ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted;
+
+        {
+          // Before reacquiring the `FLLock`, the freelist may be used up again
+          // and some threads are waiting for the freelist refillment by the
+          // current thread. It's important to set
+          // `Region->isPopulatingFreeList` to false so the threads about to
+          // sleep will notice the status change.
+          ScopedLock FL(Region->FLLock);
+          Region->isPopulatingFreeList = false;
+          Region->FLLockCV.notifyAll(Region->FLLock);
+        }
+
+        break;
+      }
+
+      // At here, there are two preconditions to be met before waiting,
+      //   1. The freelist is empty.
+      //   2. Region->isPopulatingFreeList == true, i.e, someone is still doing
+      //   `populateFreeListAndPopBatch()`.
+      //
+      // Note that it has the chance that freelist is empty but
+      // Region->isPopulatingFreeList == false because all the new populated
+      // blocks were used up right after the refillment. Therefore, we have to
+      // check if someone is still populating the freelist.
+      ScopedLock FL(Region->FLLock);
+      if (LIKELY(B = popBatchImpl(C, ClassId, Region)))
+        break;
+
+      if (!Region->isPopulatingFreeList)
+        continue;
+
+      // Now the freelist is empty and someone's doing the refillment. We will
+      // wait until anyone refills the freelist or someone finishes doing
+      // `populateFreeListAndPopBatch()`. The refillment can be done by
+      // `populateFreeListAndPopBatch()`, `pushBlocks()`,
+      // `pushBatchClassBlocks()` and `mergeGroupsToReleaseBack()`.
+      Region->FLLockCV.wait(Region->FLLock);
+
+      if (LIKELY(B = popBatchImpl(C, ClassId, Region)))
+        break;
+    }
+
+    return B;
+  }
+
   // Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest
   // group id will be considered first.
   //
@@ -1521,6 +1611,8 @@ template <typename Config> class SizeClassAllocator64 {
         if (UNLIKELY(Idx + NeededSlots > MaxUnusedSize)) {
           ScopedLock L(BatchClassRegion->FLLock);
           pushBatchClassBlocks(BatchClassRegion, Blocks, Idx);
+          if (conditionVariableEnabled())
+            BatchClassRegion->FLLockCV.notifyAll(BatchClassRegion->FLLock);
           Idx = 0;
         }
         Blocks[Idx++] =
@@ -1556,6 +1648,8 @@ template <typename Config> class SizeClassAllocator64 {
     if (Idx != 0) {
       ScopedLock L(BatchClassRegion->FLLock);
       pushBatchClassBlocks(BatchClassRegion, Blocks, Idx);
+      if (conditionVariableEnabled())
+        BatchClassRegion->FLLockCV.notifyAll(BatchClassRegion->FLLock);
     }
 
     if (SCUDO_DEBUG) {
@@ -1565,6 +1659,9 @@ template <typename Config> class SizeClassAllocator64 {
         CHECK_LT(Prev->CompactPtrGroupBase, Cur->CompactPtrGroupBase);
       }
     }
+
+    if (conditionVariableEnabled())
+      Region->FLLockCV.notifyAll(Region->FLLock);
   }
 
   // TODO: `PrimaryBase` can be obtained from ReservedMemory. This needs to be
diff --git a/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt
index a4a031d54d7c3ad..c6b6a1cb57ceea6 100644
--- a/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt
+++ b/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt
@@ -96,6 +96,7 @@ set(SCUDO_UNIT_TEST_SOURCES
   chunk_test.cpp
   combined_test.cpp
   common_test.cpp
+  condition_variable_test.cpp
   flags_test.cpp
   list_test.cpp
   map_test.cpp
diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
index 7958e9dabf60db2..3dbd93cacefd684 100644
--- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
@@ -12,7 +12,9 @@
 #include "allocator_config.h"
 #include "chunk.h"
 #include "combined.h"
+#include "condition_variable.h"
 #include "mem_map.h"
+#include "size_class_map.h"
 
 #include <algorithm>
 #include <condition_variable>
@@ -164,13 +166,60 @@ template <class TypeParam> struct ScudoCombinedTest : public Test {
 
 template <typename T> using ScudoCombinedDeathTest = ScudoCombinedTest<T>;
 
+namespace scudo {
+struct TestConditionVariableConfig {
+  static const bool MaySupportMemoryTagging = true;
+  template <class A>
+  using TSDRegistryT =
+      scudo::TSDRegistrySharedT<A, 8U, 4U>; // Shared, max 8 TSDs.
+
+  struct Primary {
+    using SizeClassMap = scudo::AndroidSizeClassMap;
+#if SCUDO_CAN_USE_PRIMARY64
+    static const scudo::uptr RegionSizeLog = 28U;
+    typedef scudo::u32 CompactPtrT;
+    static const scudo::uptr CompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
+    static const scudo::uptr GroupSizeLog = 20U;
+    static const bool EnableRandomOffset = true;
+    static const scudo::uptr MapSizeIncrement = 1UL << 18;
+#else
+    static const scudo::uptr RegionSizeLog = 18U;
+    static const scudo::uptr GroupSizeLog = 18U;
+    typedef scudo::uptr CompactPtrT;
+#endif
+    static const scudo::s32 MinReleaseToOsIntervalMs = 1000;
+    static const scudo::s32 MaxReleaseToOsIntervalMs = 1000;
+    static const bool UseConditionVariable = true;
+#if SCUDO_LINUX
+    using ConditionVariableT = scudo::ConditionVariableLinux;
+#else
+    using ConditionVariableT = scudo::ConditionVariableDummy;
+#endif
+  };
+#if SCUDO_CAN_USE_PRIMARY64
+  template <typename Config>
+  using PrimaryT = scudo::SizeClassAllocator64<Config>;
+#else
+  template <typename Config>
+  using PrimaryT = scudo::SizeClassAllocator32<Config>;
+#endif
+
+  struct Secondary {
+    template <typename Config>
+    using CacheT = scudo::MapAllocatorNoCache<Config>;
+  };
+  template <typename Config> using SecondaryT = scudo::MapAllocator<Config>;
+};
+} // namespace scudo
+
 #if SCUDO_FUCHSIA
 #define SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME)                              \
   SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, FuchsiaConfig)
 #else
 #define SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME)                              \
   SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, DefaultConfig)                          \
-  SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, AndroidConfig)
+  SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, AndroidConfig)                          \
+  SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConditionVariableConfig)
 #endif
 
 #define SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TYPE)                             \
diff --git a/compiler-rt/lib/scudo/standalone/tests/condition_variable_test.cpp b/compiler-rt/lib/scudo/standalone/tests/condition_variable_test.cpp
new file mode 100644
index 000000000000000..caba1f64ab0593e
--- /dev/null
+++ b/compiler-rt/lib/scudo/standalone/tests/condition_variable_test.cpp
@@ -0,0 +1,59 @@
+//===-- condition_variable_test.cpp -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "tests/scudo_unit_test.h"
+
+#include "common.h"
+#include "condition_variable.h"
+#include "mutex.h"
+
+#include <thread>
+
+template <typename ConditionVariableT> void simpleWaitAndNotifyAll() {
+  constexpr scudo::u32 NumThreads = 2;
+  constexpr scudo::u32 CounterMax = 1024;
+  std::thread Threads[NumThreads];
+
+  scudo::HybridMutex M;
+  ConditionVariableT CV;
+  CV.bindTestOnly(M);
+  scudo::u32 Counter = 0;
+
+  for (scudo::u32 I = 0; I < NumThreads; ++I) {
+    Threads[I] = std::thread(
+        [&](scudo::u32 Id) {
+          do {
+            scudo::ScopedLock L(M);
+            if (Counter % NumThreads != Id && Counter < CounterMax)
+              CV.wait(M);
+            if (Counter >= CounterMax) {
+              break;
+            } else {
+              ++Counter;
+              CV.notifyAll(M);
+            }
+          } while (true);
+        },
+        I);
+  }
+
+  for (std::thread &T : Threads)
+    T.join();
+
+  EXPECT_EQ(Counter, CounterMax);
+}
+
+TEST(ScudoConditionVariableTest, DummyCVWaitAndNotifyAll) {
+  simpleWaitAndNotifyAll<scudo::ConditionVariableDummy>();
+}
+
+#ifdef SCUDO_LINUX
+TEST(ScudoConditionVariableTest, LinuxCVWaitAndNotifyAll) {
+  simpleWaitAndNotifyAll<scudo::ConditionVariableLinux>();
+}
+#endif
diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
index 4db05b76241134c..18171511758a14e 100644
--- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
@@ -9,6 +9,7 @@
 #include "tests/scudo_unit_test.h"
 
 #include "allocator_config.h"
+#include "condition_variable.h"
 #include "primary32.h"
 #include "primary64.h"
 #include "size_class_map.h"
@@ -105,6 +106,34 @@ template <typename SizeClassMapT> struct TestConfig4 {
   };
 };
 
+// This is the only test config that enables the condition variable.
+template <typename SizeClassMapT> struct TestConfig5 {
+  static const bool MaySupportMemoryTagging = true;
+
+  struct Primary {
+    using SizeClassMap = SizeClassMapT;
+#if defined(__mips__)
+    // Unable to allocate greater size on QEMU-user.
+    static const scudo::uptr RegionSizeLog = 23U;
+#else
+    static const scudo::uptr RegionSizeLog = 24U;
+#endif
+    static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN;
+    static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX;
+    static const scudo::uptr CompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
+    static const scudo::uptr GroupSizeLog = 18U;
+    typedef scudo::u32 CompactPtrT;
+    static const bool EnableRandomOffset = true;
+    static const scudo::uptr MapSizeIncrement = 1UL << 18;
+    static const bool UseConditionVariable = true;
+#if SCUDO_LINUX
+    using ConditionVariableT = scudo::ConditionVariableLinux;
+#else
+    using ConditionVariableT = scudo::ConditionVariableDummy;
+#endif
+  };
+};
+
 template <template <typename> class BaseConfig, typename SizeClassMapT>
 struct Config : public BaseConfig<SizeClassMapT> {};
 
@@ -143,7 +172,8 @@ struct ScudoPrimaryTest : public Test {};
   SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig1)                            \
   SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig2)                            \
   SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig3)                            \
-  SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig4)
+  SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig4)                            \
+  SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig5)
 #endif
 
 #define SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TYPE)                             \

>From 6ddc03d97c0de96691ed27f05c9b30869051ce06 Mon Sep 17 00:00:00 2001
From: Finn Plummer <50529406+inbelic at users.noreply.github.com>
Date: Fri, 20 Oct 2023 00:59:04 +0200
Subject: [PATCH 67/76] [mlir][spirv][webgpu] Add lowering of IAddCarry to IAdd
 (#68495)

WebGPU does not currently support extended arithmetic, which is an issue
when we want to lower from SPIR-V. This commit adds a pattern to
transform and emulate spirv.IAddCarry with spirv.IAdd operations.

Fixes #65154
---
 .../Transforms/SPIRVWebGPUTransforms.cpp      | 44 +++++++++++-
 .../SPIRV/Transforms/webgpu-prepare.mlir      | 37 ++++++++++
 .../iaddcarry_extended.mlir                   | 68 +++++++++++++++++++
 3 files changed, 147 insertions(+), 2 deletions(-)
 create mode 100644 mlir/test/mlir-vulkan-runner/iaddcarry_extended.mlir

diff --git a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVWebGPUTransforms.cpp b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVWebGPUTransforms.cpp
index 44fea86785593e9..21de1c9e867c04e 100644
--- a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVWebGPUTransforms.cpp
+++ b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVWebGPUTransforms.cpp
@@ -167,6 +167,42 @@ using ExpandSMulExtendedPattern =
 using ExpandUMulExtendedPattern =
     ExpandMulExtendedPattern<UMulExtendedOp, false>;
 
+struct ExpandAddCarryPattern final : OpRewritePattern<IAddCarryOp> {
+  using OpRewritePattern<IAddCarryOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(IAddCarryOp op,
+                                PatternRewriter &rewriter) const override {
+    Location loc = op->getLoc();
+    Value lhs = op.getOperand1();
+    Value rhs = op.getOperand2();
+
+    // Currently, WGSL only supports 32-bit integer types. Any other integer
+    // types should already have been promoted/demoted to i32.
+    Type argTy = lhs.getType();
+    auto elemTy = cast<IntegerType>(getElementTypeOrSelf(argTy));
+    if (elemTy.getIntOrFloatBitWidth() != 32)
+      return rewriter.notifyMatchFailure(
+          loc,
+          llvm::formatv("Unexpected integer type for WebGPU: '{0}'", elemTy));
+
+    Value one =
+        rewriter.create<ConstantOp>(loc, argTy, getScalarOrSplatAttr(argTy, 1));
+    Value zero =
+        rewriter.create<ConstantOp>(loc, argTy, getScalarOrSplatAttr(argTy, 0));
+
+    // Calculate the carry by checking if the addition resulted in an overflow.
+    Value out = rewriter.create<IAddOp>(loc, lhs, rhs);
+    Value cmp = rewriter.create<ULessThanOp>(loc, out, lhs);
+    Value carry = rewriter.create<SelectOp>(loc, cmp, one, zero);
+
+    Value add = rewriter.create<CompositeConstructOp>(
+        loc, op->getResultTypes().front(), llvm::ArrayRef({out, carry}));
+
+    rewriter.replaceOp(op, add);
+    return success();
+  }
+};
+
 //===----------------------------------------------------------------------===//
 // Passes
 //===----------------------------------------------------------------------===//
@@ -191,8 +227,12 @@ void populateSPIRVExpandExtendedMultiplicationPatterns(
     RewritePatternSet &patterns) {
   // WGSL currently does not support extended multiplication ops, see:
   // https://github.com/gpuweb/gpuweb/issues/1565.
-  patterns.add<ExpandSMulExtendedPattern, ExpandUMulExtendedPattern>(
-      patterns.getContext());
+  patterns.add<
+      // clang-format off
+    ExpandSMulExtendedPattern,
+    ExpandUMulExtendedPattern,
+    ExpandAddCarryPattern
+  >(patterns.getContext());
 }
 } // namespace spirv
 } // namespace mlir
diff --git a/mlir/test/Dialect/SPIRV/Transforms/webgpu-prepare.mlir b/mlir/test/Dialect/SPIRV/Transforms/webgpu-prepare.mlir
index 91eeeda6ec54c64..1ec4e5e4f9664b8 100644
--- a/mlir/test/Dialect/SPIRV/Transforms/webgpu-prepare.mlir
+++ b/mlir/test/Dialect/SPIRV/Transforms/webgpu-prepare.mlir
@@ -145,4 +145,41 @@ spirv.func @smul_extended_i16(%arg : i16) -> !spirv.struct<(i16, i16)> "None" {
   spirv.ReturnValue %0 : !spirv.struct<(i16, i16)>
 }
 
+// CHECK-LABEL: func @iaddcarry_i32
+// CHECK-SAME:       ([[A:%.+]]: i32, [[B:%.+]]: i32)
+// CHECK-NEXT:       [[ONE:%.+]]    = spirv.Constant 1 : i32
+// CHECK-NEXT:       [[ZERO:%.+]]   = spirv.Constant 0 : i32
+// CHECK-NEXT:       [[OUT:%.+]]    = spirv.IAdd [[A]], [[B]]
+// CHECK-NEXT:       [[CMP:%.+]]    = spirv.ULessThan [[OUT]], [[A]]
+// CHECK-NEXT:       [[CARRY:%.+]]  = spirv.Select [[CMP]], [[ONE]], [[ZERO]]
+// CHECK-NEXT:       [[RES:%.+]]     = spirv.CompositeConstruct [[OUT]], [[CARRY]] : (i32, i32) -> !spirv.struct<(i32, i32)>
+// CHECK-NEXT:       spirv.ReturnValue [[RES]] : !spirv.struct<(i32, i32)>
+spirv.func @iaddcarry_i32(%a : i32, %b : i32) -> !spirv.struct<(i32, i32)> "None" {
+  %0 = spirv.IAddCarry %a, %b : !spirv.struct<(i32, i32)>
+  spirv.ReturnValue %0 : !spirv.struct<(i32, i32)>
+}
+
+// CHECK-LABEL: func @iaddcarry_vector_i32
+// CHECK-SAME:       ([[A:%.+]]: vector<3xi32>, [[B:%.+]]: vector<3xi32>)
+// CHECK-NEXT:       [[ONE:%.+]]    = spirv.Constant dense<1> : vector<3xi32>
+// CHECK-NEXT:       [[ZERO:%.+]]   = spirv.Constant dense<0> : vector<3xi32>
+// CHECK-NEXT:       [[OUT:%.+]]    = spirv.IAdd [[A]], [[B]]
+// CHECK-NEXT:       [[CMP:%.+]]    = spirv.ULessThan [[OUT]], [[A]]
+// CHECK-NEXT:       [[CARRY:%.+]]  = spirv.Select [[CMP]], [[ONE]], [[ZERO]]
+// CHECK-NEXT:       [[RES:%.+]]    = spirv.CompositeConstruct [[OUT]], [[CARRY]] : (vector<3xi32>, vector<3xi32>) -> !spirv.struct<(vector<3xi32>, vector<3xi32>)>
+// CHECK-NEXT:       spirv.ReturnValue [[RES]] : !spirv.struct<(vector<3xi32>, vector<3xi32>)>
+spirv.func @iaddcarry_vector_i32(%a : vector<3xi32>, %b : vector<3xi32>)
+  -> !spirv.struct<(vector<3xi32>, vector<3xi32>)> "None" {
+  %0 = spirv.IAddCarry %a, %b : !spirv.struct<(vector<3xi32>, vector<3xi32>)>
+  spirv.ReturnValue %0 : !spirv.struct<(vector<3xi32>, vector<3xi32>)>
+}
+
+// CHECK-LABEL: func @iaddcarry_i16
+// CHECK-NEXT:       spirv.IAddCarry
+// CHECK-NEXT:       spirv.ReturnValue
+spirv.func @iaddcarry_i16(%a : i16, %b : i16) -> !spirv.struct<(i16, i16)> "None" {
+  %0 = spirv.IAddCarry %a, %b : !spirv.struct<(i16, i16)>
+  spirv.ReturnValue %0 : !spirv.struct<(i16, i16)>
+}
+
 } // end module
diff --git a/mlir/test/mlir-vulkan-runner/iaddcarry_extended.mlir b/mlir/test/mlir-vulkan-runner/iaddcarry_extended.mlir
new file mode 100644
index 000000000000000..9b1f1964b3f953e
--- /dev/null
+++ b/mlir/test/mlir-vulkan-runner/iaddcarry_extended.mlir
@@ -0,0 +1,68 @@
+// Make sure that addition with carry produces expected results
+// with and without expansion to primitive add/cmp ops for WebGPU.
+
+// RUN: mlir-vulkan-runner %s \
+// RUN:  --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
+// RUN:  --entry-point-result=void | FileCheck %s
+
+// RUN: mlir-vulkan-runner %s --vulkan-runner-spirv-webgpu-prepare \
+// RUN:  --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \
+// RUN:  --entry-point-result=void | FileCheck %s
+
+// CHECK: [0, 42, 0, 42]
+// CHECK: [1, 0, 1, 1]
+module attributes {
+  gpu.container_module,
+  spirv.target_env = #spirv.target_env<
+    #spirv.vce<v1.4, [Shader], [SPV_KHR_storage_buffer_storage_class]>, #spirv.resource_limits<>>
+} {
+  gpu.module @kernels {
+    gpu.func @kernel_add(%arg0 : memref<4xi32>, %arg1 : memref<4xi32>, %arg2 : memref<4xi32>, %arg3 : memref<4xi32>)
+      kernel attributes { spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [1, 1, 1]>} {
+      %0 = gpu.block_id x
+      %lhs = memref.load %arg0[%0] : memref<4xi32>
+      %rhs = memref.load %arg1[%0] : memref<4xi32>
+      %sum, %carry = arith.addui_extended %lhs, %rhs : i32, i1
+
+      %carry_i32 = arith.extui %carry : i1 to i32
+
+      memref.store %sum, %arg2[%0] : memref<4xi32>
+      memref.store %carry_i32, %arg3[%0] : memref<4xi32>
+      gpu.return
+    }
+  }
+
+  func.func @main() {
+    %buf0 = memref.alloc() : memref<4xi32>
+    %buf1 = memref.alloc() : memref<4xi32>
+    %buf2 = memref.alloc() : memref<4xi32>
+    %buf3 = memref.alloc() : memref<4xi32>
+    %i32_0 = arith.constant 0 : i32
+
+    // Initialize output buffers.
+    %buf4 = memref.cast %buf2 : memref<4xi32> to memref<?xi32>
+    %buf5 = memref.cast %buf3 : memref<4xi32> to memref<?xi32>
+    call @fillResource1DInt(%buf4, %i32_0) : (memref<?xi32>, i32) -> ()
+    call @fillResource1DInt(%buf5, %i32_0) : (memref<?xi32>, i32) -> ()
+
+    %idx_0 = arith.constant 0 : index
+    %idx_1 = arith.constant 1 : index
+    %idx_4 = arith.constant 4 : index
+
+    // Initialize input buffers.
+    %lhs_vals = arith.constant dense<[-1, 24, 4294967295, 43]> : vector<4xi32>
+    %rhs_vals = arith.constant dense<[1, 18, 1, 4294967295]> : vector<4xi32>
+    vector.store %lhs_vals, %buf0[%idx_0] : memref<4xi32>, vector<4xi32>
+    vector.store %rhs_vals, %buf1[%idx_0] : memref<4xi32>, vector<4xi32>
+
+    gpu.launch_func @kernels::@kernel_add
+        blocks in (%idx_4, %idx_1, %idx_1) threads in (%idx_1, %idx_1, %idx_1)
+        args(%buf0 : memref<4xi32>, %buf1 : memref<4xi32>, %buf2 : memref<4xi32>, %buf3 : memref<4xi32>)
+    %buf_sum = memref.cast %buf4 : memref<?xi32> to memref<*xi32>
+    %buf_carry = memref.cast %buf5 : memref<?xi32> to memref<*xi32>
+    call @printMemrefI32(%buf_sum) : (memref<*xi32>) -> ()
+    call @printMemrefI32(%buf_carry) : (memref<*xi32>) -> ()
+    return
+  }
+  func.func private @fillResource1DInt(%0 : memref<?xi32>, %1 : i32)
+  func.func private @printMemrefI32(%ptr : memref<*xi32>)
+}

>From dda3ed9091d1094abced01aa39b605da0a4f7f54 Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis at chromium.org>
Date: Thu, 19 Oct 2023 19:04:48 -0400
Subject: [PATCH 68/76] [gn] port ab17ecd10767

---
 .../gn/secondary/compiler-rt/lib/scudo/standalone/BUILD.gn    | 4 ++++
 .../secondary/compiler-rt/lib/scudo/standalone/tests/BUILD.gn | 1 +
 2 files changed, 5 insertions(+)

diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/scudo/standalone/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/scudo/standalone/BUILD.gn
index c46e59bc247a2ba..02a9595e2654beb 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/scudo/standalone/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/scudo/standalone/BUILD.gn
@@ -20,6 +20,10 @@ source_set("sources") {
     "combined.h",
     "common.cpp",
     "common.h",
+    "condition_variable.h",
+    "condition_variable_base.h",
+    "condition_variable_linux.cpp",
+    "condition_variable_linux.h",
     "crc32_hw.cpp",
     "flags.cpp",
     "flags.h",
diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/scudo/standalone/tests/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/scudo/standalone/tests/BUILD.gn
index b8fb77472917714..f69f1c413b47da0 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/scudo/standalone/tests/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/scudo/standalone/tests/BUILD.gn
@@ -14,6 +14,7 @@ unittest("ScudoUnitTest") {
     "chunk_test.cpp",
     "combined_test.cpp",
     "common_test.cpp",
+    "condition_variable_test.cpp",
     "flags_test.cpp",
     "list_test.cpp",
     "map_test.cpp",

>From dd473f1dd19c9c80ce1a9002d1a6de098b8639fe Mon Sep 17 00:00:00 2001
From: Maksim Levental <maksim.levental at gmail.com>
Date: Thu, 19 Oct 2023 18:07:06 -0500
Subject: [PATCH 69/76] [mlir][python] simplify extensions (#69642)

https://github.com/llvm/llvm-project/pull/68853 enabled a lot of nice
cleanup. Note, I made sure each of the touched extensions had tests.
---
 mlir/python/mlir/dialects/affine.py        | 45 --------------
 mlir/python/mlir/dialects/bufferization.py | 36 -----------
 mlir/python/mlir/dialects/func.py          |  3 -
 mlir/python/mlir/dialects/memref.py        | 38 ------------
 mlir/python/mlir/dialects/pdl.py           | 69 ----------------------
 mlir/python/mlir/dialects/scf.py           | 33 +++--------
 mlir/test/python/dialects/affine.py        |  2 +-
 mlir/test/python/dialects/func.py          |  4 ++
 8 files changed, 13 insertions(+), 217 deletions(-)

diff --git a/mlir/python/mlir/dialects/affine.py b/mlir/python/mlir/dialects/affine.py
index 1eaccfa73a85cbf..80d3873e19a05cb 100644
--- a/mlir/python/mlir/dialects/affine.py
+++ b/mlir/python/mlir/dialects/affine.py
@@ -3,48 +3,3 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from ._affine_ops_gen import *
-from ._affine_ops_gen import _Dialect
-
-try:
-    from ..ir import *
-    from ._ods_common import (
-        get_op_result_or_value as _get_op_result_or_value,
-        get_op_results_or_values as _get_op_results_or_values,
-        _cext as _ods_cext,
-    )
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-from typing import Optional, Sequence, Union
-
-
- at _ods_cext.register_operation(_Dialect, replace=True)
-class AffineStoreOp(AffineStoreOp):
-    """Specialization for the Affine store operation."""
-
-    def __init__(
-        self,
-        value: Union[Operation, OpView, Value],
-        memref: Union[Operation, OpView, Value],
-        map: AffineMap = None,
-        *,
-        map_operands=None,
-        loc=None,
-        ip=None,
-    ):
-        """Creates an affine store operation.
-
-        - `value`: the value to store into the memref.
-        - `memref`: the buffer to store into.
-        - `map`: the affine map that maps the map_operands to the index of the
-          memref.
-        - `map_operands`: the list of arguments to substitute the dimensions,
-          then symbols in the affine map, in increasing order.
-        """
-        map = map if map is not None else []
-        map_operands = map_operands if map_operands is not None else []
-        indicies = [_get_op_result_or_value(op) for op in map_operands]
-        _ods_successors = None
-        super().__init__(
-            value, memref, indicies, AffineMapAttr.get(map), loc=loc, ip=ip
-        )
diff --git a/mlir/python/mlir/dialects/bufferization.py b/mlir/python/mlir/dialects/bufferization.py
index 0ce5448ace4b14c..759b6aa24a9ff73 100644
--- a/mlir/python/mlir/dialects/bufferization.py
+++ b/mlir/python/mlir/dialects/bufferization.py
@@ -3,40 +3,4 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from ._bufferization_ops_gen import *
-from ._bufferization_ops_gen import _Dialect
 from ._bufferization_enum_gen import *
-
-try:
-    from typing import Sequence, Union
-    from ..ir import *
-    from ._ods_common import get_default_loc_context, _cext as _ods_cext
-
-    from typing import Any, List, Union
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-
- at _ods_cext.register_operation(_Dialect, replace=True)
-class AllocTensorOp(AllocTensorOp):
-    """Extends the bufferization.alloc_tensor op."""
-
-    def __init__(
-        self,
-        tensor_type: Type,
-        dynamic_sizes: Sequence[Value],
-        copy: Value,
-        size_hint: Value,
-        escape: BoolAttr,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        """Constructs an `alloc_tensor` with static and/or dynamic sizes."""
-        super().__init__(
-            tensor_type,
-            dynamic_sizes,
-            copy=copy,
-            size_hint=size_hint,
-            loc=loc,
-            ip=ip,
-        )
diff --git a/mlir/python/mlir/dialects/func.py b/mlir/python/mlir/dialects/func.py
index 9c6c4c9092c7a88..6599f67b7078777 100644
--- a/mlir/python/mlir/dialects/func.py
+++ b/mlir/python/mlir/dialects/func.py
@@ -26,9 +26,6 @@
 class ConstantOp(ConstantOp):
     """Specialization for the constant op class."""
 
-    def __init__(self, result: Type, value: Attribute, *, loc=None, ip=None):
-        super().__init__(result, value, loc=loc, ip=ip)
-
     @property
     def type(self):
         return self.results[0].type
diff --git a/mlir/python/mlir/dialects/memref.py b/mlir/python/mlir/dialects/memref.py
index 111ad2178703d28..3afb6a70cb9e0db 100644
--- a/mlir/python/mlir/dialects/memref.py
+++ b/mlir/python/mlir/dialects/memref.py
@@ -3,41 +3,3 @@
 #  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from ._memref_ops_gen import *
-from ._memref_ops_gen import _Dialect
-
-try:
-    from ..ir import *
-    from ._ods_common import (
-        get_op_result_or_value as _get_op_result_or_value,
-        get_op_results_or_values as _get_op_results_or_values,
-        _cext as _ods_cext,
-    )
-except ImportError as e:
-    raise RuntimeError("Error loading imports from extension module") from e
-
-from typing import Optional, Sequence, Union
-
-
- at _ods_cext.register_operation(_Dialect, replace=True)
-class LoadOp(LoadOp):
-    """Specialization for the MemRef load operation."""
-
-    def __init__(
-        self,
-        memref: Union[Operation, OpView, Value],
-        indices: Optional[Union[Operation, OpView, Sequence[Value]]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        """Creates a memref load operation.
-
-        Args:
-          memref: the buffer to load from.
-          indices: the list of subscripts, may be empty for zero-dimensional
-            buffers.
-          loc: user-visible location of the operation.
-          ip: insertion point.
-        """
-        indices_resolved = [] if indices is None else _get_op_results_or_values(indices)
-        super().__init__(memref, indices_resolved, loc=loc, ip=ip)
diff --git a/mlir/python/mlir/dialects/pdl.py b/mlir/python/mlir/dialects/pdl.py
index a8d9c56f4233d9e..90d7d706238e649 100644
--- a/mlir/python/mlir/dialects/pdl.py
+++ b/mlir/python/mlir/dialects/pdl.py
@@ -21,43 +21,6 @@
 )
 
 
- at _ods_cext.register_operation(_Dialect, replace=True)
-class ApplyNativeConstraintOp(ApplyNativeConstraintOp):
-    """Specialization for PDL apply native constraint op class."""
-
-    def __init__(
-        self,
-        name: Union[str, StringAttr],
-        args: Optional[Sequence[Union[OpView, Operation, Value]]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        if args is None:
-            args = []
-        args = _get_values(args)
-        super().__init__(name, args, loc=loc, ip=ip)
-
-
- at _ods_cext.register_operation(_Dialect, replace=True)
-class ApplyNativeRewriteOp(ApplyNativeRewriteOp):
-    """Specialization for PDL apply native rewrite op class."""
-
-    def __init__(
-        self,
-        results: Sequence[Type],
-        name: Union[str, StringAttr],
-        args: Optional[Sequence[Union[OpView, Operation, Value]]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        if args is None:
-            args = []
-        args = _get_values(args)
-        super().__init__(results, name, args, loc=loc, ip=ip)
-
-
 @_ods_cext.register_operation(_Dialect, replace=True)
 class AttributeOp(AttributeOp):
     """Specialization for PDL attribute op class."""
@@ -75,21 +38,6 @@ def __init__(
         super().__init__(result, valueType=valueType, value=value, loc=loc, ip=ip)
 
 
- at _ods_cext.register_operation(_Dialect, replace=True)
-class EraseOp(EraseOp):
-    """Specialization for PDL erase op class."""
-
-    def __init__(
-        self,
-        operation: Optional[Union[OpView, Operation, Value]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        operation = _get_value(operation)
-        super().__init__(operation, loc=loc, ip=ip)
-
-
 @_ods_cext.register_operation(_Dialect, replace=True)
 class OperandOp(OperandOp):
     """Specialization for PDL operand op class."""
@@ -216,23 +164,6 @@ def __init__(
         super().__init__(result, parent, index, loc=loc, ip=ip)
 
 
- at _ods_cext.register_operation(_Dialect, replace=True)
-class ResultsOp(ResultsOp):
-    """Specialization for PDL results op class."""
-
-    def __init__(
-        self,
-        result: Type,
-        parent: Union[OpView, Operation, Value],
-        index: Optional[Union[IntegerAttr, int]] = None,
-        *,
-        loc=None,
-        ip=None,
-    ):
-        parent = _get_value(parent)
-        super().__init__(result, parent, index=index, loc=loc, ip=ip)
-
-
 @_ods_cext.register_operation(_Dialect, replace=True)
 class RewriteOp(RewriteOp):
     """Specialization for PDL rewrite op class."""
diff --git a/mlir/python/mlir/dialects/scf.py b/mlir/python/mlir/dialects/scf.py
index 43ad9f4e2d65f51..71c80cab76dfb86 100644
--- a/mlir/python/mlir/dialects/scf.py
+++ b/mlir/python/mlir/dialects/scf.py
@@ -20,11 +20,8 @@
 from typing import Optional, Sequence, Union
 
 
-_ForOp = ForOp
-
-
 @_ods_cext.register_operation(_Dialect, replace=True)
-class ForOp(_ForOp):
+class ForOp(ForOp):
     """Specialization for the SCF for op class."""
 
     def __init__(
@@ -50,17 +47,8 @@ def __init__(
         iter_args = _get_op_results_or_values(iter_args)
 
         results = [arg.type for arg in iter_args]
-        super(_ForOp, self).__init__(
-            self.build_generic(
-                regions=1,
-                results=results,
-                operands=[
-                    _get_op_result_or_value(o) for o in [lower_bound, upper_bound, step]
-                ]
-                + list(iter_args),
-                loc=loc,
-                ip=ip,
-            )
+        super().__init__(
+            results, lower_bound, upper_bound, step, iter_args, loc=loc, ip=ip
         )
         self.regions[0].blocks.append(self.operands[0].type, *results)
 
@@ -83,28 +71,23 @@ def inner_iter_args(self):
         return self.body.arguments[1:]
 
 
-_IfOp = IfOp
-
-
 @_ods_cext.register_operation(_Dialect, replace=True)
-class IfOp(_IfOp):
+class IfOp(IfOp):
     """Specialization for the SCF if op class."""
 
-    def __init__(self, cond, results_=[], *, hasElse=False, loc=None, ip=None):
+    def __init__(self, cond, results_=None, *, hasElse=False, loc=None, ip=None):
         """Creates an SCF `if` operation.
 
         - `cond` is a MLIR value of 'i1' type to determine which regions of code will be executed.
         - `hasElse` determines whether the if operation has the else branch.
         """
+        if results_ is None:
+            results_ = []
         operands = []
         operands.append(cond)
         results = []
         results.extend(results_)
-        super(_IfOp, self).__init__(
-            self.build_generic(
-                regions=2, results=results, operands=operands, loc=loc, ip=ip
-            )
-        )
+        super().__init__(results, cond)
         self.regions[0].blocks.append(*[])
         if hasElse:
             self.regions[1].blocks.append(*[])
diff --git a/mlir/test/python/dialects/affine.py b/mlir/test/python/dialects/affine.py
index d2e664d4653420f..c5ec85457493b42 100644
--- a/mlir/test/python/dialects/affine.py
+++ b/mlir/test/python/dialects/affine.py
@@ -37,7 +37,7 @@ def affine_store_test(arg0):
                 a1 = arith.ConstantOp(f32, 2.1)
 
                 # CHECK: affine.store %[[A1]], %alloc[symbol(%[[ARG0]]) * 3, %[[ARG0]] + symbol(%[[ARG0]]) + 1] : memref<12x12xf32>
-                affine.AffineStoreOp(a1, mem, map, map_operands=[arg0, arg0])
+                affine.AffineStoreOp(a1, mem, indices=[arg0, arg0], map=map)
 
                 return mem
 
diff --git a/mlir/test/python/dialects/func.py b/mlir/test/python/dialects/func.py
index 161a12d78776a0e..a2014c64d2fa53b 100644
--- a/mlir/test/python/dialects/func.py
+++ b/mlir/test/python/dialects/func.py
@@ -84,6 +84,9 @@ def testFunctionCalls():
     qux = func.FuncOp("qux", ([], [F32Type.get()]))
     qux.sym_visibility = StringAttr.get("private")
 
+    con = func.ConstantOp(qux.type, qux.sym_name.value)
+    assert con.type == qux.type
+
     with InsertionPoint(func.FuncOp("caller", ([], [])).add_entry_block()):
         func.CallOp(foo, [])
         func.CallOp([IndexType.get()], "bar", [])
@@ -94,6 +97,7 @@ def testFunctionCalls():
 # CHECK: func private @foo()
 # CHECK: func private @bar() -> index
 # CHECK: func private @qux() -> f32
+# CHECK: %f = func.constant @qux : () -> f32
 # CHECK: func @caller() {
 # CHECK:   call @foo() : () -> ()
 # CHECK:   %0 = call @bar() : () -> index

>From 5070c1e3b07c5b384fe0a064aa99f25b8af4b7e9 Mon Sep 17 00:00:00 2001
From: Ryosuke Niwa <rniwa at webkit.org>
Date: Thu, 19 Oct 2023 16:06:25 -0700
Subject: [PATCH 70/76] [analyzer] WebKit checkers: recognize dynamicDowncast
 as a safe function.

It can take raw pointers without triggering a warning.

Also retire the support for makeRef and makeWeakPtr as they have been removed
from WebKit.
---
 .../Checkers/WebKit/PtrTypesSemantics.cpp     |  3 +-
 .../WebKit/UncountedCallArgsChecker.cpp       |  2 +-
 .../WebKit/call-args-dynamic-downcast.cpp     | 35 +++++++++++++++++++
 .../Analysis/Checkers/WebKit/call-args.cpp    | 16 ---------
 4 files changed, 37 insertions(+), 19 deletions(-)
 create mode 100644 clang/test/Analysis/Checkers/WebKit/call-args-dynamic-downcast.cpp

diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
index 9b1d7ae3e6a320c..c1f180f31338cb3 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
@@ -186,8 +186,7 @@ bool isPtrConversion(const FunctionDecl *F) {
   // FIXME: check # of params == 1
   const auto FunctionName = safeGetName(F);
   if (FunctionName == "getPtr" || FunctionName == "WeakPtr" ||
-      FunctionName == "makeWeakPtr"
-
+      FunctionName == "dynamicDowncast"
       || FunctionName == "downcast" || FunctionName == "bitwise_cast")
     return true;
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp
index 4ae8c442fa70755..407b6ba7a76428c 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp
@@ -149,7 +149,7 @@ class UncountedCallArgsChecker
 
     auto name = safeGetName(Callee);
     if (name == "adoptRef" || name == "getPtr" || name == "WeakPtr" ||
-        name == "makeWeakPtr" || name == "downcast" || name == "bitwise_cast" ||
+        name == "dynamicDowncast" || name == "downcast" || name == "bitwise_cast" ||
         name == "is" || name == "equal" || name == "hash" ||
         name == "isType"
         // FIXME: Most/all of these should be implemented via attributes.
diff --git a/clang/test/Analysis/Checkers/WebKit/call-args-dynamic-downcast.cpp b/clang/test/Analysis/Checkers/WebKit/call-args-dynamic-downcast.cpp
new file mode 100644
index 000000000000000..28156623d9a0fd7
--- /dev/null
+++ b/clang/test/Analysis/Checkers/WebKit/call-args-dynamic-downcast.cpp
@@ -0,0 +1,35 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UncountedCallArgsChecker -verify %s
+// expected-no-diagnostics
+
+class Base {
+public:
+    inline void ref();
+    inline void deref();
+};
+
+class Derived : public Base {
+public:
+  virtual ~Derived();
+
+  void ref() const;
+  void deref() const;
+};
+
+class SubDerived final : public Derived {
+};
+
+class OtherObject {
+public:
+    Derived* obj();
+};
+
+template<typename Target, typename Source>
+inline Target* dynamicDowncast(Source* source)
+{
+    return static_cast<Target*>(source);
+}
+
+void foo(OtherObject* other)
+{
+    dynamicDowncast<SubDerived>(other->obj());
+}
diff --git a/clang/test/Analysis/Checkers/WebKit/call-args.cpp b/clang/test/Analysis/Checkers/WebKit/call-args.cpp
index a56c4222adb514a..716219836e6b445 100644
--- a/clang/test/Analysis/Checkers/WebKit/call-args.cpp
+++ b/clang/test/Analysis/Checkers/WebKit/call-args.cpp
@@ -262,22 +262,6 @@ namespace param_forwarding_method {
   }
 }
 
-namespace make_ref {
-  void makeRef(RefCountable*) {}
-  void makeRefPtr(RefCountable*) {}
-  void makeWeakPtr(RefCountable*) {}
-  void makeWeakPtr(RefCountable&) {}
-
-  void foo() {
-    makeRef(provide());
-    makeRefPtr(provide());
-    RefPtr<RefCountable> a(provide());
-    Ref<RefCountable> b(provide());
-    makeWeakPtr(provide());
-    makeWeakPtr(*provide());
-  }
-}
-
 namespace downcast {
   void consume_ref_countable(RefCountable*) {}
   RefCountable* downcast(RefCountable*) { return nullptr; }

>From 65f946cba4085d3d3054a0db3ed0e4006b6cf783 Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Thu, 19 Oct 2023 16:29:51 -0700
Subject: [PATCH 71/76] [RISCV] Fix some GlobalISel tests using -march instead
 of -mtriple.

This caused llc to assume the wrong target triple and broke some internal
AS sanitizer bots.
---
 .../CodeGen/RISCV/GlobalISel/instruction-select/icmp-rv32.mir   | 2 +-
 .../CodeGen/RISCV/GlobalISel/instruction-select/icmp-rv64.mir   | 2 +-
 .../CodeGen/RISCV/GlobalISel/instruction-select/ptradd-rv32.mir | 2 +-
 .../CodeGen/RISCV/GlobalISel/instruction-select/ptradd-rv64.mir | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/icmp-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/icmp-rv32.mir
index c45f10752f3755f..cf0baeb9527c955 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/icmp-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/icmp-rv32.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=riscv32 -run-pass=instruction-select -simplify-mir \
+# RUN: llc -mtriple=riscv32 -run-pass=instruction-select -simplify-mir \
 # RUN: -verify-machineinstrs %s -o - | FileCheck %s
 ---
 name:            cmp_ult_i32
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/icmp-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/icmp-rv64.mir
index 1a4232db9754659..7b6506e738ee3c9 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/icmp-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/icmp-rv64.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=riscv64 -run-pass=instruction-select -simplify-mir \
+# RUN: llc -mtriple=riscv64 -run-pass=instruction-select -simplify-mir \
 # RUN: -verify-machineinstrs %s -o - | FileCheck %s
 ---
 name:            cmp_ult_i64
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ptradd-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ptradd-rv32.mir
index bc0395685b40e9e..a96f55fa66e1edd 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ptradd-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ptradd-rv32.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=riscv32 -run-pass=instruction-select -simplify-mir \
+# RUN: llc -mtriple=riscv32 -run-pass=instruction-select -simplify-mir \
 # RUN: -verify-machineinstrs %s -o - | FileCheck %s
 ---
 name:            add_i32
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ptradd-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ptradd-rv64.mir
index d024a7c659878de..6346d913faf06b5 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ptradd-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/ptradd-rv64.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=riscv64 -run-pass=instruction-select -simplify-mir \
+# RUN: llc -mtriple=riscv64 -run-pass=instruction-select -simplify-mir \
 # RUN: -verify-machineinstrs %s -o - | FileCheck %s
 ---
 name:            add_i64

>From ec10c36b07766f04576afac014d7f7aaab4395ae Mon Sep 17 00:00:00 2001
From: lntue <35648136+lntue at users.noreply.github.com>
Date: Thu, 19 Oct 2023 19:49:59 -0400
Subject: [PATCH 72/76] [libc][NFC] Forcing data type in gettimeofday_test when
 comparing the diff. (#69652)

---
 libc/test/src/time/gettimeofday_test.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libc/test/src/time/gettimeofday_test.cpp b/libc/test/src/time/gettimeofday_test.cpp
index 44250787c5381ef..2deb7726264ee39 100644
--- a/libc/test/src/time/gettimeofday_test.cpp
+++ b/libc/test/src/time/gettimeofday_test.cpp
@@ -18,22 +18,22 @@ namespace cpp = LIBC_NAMESPACE::cpp;
 TEST(LlvmLibcGettimeofday, SmokeTest) {
   using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds;
   void *tz = nullptr;
-  struct timeval tv;
+  timeval tv;
 
-  int sleep_times[2] = {200, 1000};
+  suseconds_t sleep_times[2] = {200, 1000};
   for (int i = 0; i < 2; i++) {
     int ret = LIBC_NAMESPACE::gettimeofday(&tv, tz);
     ASSERT_EQ(ret, 0);
 
-    int sleep_time = sleep_times[i];
+    suseconds_t sleep_time = sleep_times[i];
     // Sleep for {sleep_time} microsceconds.
-    struct timespec tim = {0, sleep_time * 1000};
-    struct timespec tim2 = {0, 0};
+    timespec tim = {0, sleep_time * 1000};
+    timespec tim2 = {0, 0};
     ret = LIBC_NAMESPACE::nanosleep(&tim, &tim2);
 
     // Call gettimeofday again and verify that it is more {sleep_time}
     // microscecods.
-    struct timeval tv1;
+    timeval tv1;
     ret = LIBC_NAMESPACE::gettimeofday(&tv1, tz);
     ASSERT_EQ(ret, 0);
     ASSERT_GE(tv1.tv_usec - tv.tv_usec, sleep_time);

>From 3d89c088af5bd5242f6c80efb10a7191d0c4e71e Mon Sep 17 00:00:00 2001
From: Aart Bik <39774503+aartbik at users.noreply.github.com>
Date: Thu, 19 Oct 2023 16:56:52 -0700
Subject: [PATCH 73/76] [mlir][sparse] support BSR for cuSPARSE (libgen path
 only) (#69646)

---
 .../Transforms/SparseGPUCodegen.cpp           |  69 +++++--
 .../GPU/CUDA/sparse-sampled-matmul-lib.mlir   |  13 +-
 .../GPU/CUDA/sparse-sddmm-lib.mlir            | 189 ++++++++++++++++++
 3 files changed, 246 insertions(+), 25 deletions(-)
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sddmm-lib.mlir

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
index a6e963181816f7b..9c836c16dab2b7c 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
@@ -39,7 +39,7 @@ enum class CuSparseFormat {
   kCOO,
   kCSR,
   kCSC,
-  kBSR, // TODO: coming soon!
+  kBSR,
 };
 
 //===----------------------------------------------------------------------===//
@@ -428,6 +428,19 @@ static bool isAdmissibleCSC(SparseTensorType &aTp) {
          aTp.isOrderedLvl(1) && aTp.isUniqueLvl(1) && isAdmissibleMetaData(aTp);
 }
 
+/// Test for BSR matrix with suitable metadata.
+static bool isAdmissibleBSR(SparseTensorType &aTp) {
+  if (aTp.getDimRank() == 2 && aTp.getLvlRank() == 4 && aTp.isDenseLvl(0) &&
+      aTp.isCompressedLvl(1) && aTp.isOrderedLvl(1) && aTp.isUniqueLvl(1) &&
+      aTp.isDenseLvl(2) && aTp.isDenseLvl(3) && isAdmissibleMetaData(aTp)) {
+    // CuSparse only supports "square" blocks currently.
+    SmallVector<unsigned> dims = getBlockSize(aTp.getDimToLvl());
+    assert(dims.size() == 2);
+    return dims[0] = dims[1] && dims[0] > 1;
+  }
+  return false;
+}
+
 /// Returns a suitable sparse format for the operation and given operand
 /// types with cuSparse, or kNone if none is available.
 static CuSparseFormat getCuSparseFormat(SparseTensorType aTp,
@@ -448,6 +461,8 @@ static CuSparseFormat getCuSparseFormat(SparseTensorType aTp,
     return CuSparseFormat::kCSR;
   if (isAdmissibleCSC(aTp))
     return CuSparseFormat::kCSC;
+  if (isAdmissibleBSR(aTp))
+    return CuSparseFormat::kBSR;
   return CuSparseFormat::kNone;
 }
 
@@ -475,9 +490,10 @@ static Value genSecondCrds(OpBuilder &builder, Location loc, Value a,
 }
 
 /// Generates the sparse matrix handle.
-static Operation *genSpMat(OpBuilder &builder, Location loc, Type handleTp,
-                           Type tokenTp, Value token, Value sz1, Value sz2,
-                           Value nseA, Value rowA, Value colA, Value valA,
+static Operation *genSpMat(OpBuilder &builder, Location loc,
+                           SparseTensorType &aTp, Type handleTp, Type tokenTp,
+                           Value token, Value sz1, Value sz2, Value nseA,
+                           Value rowA, Value colA, Value valA,
                            CuSparseFormat format, bool enableRT) {
   if (format == CuSparseFormat::kCOO) {
     // Library uses SoA COO, direct IR uses AoS COO.
@@ -498,9 +514,24 @@ static Operation *genSpMat(OpBuilder &builder, Location loc, Type handleTp,
   if (format == CuSparseFormat::kCSR)
     return builder.create<gpu::CreateCsrOp>(loc, handleTp, tokenTp, token, sz1,
                                             sz2, nseA, rowA, colA, valA);
-  assert(format == CuSparseFormat::kCSC);
-  return builder.create<gpu::CreateCscOp>(loc, handleTp, tokenTp, token, sz1,
-                                          sz2, nseA, rowA, colA, valA);
+  if (format == CuSparseFormat::kCSC)
+    return builder.create<gpu::CreateCscOp>(loc, handleTp, tokenTp, token, sz1,
+                                            sz2, nseA, rowA, colA, valA);
+  // BSR requires a bit more work since we need to pass in the block size
+  // and all others sizes in terms of blocks (#block-rows, #block-cols,
+  // #nonzero-blocks).
+  assert(format == CuSparseFormat::kBSR);
+  SmallVector<unsigned> dims = getBlockSize(aTp.getDimToLvl());
+  assert(dims.size() == 2 && dims[0] == dims[1]);
+  uint64_t b = dims[0];
+  Value bSz = constantIndex(builder, loc, b);
+  Value bRows = builder.create<arith::DivUIOp>(loc, sz1, bSz);
+  Value bCols = builder.create<arith::DivUIOp>(loc, sz2, bSz);
+  Value bNum = builder.create<arith::DivUIOp>(
+      loc, nseA, constantIndex(builder, loc, b * b));
+  return builder.create<gpu::CreateBsrOp>(loc, handleTp, tokenTp, token, bRows,
+                                          bCols, bNum, bSz, bSz, rowA, colA,
+                                          valA);
 }
 
 /// Match and rewrite SpMV kernel.
@@ -566,8 +597,8 @@ rewriteSpMV(PatternRewriter &rewriter, linalg::GenericOp op, bool enableRT,
   Type tokenTp = rewriter.getType<gpu::AsyncTokenType>();
   Value token = genFirstWait(rewriter, loc);
   Operation *spGenA =
-      genSpMat(rewriter, loc, spmatHandleTp, tokenTp, token, szY, szX, nseA,
-               rowA, colA, valA, format, enableRT);
+      genSpMat(rewriter, loc, aTp, spmatHandleTp, tokenTp, token, szY, szX,
+               nseA, rowA, colA, valA, format, enableRT);
   Value spMatA = spGenA->getResult(0);
   token = spGenA->getResult(1);
   auto dvecX = rewriter.create<gpu::CreateDnTensorOp>(
@@ -691,8 +722,8 @@ rewriteSpMM(PatternRewriter &rewriter, linalg::GenericOp op, bool enableRT,
   Type tokenTp = rewriter.getType<gpu::AsyncTokenType>();
   Value token = genFirstWait(rewriter, loc);
   Operation *spGenA =
-      genSpMat(rewriter, loc, spMatHandleTp, tokenTp, token, szm, szk, nseA,
-               rowA, colA, valA, format, enableRT);
+      genSpMat(rewriter, loc, aTp, spMatHandleTp, tokenTp, token, szm, szk,
+               nseA, rowA, colA, valA, format, enableRT);
   Value spMatA = spGenA->getResult(0);
   token = spGenA->getResult(1);
   auto dmatB = rewriter.create<gpu::CreateDnTensorOp>(
@@ -806,13 +837,13 @@ rewriteSpGEMM(PatternRewriter &rewriter, linalg::GenericOp op, bool enableRT,
   Type tokenTp = rewriter.getType<gpu::AsyncTokenType>();
   Value token = genFirstWait(rewriter, loc);
   Operation *spGenA =
-      genSpMat(rewriter, loc, spmatHandleTp, tokenTp, token, szm, szk, nseA,
-               rowA, colA, valA, format, enableRT);
+      genSpMat(rewriter, loc, aTp, spmatHandleTp, tokenTp, token, szm, szk,
+               nseA, rowA, colA, valA, format, enableRT);
   Value spMatA = spGenA->getResult(0);
   token = spGenA->getResult(1);
   Operation *spGenB =
-      genSpMat(rewriter, loc, spmatHandleTp, tokenTp, token, szk, szn, nseB,
-               rowB, colB, valB, format, enableRT);
+      genSpMat(rewriter, loc, bTp, spmatHandleTp, tokenTp, token, szk, szn,
+               nseB, rowB, colB, valB, format, enableRT);
   Value spMatB = spGenB->getResult(0);
   token = spGenB->getResult(1);
 
@@ -830,8 +861,8 @@ rewriteSpGEMM(PatternRewriter &rewriter, linalg::GenericOp op, bool enableRT,
   Value valC = e3.getResult(0); // no free needed
   token = e3.getAsyncToken();
   Operation *spGenC =
-      genSpMat(rewriter, loc, spmatHandleTp, tokenTp, token, szm, szn, zero,
-               rowC, colC, valC, format, enableRT);
+      genSpMat(rewriter, loc, cTp, spmatHandleTp, tokenTp, token, szm, szn,
+               zero, rowC, colC, valC, format, enableRT);
   Value spMatC = spGenC->getResult(0);
   token = spGenC->getResult(1);
 
@@ -1137,8 +1168,8 @@ rewriteSDDMM(PatternRewriter &rewriter, linalg::GenericOp op, bool enableRT,
   Value dnB = dmatB.getResult(0);
   token = dmatB.getAsyncToken();
   Operation *spGenC =
-      genSpMat(rewriter, loc, spMatHandleTp, tokenTp, token, szm, szn, nseC,
-               rowC, colC, valC, format, enableRT);
+      genSpMat(rewriter, loc, cTp, spMatHandleTp, tokenTp, token, szm, szn,
+               nseC, rowC, colC, valC, format, enableRT);
   Value spMatC = spGenC->getResult(0);
   token = spGenC->getResult(1);
   auto dnCType = llvm::cast<ShapedType>(c.getType()).getElementType();
diff --git a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sampled-matmul-lib.mlir b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sampled-matmul-lib.mlir
index 61de57564beda2e..ac5c0f8bead0773 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sampled-matmul-lib.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sampled-matmul-lib.mlir
@@ -3,7 +3,8 @@
 //
 // DEFINE: %{compile} = mlir-opt %s \
 // DEFINE:   --sparse-compiler="enable-gpu-libgen gpu-triple=nvptx64-nvidia-cuda gpu-chip=sm_80 gpu-features=+ptx71 gpu-format=%gpu_compilation_format
-// DEFINE: %{run} = TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \
+// DEFINE: %{run} = \
+// DEFINE:   env TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \
 // DEFINE:   mlir-cpu-runner \
 // DEFINE:   --shared-libs=%mlir_cuda_runtime \
 // DEFINE:   --shared-libs=%mlir_c_runner_utils \
@@ -12,16 +13,16 @@
 //
 // with RT lib:
 //
-//  RUN:  %{compile} enable-runtime-library=true" | %{run}
-//  RUN:  %{compile} enable-runtime-library=true gpu-data-transfer-strategy=pinned-dma" | %{run}
-//  Tracker #64316
-//  RUNNOT: %{compile} enable-runtime-library=true gpu-data-transfer-strategy=zero-copy" | %{run}
+// RUN:  %{compile} enable-runtime-library=true" | %{run}
+// RUN:  %{compile} enable-runtime-library=true gpu-data-transfer-strategy=pinned-dma" | %{run}
+// TODO: Tracker #64316
+// RUNNOT: %{compile} enable-runtime-library=true gpu-data-transfer-strategy=zero-copy" | %{run}
 //
 // without RT lib:
 //
 // RUN:  %{compile} enable-runtime-library=false" | %{run}
 // RUN:  %{compile} enable-runtime-library=false gpu-data-transfer-strategy=pinned-dma" | %{run}
-//  Tracker #64316
+// TODO:  Tracker #64316
 // RUNNOT: %{compile} enable-runtime-library=false gpu-data-transfer-strategy=zero-copy" | %{run}
 //
 
diff --git a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sddmm-lib.mlir b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sddmm-lib.mlir
new file mode 100644
index 000000000000000..54408d629ec22ec
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sddmm-lib.mlir
@@ -0,0 +1,189 @@
+//
+// NOTE: this test requires gpu-sm80
+//
+// DEFINE: %{compile} = mlir-opt %s \
+// DEFINE:   --sparse-compiler="enable-gpu-libgen gpu-triple=nvptx64-nvidia-cuda gpu-chip=sm_80 gpu-features=+ptx71 gpu-format=%gpu_compilation_format
+// DEFINE: %{run} = \
+// DEFINE:   env TENSOR0="%mlir_src_dir/test/Integration/data/block.mtx" \
+// DEFINE:   mlir-cpu-runner \
+// DEFINE:   --shared-libs=%mlir_cuda_runtime \
+// DEFINE:   --shared-libs=%mlir_c_runner_utils \
+// DEFINE:   --e entry --entry-point-result=void \
+// DEFINE: | FileCheck %s
+//
+// with RT lib:
+//
+// RUN:  %{compile} enable-runtime-library=true" | %{run}
+//
+// without RT lib:
+//
+// TODO: make this work
+// R_UN:  %{compile} enable-runtime-library=false" | %{run}
+//
+
+!Filename = !llvm.ptr<i8>
+
+#CSR = #sparse_tensor.encoding<{
+  map = (d0, d1) -> (d0 : dense, d1 : compressed)
+}>
+
+#BSR = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i floordiv 2 : dense,
+    j floordiv 2 : compressed,
+    i mod 2 : dense,
+    j mod 2 : dense)
+}>
+
+#trait_SDDMM = {
+  indexing_maps = [
+    affine_map<(i,j,k) -> (i,k)>,  // A
+    affine_map<(i,j,k) -> (k,j)>,  // B
+    affine_map<(i,j,k) -> (i,j)>   // S (in/out)
+  ],
+  iterator_types = ["parallel", "parallel", "reduction"],
+  doc = "S(i,j) += spy[S(i,j)] x SUM_k A(i,k) B(k,j)"
+}
+
+//
+// Integration test that lowers a kernel annotated as sparse to
+// actual sparse code, initializes sparse storage schemes, and
+// runs the resulting code with the JIT compiler.
+//
+module {
+  llvm.func @mgpuCreateSparseEnv()
+  llvm.func @mgpuDestroySparseEnv()
+
+  //
+  // A kernel that computes a CSR sampled dense matrix matrix multiplication
+  // using a "spy" function and in-place update of the sampling sparse matrix.
+  //
+  func.func @SDDMM(%args: tensor<?x?xf32, #CSR>,
+                   %arga: tensor<?x?xf32>,
+                   %argb: tensor<?x?xf32>) -> tensor<?x?xf32, #CSR> {
+    %result = linalg.generic #trait_SDDMM
+      ins(%arga, %argb: tensor<?x?xf32>, tensor<?x?xf32>)
+      outs(%args: tensor<?x?xf32, #CSR>) {
+        ^bb(%a: f32, %b: f32, %s: f32):
+           %f0 = arith.constant 0.0 : f32
+           %u = sparse_tensor.unary %s : f32 to f32
+             present={
+                ^bb0(%p: f32):
+                  %mul = arith.mulf %a, %b : f32
+                  sparse_tensor.yield %mul : f32
+             }
+             absent={}
+           %r = sparse_tensor.reduce %s, %u, %f0 : f32 {
+              ^bb0(%p: f32, %q: f32):
+                %add = arith.addf %p, %q : f32
+                sparse_tensor.yield %add : f32
+            }
+           linalg.yield %r : f32
+      } -> tensor<?x?xf32, #CSR>
+    return %result : tensor<?x?xf32, #CSR>
+  }
+
+  //
+  // A kernel that computes a BSR sampled dense matrix matrix multiplication
+  // using a "spy" function and in-place update of the sampling sparse matrix.
+  //
+  func.func @SDDMM_block(%args: tensor<?x?xf32, #BSR>,
+                         %arga: tensor<?x?xf32>,
+                         %argb: tensor<?x?xf32>) -> tensor<?x?xf32, #BSR> {
+    %result = linalg.generic #trait_SDDMM
+      ins(%arga, %argb: tensor<?x?xf32>, tensor<?x?xf32>)
+      outs(%args: tensor<?x?xf32, #BSR>) {
+        ^bb(%a: f32, %b: f32, %s: f32):
+           %f0 = arith.constant 0.0 : f32
+           %u = sparse_tensor.unary %s : f32 to f32
+             present={
+                ^bb0(%p: f32):
+                  %mul = arith.mulf %a, %b : f32
+                  sparse_tensor.yield %mul : f32
+             }
+             absent={}
+           %r = sparse_tensor.reduce %s, %u, %f0 : f32 {
+              ^bb0(%p: f32, %q: f32):
+                %add = arith.addf %p, %q : f32
+                sparse_tensor.yield %add : f32
+            }
+           linalg.yield %r : f32
+      } -> tensor<?x?xf32, #BSR>
+    return %result : tensor<?x?xf32, #BSR>
+  }
+
+  func.func private @getTensorFilename(index) -> (!Filename)
+
+  //
+  // Main driver.
+  //
+  func.func @entry() {
+    llvm.call @mgpuCreateSparseEnv() : () -> ()
+    %d0 = arith.constant 0.0 : f32
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %c4 = arith.constant 4 : index
+    %c6 = arith.constant 6 : index
+
+    // Initialize dense matrices.
+    %a = tensor.generate %c4, %c4 {
+    ^bb0(%i: index, %j: index):
+      %p = arith.addi %i, %c1 : index
+      %q = arith.index_cast %p : index to i32
+      %d = arith.sitofp %q : i32 to f32
+      tensor.yield %d : f32
+    } : tensor<?x?xf32>
+    %b = tensor.generate %c4, %c6 {
+    ^bb0(%i: index, %j: index):
+      %p = arith.addi %j, %c1 : index
+      %q = arith.index_cast %p : index to i32
+      %d = arith.sitofp %q : i32 to f32
+      tensor.yield %d : f32
+    } : tensor<?x?xf32>
+
+    // Read the sparse matrix from file, construct sparse storage.
+    //
+    //      +-----+-----+-----+
+    //      | 1 2 | . . | 4 . |
+    //      | . 3 | . . | . 5 |
+    //      +-----+-----+-----+
+    //      | . . | 6 7 | . . |
+    //      | . . | 8 . | . . |
+    //      +-----+-----+-----+
+    //
+    %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
+    %m_csr = sparse_tensor.new %fileName : !Filename to tensor<?x?xf32, #CSR>
+    %m_bsr = sparse_tensor.new %fileName : !Filename to tensor<?x?xf32, #BSR>
+
+    // Call the kernel.
+    %0 = call @SDDMM(%m_csr, %a, %b)
+       : (tensor<?x?xf32, #CSR>,
+          tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32, #CSR>
+    %1 = call @SDDMM_block(%m_bsr, %a, %b)
+       : (tensor<?x?xf32, #BSR>,
+          tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32, #BSR>
+
+    //
+    // Print the result for verification. Note that the "spy" determines what
+    // dot products are sampled, but the original contents are added back to
+    // the result (which is why the block sparse version has actual results
+    // in the original zero positions).
+    //
+    // CHECK:      ( 5, 10, 24, 19, 53, 42, 55, 56 )
+    // CHECK-NEXT: ( 5, 10, 8, 19, 24, 24, 40, 53, 42, 55, 56, 64 )
+    //
+    %v0 = sparse_tensor.values %0 : tensor<?x?xf32, #CSR> to memref<?xf32>
+    %vv0 = vector.transfer_read %v0[%c0], %d0 : memref<?xf32>, vector<8xf32>
+    vector.print %vv0 : vector<8xf32>
+    %v1 = sparse_tensor.values %1 : tensor<?x?xf32, #BSR> to memref<?xf32>
+    %vv1 = vector.transfer_read %v1[%c0], %d0 : memref<?xf32>, vector<12xf32>
+    vector.print %vv1 : vector<12xf32>
+
+    // Release the resources.
+    bufferization.dealloc_tensor %0 : tensor<?x?xf32, #CSR>
+    bufferization.dealloc_tensor %1 : tensor<?x?xf32, #BSR>
+
+    llvm.call @mgpuDestroySparseEnv() : () -> ()
+    return
+  }
+}

>From eb6ec1720628cb55677dd0db8d4ee75c43f37514 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?=
 =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?=
 =?UTF-8?q?=E3=83=B3=29?= <clementval at gmail.com>
Date: Thu, 19 Oct 2023 17:25:05 -0700
Subject: [PATCH 74/76] [flang][openacc] Do not error when bind symbol is
 defined later or external (#69657)

The symbol in the bind clause on an acc routine refers to a function or a
subroutine. This patch avoids raising an error when the function or
subroutine is declared later in the code or is external. This is in line
with normal procedure name resolution in Fortran code.
---
 flang/lib/Semantics/resolve-directives.cpp            |  8 ++++++--
 flang/test/Lower/OpenACC/acc-routine.f90              | 11 +++++++++++
 flang/test/Semantics/OpenACC/acc-routine-validity.f90 |  2 --
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index e8448a36a7b273c..f7720fcf43e5768 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -913,6 +913,10 @@ Symbol *AccAttributeVisitor::ResolveFctName(const parser::Name &name) {
   Symbol *prev{currScope().FindSymbol(name.source)};
   if (!prev || (prev && prev->IsFuncResult())) {
     prev = currScope().parent().FindSymbol(name.source);
+    if (!prev) {
+      prev = &context_.globalScope().MakeSymbol(
+          name.source, Attrs{}, ProcEntityDetails{});
+    }
   }
   if (prev != name.symbol) {
     name.symbol = prev;
@@ -965,7 +969,7 @@ void AccAttributeVisitor::AddRoutineInfoToSymbol(
                      std::get_if<Fortran::parser::AccClause::Bind>(&clause.u)) {
         if (const auto *name =
                 std::get_if<Fortran::parser::Name>(&bindClause->v.u)) {
-          if (Symbol *sym = ResolveName(*name, true)) {
+          if (Symbol *sym = ResolveFctName(*name)) {
             info.set_bindName(sym->name().ToString());
           } else {
             context_.Say((*name).source,
@@ -1008,7 +1012,7 @@ bool AccAttributeVisitor::Pre(const parser::OpenACCRoutineConstruct &x) {
 
 bool AccAttributeVisitor::Pre(const parser::AccBindClause &x) {
   if (const auto *name{std::get_if<parser::Name>(&x.u)}) {
-    if (!ResolveName(*name, true)) {
+    if (!ResolveFctName(*name)) {
       context_.Say(name->source,
           "No function or subroutine declared for '%s'"_err_en_US,
           name->source);
diff --git a/flang/test/Lower/OpenACC/acc-routine.f90 b/flang/test/Lower/OpenACC/acc-routine.f90
index 7514e0a8819fae9..ffa889730918f9c 100644
--- a/flang/test/Lower/OpenACC/acc-routine.f90
+++ b/flang/test/Lower/OpenACC/acc-routine.f90
@@ -96,3 +96,14 @@ subroutine acc_routine11(a)
   end interface
 
 end subroutine
+
+subroutine acc_routine13()
+  !$acc routine bind(acc_routine14)
+end subroutine
+
+subroutine acc_routine14()
+end subroutine
+
+subroutine acc_routine15()
+  !$acc routine bind(acc_routine16)
+end subroutine
diff --git a/flang/test/Semantics/OpenACC/acc-routine-validity.f90 b/flang/test/Semantics/OpenACC/acc-routine-validity.f90
index 5b8ecdec4f5267f..c135c2b86aac12c 100644
--- a/flang/test/Semantics/OpenACC/acc-routine-validity.f90
+++ b/flang/test/Semantics/OpenACC/acc-routine-validity.f90
@@ -15,7 +15,6 @@ module openacc_routine_validity
   !ERROR: ROUTINE directive without name must appear within the specification part of a subroutine or function definition, or within an interface body for a subroutine or function in an interface block
   !$acc routine seq
 
-  !ERROR: No function or subroutine declared for 'dummy'
   !$acc routine(dummy) seq
 
 contains
@@ -70,7 +69,6 @@ end function fct4
 
   subroutine sub6(a)
     real :: a(:)
-    !ERROR: No function or subroutine declared for 'dummy_sub'
     !$acc routine seq bind(dummy_sub)
   end subroutine sub6
 

>From bce3b505931cee9dc79d1c56c021983b4a8fb819 Mon Sep 17 00:00:00 2001
From: Ryan Prichard <rprichard at google.com>
Date: Thu, 19 Oct 2023 17:27:01 -0700
Subject: [PATCH 75/76] [libc++][Android] Mark tests XFAIL/UNSUPPORTED (#69271)

Mark tests as necessary to accommodate Android L (5.0 / API 21) and up.

Add three Android lit features:
 - android
 - android-device-api=(21,22,23,...)
 - LIBCXX-ANDROID-FIXME (for failures that need follow-up work)

Enable an AIX workaround in filesystem_test_helper.h for the broken
chmod on older Android devices.

Mark failing test with XFAIL or UNSUPPORTED:
 - Mark modules tests as UNSUPPORTED, matching other configurations.
 - Mark a gdb test as UNSUPPORTED.
 - XFAIL tests for old devices that lack an API (fmemopen).
- XFAIL various FS tests (because SELinux blocks FIFO and hard linking,
because fchmodat is broken on old devices).
- XFAIL various locale tests (because Bionic has limited locale
support). (Also XFAIL an re.traits test.)
- XFAIL some print.fun tests because the error exception has no system
error string.
- Mark std::{cin,wcin} tests UNSUPPORTED because they hang with
adb_run.py on old devices.
 - Mark a few tests UNSUPPORTED because they allocate too much memory.
 - notify_one.pass.cpp is flaky on Android.
- XFAIL libc++abi demangler test because of Android's special long
double on x86[-64].

N.B. The `__ANDROID_API__` macro specifies a minimum required API level
at build-time, whereas the android-device-api lit feature is the
detected API level of the device at run-time. The android-device-api
value will be >= `__ANDROID_API__`.

This commit was split out from https://reviews.llvm.org/D139147.

Fixes: https://github.com/llvm/llvm-project/issues/69270
---
 .../test/libcxx/clang_modules_include.gen.py  |  4 +--
 .../libcxx/gdb/gdb_pretty_printer_test.sh.cpp |  6 +++++
 .../print.fun/vprint_unicode_posix.pass.cpp   |  3 +++
 .../fstreams/filebuf.members/close.pass.cpp   |  5 ++++
 .../directory_entry.obs/file_size.pass.cpp    |  4 +++
 .../file_type_obs.pass.cpp                    |  4 +++
 .../hard_link_count.pass.cpp                  |  4 +++
 .../rec.dir.itr.members/increment.pass.cpp    |  4 +++
 .../fs.op.funcs/fs.op.copy/copy.pass.cpp      |  4 +++
 .../fs.op.copy_file/copy_file.pass.cpp        |  4 +++
 .../create_hard_link.pass.cpp                 |  4 +++
 .../fs.op.equivalent/equivalent.pass.cpp      |  4 +++
 .../fs.op.hard_lk_ct/hard_link_count.pass.cpp |  4 +++
 .../fs.op.is_empty/is_empty.pass.cpp          |  4 +++
 .../fs.op.permissions/permissions.pass.cpp    |  5 ++++
 .../fs.op.funcs/fs.op.status/status.pass.cpp  |  4 +++
 .../symlink_status.pass.cpp                   |  4 +++
 .../ext.manip/get_money.pass.cpp              |  3 +++
 .../ext.manip/put_money.pass.cpp              |  3 +++
 .../print.fun/no_file_description.pass.cpp    |  3 +++
 .../print.fun/print.file.pass.cpp             |  3 +++
 .../print.fun/println.file.pass.cpp           |  3 +++
 .../print.fun/vprint_nonunicode.file.pass.cpp |  3 +++
 .../print.fun/vprint_unicode.file.pass.cpp    |  3 +++
 .../narrow.stream.objects/cin.sh.cpp          |  4 +++
 .../wide.stream.objects/wcin-imbue.sh.cpp     |  4 +++
 .../wide.stream.objects/wcin.sh.cpp           |  4 +++
 .../streambuf.put.area/pbump2gig.pass.cpp     |  5 ++--
 .../stringstream.members/gcount.pass.cpp      |  4 +++
 .../locale.collate.byname/compare.pass.cpp    |  3 +++
 .../locale.collate.byname/transform.pass.cpp  |  3 +++
 .../locale.ctype.byname/is_1.pass.cpp         |  3 +++
 .../locale.ctype.byname/is_many.pass.cpp      |  3 +++
 .../locale.ctype.byname/scan_is.pass.cpp      |  3 +++
 .../locale.ctype.byname/scan_not.pass.cpp     |  3 +++
 .../locale.ctype.byname/tolower_1.pass.cpp    |  3 +++
 .../locale.ctype.byname/tolower_many.pass.cpp |  3 +++
 .../locale.ctype.byname/toupper_1.pass.cpp    |  3 +++
 .../locale.ctype.byname/toupper_many.pass.cpp |  3 +++
 .../get_long_double_en_US.pass.cpp            |  3 +++
 .../get_string_en_US.pass.cpp                 |  3 +++
 .../put_long_double_en_US.pass.cpp            |  3 +++
 .../put_string_en_US.pass.cpp                 |  3 +++
 .../put_long_double.pass.cpp                  |  4 +++
 .../re/re.traits/translate_nocase.pass.cpp    |  4 +++
 .../notify_one.pass.cpp                       |  3 +++
 .../locale-specific_form.pass.cpp             |  3 +++
 libcxx/test/support/filesystem_test_helper.h  | 13 ++++++++--
 libcxx/utils/libcxx/test/features.py          | 25 +++++++++++++++++++
 libcxxabi/test/test_demangle.pass.cpp         |  4 +++
 50 files changed, 204 insertions(+), 7 deletions(-)

diff --git a/libcxx/test/libcxx/clang_modules_include.gen.py b/libcxx/test/libcxx/clang_modules_include.gen.py
index b4c5c79a47f6823..04c8e03affee594 100644
--- a/libcxx/test/libcxx/clang_modules_include.gen.py
+++ b/libcxx/test/libcxx/clang_modules_include.gen.py
@@ -34,7 +34,7 @@
 // UNSUPPORTED{BLOCKLIT}: LIBCXX-AIX-FIXME
 
 // The Android headers don't appear to be compatible with modules yet
-// XFAIL{BLOCKLIT}: LIBCXX-ANDROID-FIXME
+// UNSUPPORTED{BLOCKLIT}: LIBCXX-ANDROID-FIXME
 
 // TODO: Investigate this failure
 // UNSUPPORTED{BLOCKLIT}: LIBCXX-FREEBSD-FIXME
@@ -64,7 +64,7 @@
 // UNSUPPORTED{BLOCKLIT}: LIBCXX-AIX-FIXME
 
 // The Android headers don't appear to be compatible with modules yet
-// XFAIL{BLOCKLIT}: LIBCXX-ANDROID-FIXME
+// UNSUPPORTED{BLOCKLIT}: LIBCXX-ANDROID-FIXME
 
 // TODO: Investigate this failure
 // UNSUPPORTED{BLOCKLIT}: LIBCXX-FREEBSD-FIXME
diff --git a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp
index 394a3369e036ed8..29315e76ec09de4 100644
--- a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp
+++ b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp
@@ -17,6 +17,12 @@
 // TODO: Investigate this failure on GCC 13 (in Ubuntu Jammy)
 // UNSUPPORTED: gcc-13
 
+// The Android libc++ tests are run on a non-Android host, connected to an
+// Android device over adb. gdb needs special support to make this work (e.g.
+// gdbclient.py, ndk-gdb.py, gdbserver), and the Android organization doesn't
+// support gdb anymore, favoring lldb instead.
+// UNSUPPORTED: android
+
 // RUN: %{cxx} %{flags} %s -o %t.exe %{compile_flags} -g %{link_flags}
 // Ensure locale-independence for unicode tests.
 // RUN: env LANG=en_US.UTF-8 %{gdb} -nx -batch -iex "set autoload off" -ex "source %S/../../../utils/gdb/libcxx/printers.py" -ex "python register_libcxx_printer_loader()" -ex "source %S/gdb_pretty_printer_test.py" %t.exe
diff --git a/libcxx/test/libcxx/input.output/iostream.format/print.fun/vprint_unicode_posix.pass.cpp b/libcxx/test/libcxx/input.output/iostream.format/print.fun/vprint_unicode_posix.pass.cpp
index dd4314f4f5f6266..9a50770d97dbcb2 100644
--- a/libcxx/test/libcxx/input.output/iostream.format/print.fun/vprint_unicode_posix.pass.cpp
+++ b/libcxx/test/libcxx/input.output/iostream.format/print.fun/vprint_unicode_posix.pass.cpp
@@ -11,6 +11,9 @@
 
 // XFAIL: availability-fp_to_chars-missing
 
+// fmemopen is available starting in Android M (API 23)
+// XFAIL: target={{.+}}-android{{(eabi)?(21|22)}}
+
 // REQUIRES: has-unix-headers
 
 // <print>
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/close.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/close.pass.cpp
index b545041e4283370..e0338e6f619b716 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/close.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.members/close.pass.cpp
@@ -10,6 +10,11 @@
 
 // basic_filebuf<charT,traits>* close();
 
+// This test closes an fd that belongs to a std::filebuf, and Bionic's fdsan
+// detects this and aborts the process, starting in Android R (API 30).
+// See D137129.
+// XFAIL: LIBCXX-ANDROID-FIXME && !android-device-api={{2[1-9]}}
+
 #include <fstream>
 #include <cassert>
 #if defined(__unix__)
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp
index fa105a74b6be73f..3a17e89d2e7631b 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp
@@ -12,6 +12,10 @@
 // against already-released libc++'s.
 // XFAIL: stdlib=apple-libc++ && target={{.+}}-apple-macosx{{10.15|11.0}}
 
+// Starting in Android N (API 24), SELinux policy prevents the shell user from
+// creating a FIFO file.
+// XFAIL: LIBCXX-ANDROID-FIXME && !android-device-api={{21|22|23}}
+
 // <filesystem>
 
 // class directory_entry
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp
index 6591d5191d9d5bf..b490f3379e9fe04 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp
@@ -8,6 +8,10 @@
 
 // UNSUPPORTED: c++03
 
+// Starting in Android N (API 24), SELinux policy prevents the shell user from
+// creating a hard link.
+// XFAIL: LIBCXX-ANDROID-FIXME && !android-device-api={{21|22|23}}
+
 // <filesystem>
 
 // class directory_entry
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp
index e8c4742290bab0b..ebf200df4024c92 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp
@@ -12,6 +12,10 @@
 // against already-released libc++'s.
 // XFAIL: stdlib=apple-libc++ && target={{.+}}-apple-macosx{{10.15|11.0}}
 
+// Starting in Android N (API 24), SELinux policy prevents the shell user from
+// creating a hard link.
+// XFAIL: LIBCXX-ANDROID-FIXME && !android-device-api={{21|22|23}}
+
 // <filesystem>
 
 // class directory_entry
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp
index da982918fbb3521..244f6a295fedfc7 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp
@@ -10,6 +10,10 @@
 // UNSUPPORTED: no-filesystem
 // UNSUPPORTED: availability-filesystem-missing
 
+// On Android L, ~scoped_test_env() is unable to delete the temp dir using
+// chmod+rm because chmod is too broken.
+// XFAIL: LIBCXX-ANDROID-FIXME && android-device-api={{21|22}}
+
 // <filesystem>
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp
index afcd5ab01ca66bc..09e00450dc5153a 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp
@@ -10,6 +10,10 @@
 // UNSUPPORTED: no-filesystem
 // UNSUPPORTED: availability-filesystem-missing
 
+// Starting in Android N (API 24), SELinux policy prevents the shell user from
+// creating a FIFO file.
+// XFAIL: LIBCXX-ANDROID-FIXME && !android-device-api={{21|22|23}}
+
 // <filesystem>
 
 // void copy(const path& from, const path& to);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file.pass.cpp
index 3083d41850a05a1..717f68901654c10 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy_file/copy_file.pass.cpp
@@ -14,6 +14,10 @@
 // against already-released libc++'s.
 // XFAIL: stdlib=apple-libc++ && target={{.+}}-apple-macosx{{10.15|11.0}}
 
+// Starting in Android N (API 24), SELinux policy prevents the shell user from
+// creating a FIFO file.
+// XFAIL: LIBCXX-ANDROID-FIXME && !android-device-api={{21|22|23}}
+
 // <filesystem>
 
 // bool copy_file(const path& from, const path& to);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_hard_link/create_hard_link.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_hard_link/create_hard_link.pass.cpp
index 171405979cfc42f..36a8216f1f0e813 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_hard_link/create_hard_link.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_hard_link/create_hard_link.pass.cpp
@@ -10,6 +10,10 @@
 // UNSUPPORTED: no-filesystem
 // UNSUPPORTED: availability-filesystem-missing
 
+// Starting in Android N (API 24), SELinux policy prevents the shell user from
+// creating a hard link.
+// XFAIL: LIBCXX-ANDROID-FIXME && !android-device-api={{21|22|23}}
+
 // <filesystem>
 
 // void create_hard_link(const path& existing_symlink, const path& new_symlink);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp
index 7fd1bf32e190726..f227cec8f2eaeec 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp
@@ -10,6 +10,10 @@
 // UNSUPPORTED: no-filesystem
 // UNSUPPORTED: availability-filesystem-missing
 
+// Starting in Android N (API 24), SELinux policy prevents the shell user from
+// creating a hard link.
+// XFAIL: LIBCXX-ANDROID-FIXME && !android-device-api={{21|22|23}}
+
 // <filesystem>
 
 // bool equivalent(path const& lhs, path const& rhs);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp
index 65a42e979370480..41cbf82684a3ee7 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp
@@ -10,6 +10,10 @@
 // UNSUPPORTED: no-filesystem
 // UNSUPPORTED: availability-filesystem-missing
 
+// Starting in Android N (API 24), SELinux policy prevents the shell user from
+// creating a hard link.
+// XFAIL: LIBCXX-ANDROID-FIXME && !android-device-api={{21|22|23}}
+
 // <filesystem>
 
 // uintmax_t hard_link_count(const path& p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp
index 74040ba102a4853..424fd1b84bda8fe 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp
@@ -10,6 +10,10 @@
 // UNSUPPORTED: no-filesystem
 // UNSUPPORTED: availability-filesystem-missing
 
+// Starting in Android N (API 24), SELinux policy prevents the shell user from
+// creating a FIFO file.
+// XFAIL: LIBCXX-ANDROID-FIXME && !android-device-api={{21|22|23}}
+
 // <filesystem>
 
 // bool is_empty(path const& p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.permissions/permissions.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.permissions/permissions.pass.cpp
index ff1493fa447b861..36e91548a8aa2eb 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.permissions/permissions.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.permissions/permissions.pass.cpp
@@ -10,6 +10,11 @@
 // UNSUPPORTED: no-filesystem
 // UNSUPPORTED: availability-filesystem-missing
 
+// Android's fchmodat seems broken on various OS versions -- see D140183. This
+// test probably passes on new-enough phones (not the emulator).
+// XFAIL: LIBCXX-ANDROID-FIXME && target={{i686|x86_64}}-{{.+}}-android{{.*}}
+// XFAIL: LIBCXX-ANDROID-FIXME && android-device-api={{21|22}}
+
 // <filesystem>
 
 // void permissions(const path& p, perms prms,
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp
index 6d3119114424d9b..64c05cf41524127 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp
@@ -10,6 +10,10 @@
 // UNSUPPORTED: no-filesystem
 // UNSUPPORTED: availability-filesystem-missing
 
+// Starting in Android N (API 24), SELinux policy prevents the shell user from
+// creating a FIFO file.
+// XFAIL: LIBCXX-ANDROID-FIXME && !android-device-api={{21|22|23}}
+
 // <filesystem>
 
 // file_status status(const path& p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp
index a58d35756a8ae7b..81e8389fb2fa7c8 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp
@@ -10,6 +10,10 @@
 // UNSUPPORTED: no-filesystem
 // UNSUPPORTED: availability-filesystem-missing
 
+// Starting in Android N (API 24), SELinux policy prevents the shell user from
+// creating a FIFO file.
+// XFAIL: LIBCXX-ANDROID-FIXME && !android-device-api={{21|22|23}}
+
 // <filesystem>
 
 // file_status symlink_status(const path& p);
diff --git a/libcxx/test/std/input.output/iostream.format/ext.manip/get_money.pass.cpp b/libcxx/test/std/input.output/iostream.format/ext.manip/get_money.pass.cpp
index 92b82e5f0278672..d468d7154dad5ae 100644
--- a/libcxx/test/std/input.output/iostream.format/ext.manip/get_money.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/ext.manip/get_money.pass.cpp
@@ -10,6 +10,9 @@
 
 // template <class moneyT> T7 get_money(moneyT& mon, bool intl = false);
 
+// Bionic has minimal locale support, investigate this later.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 // REQUIRES: locale.en_US.UTF-8
 
 #include <iomanip>
diff --git a/libcxx/test/std/input.output/iostream.format/ext.manip/put_money.pass.cpp b/libcxx/test/std/input.output/iostream.format/ext.manip/put_money.pass.cpp
index 0cfbcee0c4e286a..d0ff2c87689d144 100644
--- a/libcxx/test/std/input.output/iostream.format/ext.manip/put_money.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/ext.manip/put_money.pass.cpp
@@ -10,6 +10,9 @@
 
 // template <class charT, class moneyT> T8 put_money(const moneyT& mon, bool intl = false);
 
+// Bionic has minimal locale support, investigate this later.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 // REQUIRES: locale.en_US.UTF-8
 
 #include <iomanip>
diff --git a/libcxx/test/std/input.output/iostream.format/print.fun/no_file_description.pass.cpp b/libcxx/test/std/input.output/iostream.format/print.fun/no_file_description.pass.cpp
index ee23f728465de0a..c9297318cd5d667 100644
--- a/libcxx/test/std/input.output/iostream.format/print.fun/no_file_description.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/print.fun/no_file_description.pass.cpp
@@ -12,6 +12,9 @@
 // XFAIL: msvc, target={{.+}}-windows-gnu
 // XFAIL: availability-fp_to_chars-missing
 
+// fmemopen is available starting in Android M (API 23)
+// XFAIL: target={{.+}}-android{{(eabi)?(21|22)}}
+
 // <print>
 
 // The FILE returned by fmemopen does not have file descriptor.
diff --git a/libcxx/test/std/input.output/iostream.format/print.fun/print.file.pass.cpp b/libcxx/test/std/input.output/iostream.format/print.fun/print.file.pass.cpp
index 9fc2fcc4ca36f4c..4a397d3e3d6328f 100644
--- a/libcxx/test/std/input.output/iostream.format/print.fun/print.file.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/print.fun/print.file.pass.cpp
@@ -11,6 +11,9 @@
 
 // XFAIL: availability-fp_to_chars-missing
 
+// The error exception has no system error string.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 // <print>
 
 // template<class... Args>
diff --git a/libcxx/test/std/input.output/iostream.format/print.fun/println.file.pass.cpp b/libcxx/test/std/input.output/iostream.format/print.fun/println.file.pass.cpp
index 7c46d35215f6985..ebdddd074faf51c 100644
--- a/libcxx/test/std/input.output/iostream.format/print.fun/println.file.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/print.fun/println.file.pass.cpp
@@ -11,6 +11,9 @@
 
 // XFAIL: availability-fp_to_chars-missing
 
+// The error exception has no system error string.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 // <print>
 
 // template<class... Args>
diff --git a/libcxx/test/std/input.output/iostream.format/print.fun/vprint_nonunicode.file.pass.cpp b/libcxx/test/std/input.output/iostream.format/print.fun/vprint_nonunicode.file.pass.cpp
index b851c3bb7c191e8..9eb85f3b7b2d874 100644
--- a/libcxx/test/std/input.output/iostream.format/print.fun/vprint_nonunicode.file.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/print.fun/vprint_nonunicode.file.pass.cpp
@@ -16,6 +16,9 @@
 
 // XFAIL: availability-fp_to_chars-missing
 
+// The error exception has no system error string.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 // <print>
 
 // void vprint_nonunicode(FILE* stream, string_view fmt, format_args args);
diff --git a/libcxx/test/std/input.output/iostream.format/print.fun/vprint_unicode.file.pass.cpp b/libcxx/test/std/input.output/iostream.format/print.fun/vprint_unicode.file.pass.cpp
index 63b33f2db12fbc2..28379b9db50ed58 100644
--- a/libcxx/test/std/input.output/iostream.format/print.fun/vprint_unicode.file.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/print.fun/vprint_unicode.file.pass.cpp
@@ -17,6 +17,9 @@
 
 // XFAIL: availability-fp_to_chars-missing
 
+// The error exception has no system error string.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 // <print>
 
 // void vprint_unicode(FILE* stream, string_view fmt, format_args args);
diff --git a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cin.sh.cpp b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cin.sh.cpp
index b39cd57ab212f1d..28ea650e58b1cde 100644
--- a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cin.sh.cpp
+++ b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cin.sh.cpp
@@ -9,6 +9,10 @@
 // TODO: Investigate
 // UNSUPPORTED: LIBCXX-AIX-FIXME
 
+// This test hangs on Android devices that lack shell_v2, which was added in
+// Android N (API 24).
+// UNSUPPORTED: LIBCXX-ANDROID-FIXME && android-device-api={{2[1-3]}}
+
 // <iostream>
 
 // istream cin;
diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin-imbue.sh.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin-imbue.sh.cpp
index 6bdffc93f3b6693..027e4fa936f11d3 100644
--- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin-imbue.sh.cpp
+++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin-imbue.sh.cpp
@@ -9,6 +9,10 @@
 // TODO: Investigate
 // UNSUPPORTED: LIBCXX-AIX-FIXME
 
+// This test hangs on Android devices that lack shell_v2, which was added in
+// Android N (API 24).
+// UNSUPPORTED: LIBCXX-ANDROID-FIXME && android-device-api={{2[1-3]}}
+
 // <iostream>
 
 // wistream wcin;
diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin.sh.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin.sh.cpp
index c0f2c3258b540f6..30972da2f093485 100644
--- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin.sh.cpp
+++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcin.sh.cpp
@@ -9,6 +9,10 @@
 // TODO: Investigate
 // UNSUPPORTED: LIBCXX-AIX-FIXME
 
+// This test hangs on Android devices that lack shell_v2, which was added in
+// Android N (API 24).
+// UNSUPPORTED: LIBCXX-ANDROID-FIXME && android-device-api={{2[1-3]}}
+
 // <iostream>
 
 // wistream wcin;
diff --git a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/pbump2gig.pass.cpp b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/pbump2gig.pass.cpp
index 4c63cd1ab31d2ef..6c747964e33b80c 100644
--- a/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/pbump2gig.pass.cpp
+++ b/libcxx/test/std/input.output/stream.buffers/streambuf/streambuf.protected/streambuf.put.area/pbump2gig.pass.cpp
@@ -22,9 +22,8 @@
 // UNSUPPORTED: no-exceptions
 
 // Android devices frequently don't have enough memory to run this test. Rather
-// than throw std::bad_alloc, exhausting memory tends to trigger the OOM Killer
-// and/or crash the device (killing adb, rebooting it, etc).
-// UNSUPPORTED: android
+// than throw std::bad_alloc, exhausting memory triggers the OOM Killer.
+// UNSUPPORTED: LIBCXX-ANDROID-FIXME
 
 #include <sstream>
 #include <cassert>
diff --git a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp
index 8dc74421e789590..a9079dc63b6b563 100644
--- a/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp
+++ b/libcxx/test/std/input.output/string.streams/stringstream/stringstream.members/gcount.pass.cpp
@@ -9,6 +9,10 @@
 // UNSUPPORTED: 32-bit-pointer
 // REQUIRES: large_tests
 
+// Android devices frequently don't have enough memory to run this test. Rather
+// than throw std::bad_alloc, exhausting memory triggers the OOM Killer.
+// UNSUPPORTED: LIBCXX-ANDROID-FIXME
+
 // Test that tellp() does not break the stringstream after INT_MAX, due to use
 // of pbump() that accept int.
 
diff --git a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp
index 69f41c80749832d..158bd5182ecc586 100644
--- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp
@@ -6,6 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+// Bionic has minimal locale support, investigate this later.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 // REQUIRES: locale.en_US.UTF-8
 
 // <locale>
diff --git a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/transform.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/transform.pass.cpp
index 8d076fdeba998c4..4978db116ef3907 100644
--- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/transform.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/transform.pass.cpp
@@ -9,6 +9,9 @@
 // NetBSD does not support LC_COLLATE at the moment
 // XFAIL: netbsd
 
+// Bionic has minimal locale support, investigate this later.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 // <locale>
 
 // template <class charT> class collate_byname
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp
index d08a71219fce850..5cebc079c470398 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp
@@ -15,6 +15,9 @@
 // REQUIRES: locale.en_US.UTF-8
 // XFAIL: no-wide-characters
 
+// Bionic has minimal locale support, investigate this later.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 #include <locale>
 #include <type_traits>
 #include <cassert>
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp
index 8aa1aa85d97e898..2869378b42b92c5 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp
@@ -15,6 +15,9 @@
 // REQUIRES: locale.en_US.UTF-8
 // XFAIL: no-wide-characters
 
+// Bionic has minimal locale support, investigate this later.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 #include <locale>
 #include <string>
 #include <vector>
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp
index 540efde1ea6adc2..ea03f042f2d45c8 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp
@@ -15,6 +15,9 @@
 // REQUIRES: locale.en_US.UTF-8
 // XFAIL: no-wide-characters
 
+// Bionic has minimal locale support, investigate this later.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 #include <locale>
 #include <string>
 #include <vector>
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp
index 73e3c138575d379..2f7acb843e59167 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp
@@ -15,6 +15,9 @@
 // REQUIRES: locale.en_US.UTF-8
 // XFAIL: no-wide-characters
 
+// Bionic has minimal locale support, investigate this later.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 #include <locale>
 #include <string>
 #include <vector>
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_1.pass.cpp
index 49b56aa9312cb6e..447353de8af72fd 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_1.pass.cpp
@@ -9,6 +9,9 @@
 // REQUIRES: locale.en_US.UTF-8
 // XFAIL: win32-broken-utf8-wchar-ctype
 
+// Bionic has minimal locale support, investigate this later.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 // <locale>
 
 // template <class charT> class ctype_byname;
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_many.pass.cpp
index 952dcf4a6b0111d..94ecc6c8fa85b03 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/tolower_many.pass.cpp
@@ -9,6 +9,9 @@
 // REQUIRES: locale.en_US.UTF-8
 // XFAIL: win32-broken-utf8-wchar-ctype
 
+// Bionic has minimal locale support, investigate this later.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 // <locale>
 
 // template <class charT> class ctype_byname;
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_1.pass.cpp
index 24f8b3d93b65deb..2b37737a26bc365 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_1.pass.cpp
@@ -9,6 +9,9 @@
 // REQUIRES: locale.en_US.UTF-8
 // XFAIL: win32-broken-utf8-wchar-ctype
 
+// Bionic has minimal locale support, investigate this later.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 // <locale>
 
 // template <class charT> class ctype_byname;
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_many.pass.cpp
index f5077634257f60a..016d8d736ad22a2 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_many.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/toupper_many.pass.cpp
@@ -9,6 +9,9 @@
 // REQUIRES: locale.en_US.UTF-8
 // XFAIL: win32-broken-utf8-wchar-ctype
 
+// Bionic has minimal locale support, investigate this later.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 // <locale>
 
 // template <class charT> class ctype_byname;
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_en_US.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_en_US.pass.cpp
index 21117da3c2887dc..9997b07134563b1 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_en_US.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_en_US.pass.cpp
@@ -13,6 +13,9 @@
 // iter_type get(iter_type b, iter_type e, bool intl, ios_base& iob,
 //               ios_base::iostate& err, long double& v) const;
 
+// Bionic has minimal locale support, investigate this later.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 // REQUIRES: locale.en_US.UTF-8
 
 #include <locale>
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_string_en_US.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_string_en_US.pass.cpp
index 80e84a425ee2abe..478df7964f6d2b9 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_string_en_US.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_string_en_US.pass.cpp
@@ -13,6 +13,9 @@
 // iter_type get(iter_type b, iter_type e, bool intl, ios_base& iob,
 //               ios_base::iostate& err, string_type& v) const;
 
+// Bionic has minimal locale support, investigate this later.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 // REQUIRES: locale.en_US.UTF-8
 
 #include <locale>
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_en_US.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_en_US.pass.cpp
index 7092c236eb20425..4b767fae871fa77 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_en_US.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_en_US.pass.cpp
@@ -13,6 +13,9 @@
 // iter_type put(iter_type s, bool intl, ios_base& f, char_type fill,
 //               long double units) const;
 
+// Bionic has minimal locale support, investigate this later.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 // REQUIRES: locale.en_US.UTF-8
 
 #include <locale>
diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_string_en_US.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_string_en_US.pass.cpp
index bb7047a16216df0..1c8710a008f27f2 100644
--- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_string_en_US.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_string_en_US.pass.cpp
@@ -13,6 +13,9 @@
 // iter_type put(iter_type s, bool intl, ios_base& f, char_type fill,
 //               const string_type& units) const;
 
+// Bionic has minimal locale support, investigate this later.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 // REQUIRES: locale.en_US.UTF-8
 
 #include <locale>
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp
index 3327a35a6045016..8bb9ff1c202ae95 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp
@@ -14,6 +14,10 @@
 
 // XFAIL: win32-broken-printf-g-precision
 
+// Needs more investigation, but this is probably failing on Android M (API 23)
+// and up because the printf formatting of NAN changed.
+// XFAIL: LIBCXX-ANDROID-FIXME && !android-device-api={{21|22}}
+
 #include <locale>
 #include <ios>
 #include <cassert>
diff --git a/libcxx/test/std/re/re.traits/translate_nocase.pass.cpp b/libcxx/test/std/re/re.traits/translate_nocase.pass.cpp
index 1048e585973d335..be81286e72cbd77 100644
--- a/libcxx/test/std/re/re.traits/translate_nocase.pass.cpp
+++ b/libcxx/test/std/re/re.traits/translate_nocase.pass.cpp
@@ -15,6 +15,10 @@
 // REQUIRES: locale.en_US.UTF-8
 // XFAIL: win32-broken-utf8-wchar-ctype
 
+// Prior to Android O (API 26), in the "en_US.UTF-8" locale, towlower(L'\xDA')
+// returned 0xDA instead of 0xFA.
+// XFAIL: LIBCXX-ANDROID-FIXME && android-device-api={{21|22|23|24|25}}
+
 #include <regex>
 #include <cassert>
 
diff --git a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_one.pass.cpp b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_one.pass.cpp
index b0c476fa19dec8b..bc6c3016eb1a8e4 100644
--- a/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_one.pass.cpp
+++ b/libcxx/test/std/thread/thread.condition/thread.condition.condvar/notify_one.pass.cpp
@@ -8,6 +8,9 @@
 //
 // UNSUPPORTED: no-threads
 
+// This test occasionally fails on Android.
+// UNSUPPORTED: LIBCXX-ANDROID-FIXME
+
 // <condition_variable>
 
 // class condition_variable;
diff --git a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp
index 44bf84d55e55019..b25fd25f4e194bc 100644
--- a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp
@@ -11,6 +11,9 @@
 
 // XFAIL: availability-fp_to_chars-missing
 
+// Bionic has minimal locale support, investigate this later.
+// XFAIL: LIBCXX-ANDROID-FIXME
+
 // REQUIRES: locale.en_US.UTF-8
 
 // <format>
diff --git a/libcxx/test/support/filesystem_test_helper.h b/libcxx/test/support/filesystem_test_helper.h
index d63b1e61b5f9b53..f924f909fe15073 100644
--- a/libcxx/test/support/filesystem_test_helper.h
+++ b/libcxx/test/support/filesystem_test_helper.h
@@ -180,13 +180,22 @@ struct scoped_test_env
         std::string cmd = "chmod -R 777 " + test_root.string();
 #endif // defined(__MVS__)
         int ret = std::system(cmd.c_str());
-#if !defined(_AIX)
+#  if !defined(_AIX) && !defined(__ANDROID__)
         // On AIX the chmod command will return non-zero when trying to set
         // the permissions on a directory that contains a bad symlink. This triggers
         // the assert, despite being able to delete everything with the following
         // `rm -r` command.
+        //
+        // Android's chmod was buggy in old OSs, but skipping this assert is
+        // sufficient to ensure that the `rm -rf` succeeds for almost all tests:
+        //  - Android L: chmod aborts after one error
+        //  - Android L and M: chmod -R tries to set permissions of a symlink
+        //    target.
+        // LIBCXX-ANDROID-FIXME: Other fixes to consider: place a toybox chmod
+        // onto old devices, re-enable this assert for devices running Android N
+        // and up, rewrite this chmod+rm in C or C++.
         assert(ret == 0);
-#endif
+#  endif
 
         cmd = "rm -rf " + test_root.string();
         ret = std::system(cmd.c_str());
diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py
index 3779af1094d5d0f..11f60a9620546f4 100644
--- a/libcxx/utils/libcxx/test/features.py
+++ b/libcxx/utils/libcxx/test/features.py
@@ -43,6 +43,22 @@ def _getSuitableClangTidy(cfg):
         return None
 
 
+def _getAndroidDeviceApi(cfg):
+    return int(
+        programOutput(
+            cfg,
+            r"""
+                #include <android/api-level.h>
+                #include <stdio.h>
+                int main() {
+                    printf("%d\n", android_get_device_api_level());
+                    return 0;
+                }
+            """,
+        )
+    )
+
+
 DEFAULT_FEATURES = [
     Feature(
         name="thread-safety",
@@ -387,6 +403,15 @@ def _getSuitableClangTidy(cfg):
         actions=[AddCompileFlag("-DTEST_WINDOWS_DLL")],
     ),
     Feature(name="linux", when=lambda cfg: "__linux__" in compilerMacros(cfg)),
+    Feature(name="android", when=lambda cfg: "__ANDROID__" in compilerMacros(cfg)),
+    Feature(
+        name=lambda cfg: "android-device-api={}".format(_getAndroidDeviceApi(cfg)),
+        when=lambda cfg: "__ANDROID__" in compilerMacros(cfg),
+    ),
+    Feature(
+        name="LIBCXX-ANDROID-FIXME",
+        when=lambda cfg: "__ANDROID__" in compilerMacros(cfg),
+    ),
     Feature(name="netbsd", when=lambda cfg: "__NetBSD__" in compilerMacros(cfg)),
     Feature(name="freebsd", when=lambda cfg: "__FreeBSD__" in compilerMacros(cfg)),
     Feature(
diff --git a/libcxxabi/test/test_demangle.pass.cpp b/libcxxabi/test/test_demangle.pass.cpp
index df7bedc73c289ba..77741a952850ab9 100644
--- a/libcxxabi/test/test_demangle.pass.cpp
+++ b/libcxxabi/test/test_demangle.pass.cpp
@@ -14,6 +14,10 @@
 // UNSUPPORTED: stdlib=apple-libc++ && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}}
 // UNSUPPORTED: stdlib=apple-libc++ && target={{.+}}-apple-macosx11.0
 
+// Android's long double on x86[-64] is (64/128)-bits instead of Linux's usual
+// 80-bit format, and this demangling test is failing on it.
+// XFAIL: LIBCXX-ANDROID-FIXME && target={{i686|x86_64}}-{{.+}}-android{{.*}}
+
 #include "support/timer.h"
 #include <algorithm>
 #include <cassert>

>From ff15e9621d8bd687443634aef5e3bbecfc3edcb6 Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407 <zhongyunde at huawei.com>
Date: Wed, 18 Oct 2023 21:31:03 -0400
Subject: [PATCH 76/76] [SVE][InstCombine] Fold ld1d and splice into ld1ro

Perform the transform when the value loaded by the ld1d is only used by the splice.
Fixes https://github.com/llvm/llvm-project/issues/69440.
---
 .../AArch64/AArch64TargetTransformInfo.cpp    | 32 ++++++++++++++++
 .../AArch64/sve-intrinsic-ld1ro.ll            | 37 +++++++++++++++++++
 2 files changed, 69 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-ld1ro.ll

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index d8a0e68d7123759..2a65fe0eb0bc84a 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1258,6 +1258,36 @@ instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
   return IC.eraseInstFromFunction(II);
 }
 
+static std::optional<Instruction *>
+instCombineSVESplice(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL,
+                     const AArch64Subtarget *ST) {
+  Value *Pred = II.getOperand(0);
+  Value *VecOp0 = II.getOperand(1);
+  Value *VecOp1 = II.getOperand(2);
+  Value *PtrOp;
+  unsigned MinSVESize = ST->getMinSVEVectorSizeInBits();
+  unsigned MaxSVESize = ST->getMaxSVEVectorSizeInBits();
+  if (!ST->hasMatMulFP64() || VecOp0 != VecOp1 || MinSVESize != MaxSVESize ||
+      VecOp0->hasNUsesOrMore(3) ||
+      !match(VecOp0,
+             m_Intrinsic<Intrinsic::masked_load>(m_Value(PtrOp), m_Value(),
+                                                 m_Specific(Pred), m_Zero())))
+    return std::nullopt;
+
+  unsigned BitsPerElt = II.getType()->getScalarSizeInBits();
+  unsigned HalfLen = MinSVESize / BitsPerElt / 2;
+  const APInt *CI;
+  // The ld1ro instruction loads 256 contiguous bits, so half of a 512-bit vector should match it.
+  if (!match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_whilelt>(m_Zero(),
+                                                               m_APInt(CI))) ||
+      HalfLen == 0 || CI->getZExtValue() != HalfLen || MinSVESize != 512)
+    return std::nullopt;
+
+  CallInst *Res = IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ld1ro,
+                                             {II.getType()}, {Pred, PtrOp});
+  return IC.replaceInstUsesWith(II, Res);
+}
+
 static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {
   switch (Intrinsic) {
   case Intrinsic::aarch64_sve_fmul_u:
@@ -1894,6 +1924,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
     return instCombineSVELD1(IC, II, DL);
   case Intrinsic::aarch64_sve_st1:
     return instCombineSVEST1(IC, II, DL);
+  case Intrinsic::aarch64_sve_splice:
+    return instCombineSVESplice(IC, II, DL, ST);
   case Intrinsic::aarch64_sve_sdiv:
     return instCombineSVESDIV(IC, II);
   case Intrinsic::aarch64_sve_sel:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-ld1ro.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-ld1ro.ll
new file mode 100644
index 000000000000000..3aee8cb8e169f39
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-ld1ro.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mattr=+sve,+f64mm -passes=instcombine -aarch64-sve-vector-bits-min=512 -aarch64-sve-vector-bits-max=512 < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 2 x double> @combine_ld1ro_double(ptr %addr) {
+; CHECK-LABEL: @combine_ld1ro_double(
+; CHECK-NEXT:    [[PRED:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 0, i64 4)
+; CHECK-NEXT:    [[RES:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ld1ro.nxv2f64(<vscale x 2 x i1> [[PRED]], ptr [[ADDR:%.*]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[RES]]
+;
+  %pred = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 0, i64 4)   ; half = 512/bits(type double)/2 = 4
+  %a = call <vscale x 2 x double> @llvm.masked.load.nxv2f64(ptr %addr, i32 8, <vscale x 2 x i1> %pred, <vscale x 2 x double> zeroinitializer)
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1> %pred, <vscale x 2 x double> %a, <vscale x 2 x double> %a)
+  ret <vscale x 2 x double> %res
+}
+
+; Negative test: More than 2 uses
+define <vscale x 2 x double> @combine_ld1ro_double_3uses(ptr %addr) {
+; CHECK-LABEL: @combine_ld1ro_double_3uses(
+; CHECK-NEXT:    [[PRED:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 0, i64 4)
+; CHECK-NEXT:    [[A:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[ADDR:%.*]], i32 8, <vscale x 2 x i1> [[PRED]], <vscale x 2 x double> zeroinitializer)
+; CHECK-NEXT:    call void @use_double(<vscale x 2 x double> [[A]])
+; CHECK-NEXT:    [[RES:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1> [[PRED]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[A]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[RES]]
+;
+  %pred = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 0, i64 4)   ; half = 512/bits(type double)/2 = 4
+  %a = call <vscale x 2 x double> @llvm.masked.load.nxv2f64(ptr %addr, i32 8, <vscale x 2 x i1> %pred, <vscale x 2 x double> zeroinitializer)
+  call void @use_double(<vscale x 2 x double> %a)
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1> %pred, <vscale x 2 x double> %a, <vscale x 2 x double> %a)
+  ret <vscale x 2 x double> %res
+}
+
+declare void @use_double(<vscale x 2 x double>)
+declare <vscale x 2 x double> @llvm.masked.load.nxv2f64(<vscale x 2 x double>*, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64, i64)



More information about the llvm-commits mailing list