[llvm] e4fa829 - [AArch64] Allow copying of SVE registers in Streaming SVE

Cullen Rhodes via llvm-commits <llvm-commits at lists.llvm.org>
Thu Mar 3 01:51:42 PST 2022


Author: Cullen Rhodes
Date: 2022-03-03T09:51:14Z
New Revision: e4fa8291a277fb3eb84a1ee54b4cc7f7434c7f00

URL: https://github.com/llvm/llvm-project/commit/e4fa8291a277fb3eb84a1ee54b4cc7f7434c7f00
DIFF: https://github.com/llvm/llvm-project/commit/e4fa8291a277fb3eb84a1ee54b4cc7f7434c7f00.diff

LOG: [AArch64] Allow copying of SVE registers in Streaming SVE

Reviewed By: sdesmalen

Differential Revision: https://reviews.llvm.org/D118562
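
In copyPhysReg, copies between SVE predicate (P) registers and between SVE
vector (Z) registers, including Z-register tuples, are lowered to self-ORR
instructions; the asserts guarding those paths previously required the +sve
subtarget feature and now also accept +streaming-sve. The affected intrinsic
tests accordingly gain a RUN line with -mattr=+streaming-sve. As a rough
illustration of the behaviour those tests exercise, a reduced example in the
same style might look like the sketch below (illustrative only; the function
name and RUN/CHECK lines here are not taken from this commit):

; RUN: llc -mtriple aarch64 -mattr=+streaming-sve < %s | FileCheck %s

; Returning the second Z-register argument forces a z0 <- z1 copy at the
; return, which copyPhysReg emits as a self-ORR, printed via its alias as
; "mov z0.d, z1.d". With only +streaming-sve this would previously trip
; the hasSVE() assert.
define <vscale x 16 x i8> @ret_second_arg(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1) {
; CHECK-LABEL: ret_second_arg:
; CHECK: mov z0.d, z1.d
; CHECK: ret
  ret <vscale x 16 x i8> %z1
}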

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index ec8ecf4c3746f..f10e20f551819 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3462,7 +3462,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   // Copy a Predicate register by ORRing with itself.
   if (AArch64::PPRRegClass.contains(DestReg) &&
       AArch64::PPRRegClass.contains(SrcReg)) {
-    assert(Subtarget.hasSVE() && "Unexpected SVE register.");
+    assert((Subtarget.hasSVE() || Subtarget.hasStreamingSVE()) &&
+           "Unexpected SVE register.");
     BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
       .addReg(SrcReg) // Pg
       .addReg(SrcReg)
@@ -3473,7 +3474,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   // Copy a Z register by ORRing with itself.
   if (AArch64::ZPRRegClass.contains(DestReg) &&
       AArch64::ZPRRegClass.contains(SrcReg)) {
-    assert(Subtarget.hasSVE() && "Unexpected SVE register.");
+    assert((Subtarget.hasSVE() || Subtarget.hasStreamingSVE()) &&
+           "Unexpected SVE register.");
     BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
       .addReg(SrcReg)
       .addReg(SrcReg, getKillRegState(KillSrc));
@@ -3483,6 +3485,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   // Copy a Z register pair by copying the individual sub-registers.
   if (AArch64::ZPR2RegClass.contains(DestReg) &&
       AArch64::ZPR2RegClass.contains(SrcReg)) {
+    assert((Subtarget.hasSVE() || Subtarget.hasStreamingSVE()) &&
+           "Unexpected SVE register.");
     static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
                      Indices);
@@ -3492,6 +3496,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   // Copy a Z register triple by copying the individual sub-registers.
   if (AArch64::ZPR3RegClass.contains(DestReg) &&
       AArch64::ZPR3RegClass.contains(SrcReg)) {
+    assert((Subtarget.hasSVE() || Subtarget.hasStreamingSVE()) &&
+           "Unexpected SVE register.");
     static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
                                        AArch64::zsub2};
     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
@@ -3502,6 +3508,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   // Copy a Z register quad by copying the individual sub-registers.
   if (AArch64::ZPR4RegClass.contains(DestReg) &&
       AArch64::ZPR4RegClass.contains(SrcReg)) {
+    assert((Subtarget.hasSVE() || Subtarget.hasStreamingSVE()) &&
+           "Unexpected SVE register.");
     static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
                                        AArch64::zsub2, AArch64::zsub3};
     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll
index 2cae21f6c449c..f846e6a345a01 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll
@@ -1,10 +1,11 @@
 ; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=1 < %s | FileCheck %s
+; RUN: llc -mtriple aarch64 -mattr=+streaming-sve -asm-verbose=1 < %s | FileCheck %s
 
 ;
 ; SVCREATE2 (i8)
 ;
 
-define <vscale x 16 x i8> @test_svcreate2_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1) local_unnamed_addr #0 {
+define <vscale x 16 x i8> @test_svcreate2_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_s8_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -17,7 +18,7 @@ L2:
   ret <vscale x 16 x i8> %extract
 }
 
-define <vscale x 16 x i8> @test_svcreate2_s8_vec1(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1) local_unnamed_addr #0 {
+define <vscale x 16 x i8> @test_svcreate2_s8_vec1(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_s8_vec1:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z1.d
@@ -35,7 +36,7 @@ L2:
 ; SVCREATE2 (i16)
 ;
 
-define <vscale x 8 x i16> @test_svcreate2_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1) local_unnamed_addr #0 {
+define <vscale x 8 x i16> @test_svcreate2_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_s16_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -48,7 +49,7 @@ L2:
   ret <vscale x 8 x i16> %extract
 }
 
-define <vscale x 8 x i16> @test_svcreate2_s16_vec1(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1) local_unnamed_addr #0 {
+define <vscale x 8 x i16> @test_svcreate2_s16_vec1(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_s16_vec1:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z1.d
@@ -66,7 +67,7 @@ L2:
 ; SVCREATE2 (half)
 ;
 
-define <vscale x 8 x half> @test_svcreate2_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1) local_unnamed_addr #0 {
+define <vscale x 8 x half> @test_svcreate2_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_f16_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -79,7 +80,7 @@ L2:
   ret <vscale x 8 x half> %extract
 }
 
-define <vscale x 8 x half> @test_svcreate2_f16_vec1(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1) local_unnamed_addr #0 {
+define <vscale x 8 x half> @test_svcreate2_f16_vec1(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_f16_vec1:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z1.d
@@ -97,7 +98,7 @@ L2:
 ; SVCREATE2 (bfloat)
 ;
 
-define <vscale x 8 x bfloat> @test_svcreate2_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1) local_unnamed_addr #1 {
+define <vscale x 8 x bfloat> @test_svcreate2_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1) #1 {
 ; CHECK-LABEL: test_svcreate2_bf16_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -110,7 +111,7 @@ L2:
   ret <vscale x 8 x bfloat> %extract
 }
 
-define <vscale x 8 x bfloat> @test_svcreate2_bf16_vec1(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1) local_unnamed_addr #1 {
+define <vscale x 8 x bfloat> @test_svcreate2_bf16_vec1(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1) #1 {
 ; CHECK-LABEL: test_svcreate2_bf16_vec1:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z1.d
@@ -128,7 +129,7 @@ L2:
 ; SVCREATE2 (i32)
 ;
 
-define <vscale x 4 x i32> @test_svcreate2_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1) local_unnamed_addr #0 {
+define <vscale x 4 x i32> @test_svcreate2_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_s32_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -141,7 +142,7 @@ L2:
   ret <vscale x 4 x i32> %extract
 }
 
-define <vscale x 4 x i32> @test_svcreate2_s32_vec1(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1) local_unnamed_addr #0 {
+define <vscale x 4 x i32> @test_svcreate2_s32_vec1(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_s32_vec1:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z1.d
@@ -159,7 +160,7 @@ L2:
 ; SVCREATE2 (float)
 ;
 
-define <vscale x 4 x float> @test_svcreate2_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1) local_unnamed_addr #0 {
+define <vscale x 4 x float> @test_svcreate2_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_f32_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -172,7 +173,7 @@ L2:
   ret <vscale x 4 x float> %extract
 }
 
-define <vscale x 4 x float> @test_svcreate2_f32_vec1(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1) local_unnamed_addr #0 {
+define <vscale x 4 x float> @test_svcreate2_f32_vec1(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_f32_vec1:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z1.d
@@ -190,7 +191,7 @@ L2:
 ; SVCREATE2 (i64)
 ;
 
-define <vscale x 2 x i64> @test_svcreate2_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1) local_unnamed_addr #0 {
+define <vscale x 2 x i64> @test_svcreate2_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_s64_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -203,7 +204,7 @@ L2:
   ret <vscale x 2 x i64> %extract
 }
 
-define <vscale x 2 x i64> @test_svcreate2_s64_vec1(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1) local_unnamed_addr #0 {
+define <vscale x 2 x i64> @test_svcreate2_s64_vec1(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_s64_vec1:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z1.d
@@ -221,7 +222,7 @@ L2:
 ; SVCREATE2 (double)
 ;
 
-define <vscale x 2 x double> @test_svcreate2_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1) local_unnamed_addr #0 {
+define <vscale x 2 x double> @test_svcreate2_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_f64_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -234,7 +235,7 @@ L2:
   ret <vscale x 2 x double> %extract
 }
 
-define <vscale x 2 x double> @test_svcreate2_f64_vec1(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1) local_unnamed_addr #0 {
+define <vscale x 2 x double> @test_svcreate2_f64_vec1(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1) #0 {
 ; CHECK-LABEL: test_svcreate2_f64_vec1:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z1.d
@@ -252,7 +253,7 @@ L2:
 ; SVCREATE3 (i8)
 ;
 
-define <vscale x 16 x i8> @test_svcreate3_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) local_unnamed_addr #0 {
+define <vscale x 16 x i8> @test_svcreate3_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_s8_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -265,7 +266,7 @@ L2:
   ret <vscale x 16 x i8> %extract
 }
 
-define <vscale x 16 x i8> @test_svcreate3_s8_vec2(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) local_unnamed_addr #0 {
+define <vscale x 16 x i8> @test_svcreate3_s8_vec2(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_s8_vec2:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z2.d
@@ -283,7 +284,7 @@ L2:
 ; SVCREATE3 (i16)
 ;
 
-define <vscale x 8 x i16> @test_svcreate3_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) local_unnamed_addr #0 {
+define <vscale x 8 x i16> @test_svcreate3_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_s16_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -296,7 +297,7 @@ L2:
   ret <vscale x 8 x i16> %extract
 }
 
-define <vscale x 8 x i16> @test_svcreate3_s16_vec2(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) local_unnamed_addr #0 {
+define <vscale x 8 x i16> @test_svcreate3_s16_vec2(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_s16_vec2:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z2.d
@@ -314,7 +315,7 @@ L2:
 ; SVCREATE3 (half)
 ;
 
-define <vscale x 8 x half> @test_svcreate3_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2) local_unnamed_addr #0 {
+define <vscale x 8 x half> @test_svcreate3_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_f16_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -327,7 +328,7 @@ L2:
   ret <vscale x 8 x half> %extract
 }
 
-define <vscale x 8 x half> @test_svcreate3_f16_vec2(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2) local_unnamed_addr #0 {
+define <vscale x 8 x half> @test_svcreate3_f16_vec2(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_f16_vec2:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z2.d
@@ -345,7 +346,7 @@ L2:
 ; SVCREATE3 (bfloat)
 ;
 
-define <vscale x 8 x bfloat> @test_svcreate3_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2) local_unnamed_addr #1 {
+define <vscale x 8 x bfloat> @test_svcreate3_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2) #1 {
 ; CHECK-LABEL: test_svcreate3_bf16_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -358,7 +359,7 @@ L2:
   ret <vscale x 8 x bfloat> %extract
 }
 
-define <vscale x 8 x bfloat> @test_svcreate3_bf16_vec2(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2) local_unnamed_addr #1 {
+define <vscale x 8 x bfloat> @test_svcreate3_bf16_vec2(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2) #1 {
 ; CHECK-LABEL: test_svcreate3_bf16_vec2:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z2.d
@@ -376,7 +377,7 @@ L2:
 ; SVCREATE3 (i32)
 ;
 
-define <vscale x 4 x i32> @test_svcreate3_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) local_unnamed_addr #0 {
+define <vscale x 4 x i32> @test_svcreate3_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_s32_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -389,7 +390,7 @@ L2:
   ret <vscale x 4 x i32> %extract
 }
 
-define <vscale x 4 x i32> @test_svcreate3_s32_vec2(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) local_unnamed_addr #0 {
+define <vscale x 4 x i32> @test_svcreate3_s32_vec2(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_s32_vec2:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z2.d
@@ -407,7 +408,7 @@ L2:
 ; SVCREATE3 (float)
 ;
 
-define <vscale x 4 x float> @test_svcreate3_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) local_unnamed_addr #0 {
+define <vscale x 4 x float> @test_svcreate3_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_f32_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -420,7 +421,7 @@ L2:
   ret <vscale x 4 x float> %extract
 }
 
-define <vscale x 4 x float> @test_svcreate3_f32_vec2(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) local_unnamed_addr #0 {
+define <vscale x 4 x float> @test_svcreate3_f32_vec2(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_f32_vec2:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z2.d
@@ -438,7 +439,7 @@ L2:
 ; SVCREATE3 (i64)
 ;
 
-define <vscale x 2 x i64> @test_svcreate3_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) local_unnamed_addr #0 {
+define <vscale x 2 x i64> @test_svcreate3_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_s64_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -451,7 +452,7 @@ L2:
   ret <vscale x 2 x i64> %extract
 }
 
-define <vscale x 2 x i64> @test_svcreate3_s64_vec2(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) local_unnamed_addr #0 {
+define <vscale x 2 x i64> @test_svcreate3_s64_vec2(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_s64_vec2:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z2.d
@@ -469,7 +470,7 @@ L2:
 ; SVCREATE3 (double)
 ;
 
-define <vscale x 2 x double> @test_svcreate3_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) local_unnamed_addr #0 {
+define <vscale x 2 x double> @test_svcreate3_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_f64_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -482,7 +483,7 @@ L2:
   ret <vscale x 2 x double> %extract
 }
 
-define <vscale x 2 x double> @test_svcreate3_f64_vec2(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) local_unnamed_addr #0 {
+define <vscale x 2 x double> @test_svcreate3_f64_vec2(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) #0 {
 ; CHECK-LABEL: test_svcreate3_f64_vec2:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z2.d
@@ -500,7 +501,7 @@ L2:
 ; SVCREATE4 (i8)
 ;
 
-define <vscale x 16 x i8> @test_svcreate4_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) local_unnamed_addr #0 {
+define <vscale x 16 x i8> @test_svcreate4_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_s8_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -513,7 +514,7 @@ L2:
   ret <vscale x 16 x i8> %extract
 }
 
-define <vscale x 16 x i8> @test_svcreate4_s8_vec3(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) local_unnamed_addr #0 {
+define <vscale x 16 x i8> @test_svcreate4_s8_vec3(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_s8_vec3:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z3.d
@@ -531,7 +532,7 @@ L2:
 ; SVCREATE4 (i16)
 ;
 
-define <vscale x 8 x i16> @test_svcreate4_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) local_unnamed_addr #0 {
+define <vscale x 8 x i16> @test_svcreate4_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_s16_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -544,7 +545,7 @@ L2:
   ret <vscale x 8 x i16> %extract
 }
 
-define <vscale x 8 x i16> @test_svcreate4_s16_vec3(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) local_unnamed_addr #0 {
+define <vscale x 8 x i16> @test_svcreate4_s16_vec3(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_s16_vec3:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z3.d
@@ -562,7 +563,7 @@ L2:
 ; SVCREATE4 (half)
 ;
 
-define <vscale x 8 x half> @test_svcreate4_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3) local_unnamed_addr #0 {
+define <vscale x 8 x half> @test_svcreate4_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_f16_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -575,7 +576,7 @@ L2:
   ret <vscale x 8 x half> %extract
 }
 
-define <vscale x 8 x half> @test_svcreate4_f16_vec3(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3) local_unnamed_addr #0 {
+define <vscale x 8 x half> @test_svcreate4_f16_vec3(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_f16_vec3:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z3.d
@@ -593,7 +594,7 @@ L2:
 ; SVCREATE4 (bfloat)
 ;
 
-define <vscale x 8 x bfloat> @test_svcreate4_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3) local_unnamed_addr #1 {
+define <vscale x 8 x bfloat> @test_svcreate4_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3) #1 {
 ; CHECK-LABEL: test_svcreate4_bf16_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -606,7 +607,7 @@ L2:
   ret <vscale x 8 x bfloat> %extract
 }
 
-define <vscale x 8 x bfloat> @test_svcreate4_bf16_vec3(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3) local_unnamed_addr #1 {
+define <vscale x 8 x bfloat> @test_svcreate4_bf16_vec3(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3) #1 {
 ; CHECK-LABEL: test_svcreate4_bf16_vec3:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z3.d
@@ -624,7 +625,7 @@ L2:
 ; SVCREATE4 (i32)
 ;
 
-define <vscale x 4 x i32> @test_svcreate4_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) local_unnamed_addr #0 {
+define <vscale x 4 x i32> @test_svcreate4_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_s32_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -637,7 +638,7 @@ L2:
   ret <vscale x 4 x i32> %extract
 }
 
-define <vscale x 4 x i32> @test_svcreate4_s32_vec3(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) local_unnamed_addr #0 {
+define <vscale x 4 x i32> @test_svcreate4_s32_vec3(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_s32_vec3:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z3.d
@@ -655,7 +656,7 @@ L2:
 ; SVCREATE4 (float)
 ;
 
-define <vscale x 4 x float> @test_svcreate4_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) local_unnamed_addr #0 {
+define <vscale x 4 x float> @test_svcreate4_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_f32_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -668,7 +669,7 @@ L2:
   ret <vscale x 4 x float> %extract
 }
 
-define <vscale x 4 x float> @test_svcreate4_f32_vec3(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) local_unnamed_addr #0 {
+define <vscale x 4 x float> @test_svcreate4_f32_vec3(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_f32_vec3:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z3.d
@@ -686,7 +687,7 @@ L2:
 ; SVCREATE4 (i64)
 ;
 
-define <vscale x 2 x i64> @test_svcreate4_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) local_unnamed_addr #0 {
+define <vscale x 2 x i64> @test_svcreate4_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_s64_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -699,7 +700,7 @@ L2:
   ret <vscale x 2 x i64> %extract
 }
 
-define <vscale x 2 x i64> @test_svcreate4_s64_vec3(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) local_unnamed_addr #0 {
+define <vscale x 2 x i64> @test_svcreate4_s64_vec3(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_s64_vec3:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z3.d
@@ -717,7 +718,7 @@ L2:
 ; SVCREATE4 (double)
 ;
 
-define <vscale x 2 x double> @test_svcreate4_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) local_unnamed_addr #0 {
+define <vscale x 2 x double> @test_svcreate4_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_f64_vec0:
 ; CHECK: // %L2
 ; CHECK-NEXT: ret
@@ -730,7 +731,7 @@ L2:
   ret <vscale x 2 x double> %extract
 }
 
-define <vscale x 2 x double> @test_svcreate4_f64_vec3(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) local_unnamed_addr #0 {
+define <vscale x 2 x double> @test_svcreate4_f64_vec3(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) #0 {
 ; CHECK-LABEL: test_svcreate4_f64_vec3:
 ; CHECK: // %L2
 ; CHECK-NEXT: mov z0.d, z3.d
@@ -744,9 +745,9 @@ L2:
   ret <vscale x 2 x double> %extract
 }
 
-attributes #0 = { nounwind "target-features"="+sve" }
+attributes #0 = { nounwind }
 ; +bf16 is required for the bfloat version.
-attributes #1 = { nounwind "target-features"="+sve,+bf16" }
+attributes #1 = { nounwind "target-features"="+bf16" }
 
 declare <vscale x 4 x double>  @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
 declare <vscale x 8 x float>  @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll
index 287f7245dce46..113ddc912f41c 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
+; RUN: llc -mtriple aarch64 -mattr=+streaming-sve -asm-verbose=0 < %s | FileCheck %s
 
 ; All these tests create a vector tuple, insert z5 into one of the elements,
 ; and finally extracts that element from the wide vector to return it.  These
@@ -228,7 +229,7 @@ define <vscale x 4 x i32> @get_tuple4_nxv16i32_elt3(<vscale x 16 x i32> %tuple)
   ret <vscale x 4 x i32> %ext
 }
 
-attributes #0 = { nounwind "target-features"="+sve" }
+attributes #0 = { nounwind }
 
 declare <vscale x 8 x i32>  @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll
index d167e95ff5517..533d941f17003 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-pred-operations.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+streaming-sve < %s | FileCheck %s
 
 ;
 ; BRKA


        

