[llvm] 2dde272 - [SVE] Refactor sve-bitcast.ll to include all combinations for legal types.

Paul Walker via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 3 04:11:46 PDT 2022


Author: Paul Walker
Date: 2022-06-03T12:09:19+01:00
New Revision: 2dde272db767dbc74ebc326b787d25aa82a05b2e

URL: https://github.com/llvm/llvm-project/commit/2dde272db767dbc74ebc326b787d25aa82a05b2e
DIFF: https://github.com/llvm/llvm-project/commit/2dde272db767dbc74ebc326b787d25aa82a05b2e.diff

LOG: [SVE] Refactor sve-bitcast.ll to include all combinations for legal types.

Patch enables custom lowering for MVT::nxv4bf16 because otherwise
the refactored test file triggers a selection failure.

The reason for the refactoring is to highlight cases where the
generated code is wrong.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/sve-bitcast.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1126bd5b7372..11e8e4adc6f0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1136,7 +1136,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
     // Legalize unpacked bitcasts to REINTERPRET_CAST.
     for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16,
-                    MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
+                    MVT::nxv4bf16, MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
       setOperationAction(ISD::BITCAST, VT, Custom);
 
     for (auto VT :

diff  --git a/llvm/test/CodeGen/AArch64/sve-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-bitcast.ll
index bab42f389917..7b7600b70ce7 100644
--- a/llvm/test/CodeGen/AArch64/sve-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/sve-bitcast.ll
@@ -1,519 +1,1031 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-; RUN: not --crash llc -mtriple=aarch64_be -mattr=+sve < %s
+; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
+; RUN: not --crash llc -mtriple=aarch64_be < %s
 
-define <vscale x 16 x i8> @bitcast_i16_to_i8(<vscale x 8 x i16> %v) {
-; CHECK-LABEL: bitcast_i16_to_i8:
+;
+; bitcast to nxv16i8
+;
+
+define <vscale x 16 x i8> @bitcast_nxv8i16_to_nxv16i8(<vscale x 8 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8i16_to_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 8 x i16> %v to <vscale x 16 x i8>
   ret <vscale x 16 x i8> %bc
 }
 
-define <vscale x 16 x i8> @bitcast_i32_to_i8(<vscale x 4 x i32> %v) {
-; CHECK-LABEL: bitcast_i32_to_i8:
+define <vscale x 16 x i8> @bitcast_nxv4i32_to_nxv16i8(<vscale x 4 x i32> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4i32_to_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 4 x i32> %v to <vscale x 16 x i8>
   ret <vscale x 16 x i8> %bc
 }
 
-define <vscale x 16 x i8> @bitcast_i64_to_i8(<vscale x 2 x i64> %v) {
-; CHECK-LABEL: bitcast_i64_to_i8:
+define <vscale x 16 x i8> @bitcast_nxv2i64_to_nxv16i8(<vscale x 2 x i64> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2i64_to_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 2 x i64> %v to <vscale x 16 x i8>
   ret <vscale x 16 x i8> %bc
 }
 
-define <vscale x 16 x i8> @bitcast_half_to_i8(<vscale x 8 x half> %v) {
-; CHECK-LABEL: bitcast_half_to_i8:
+define <vscale x 16 x i8> @bitcast_nxv8f16_to_nxv16i8(<vscale x 8 x half> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8f16_to_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 8 x half> %v to <vscale x 16 x i8>
   ret <vscale x 16 x i8> %bc
 }
 
-define <vscale x 16 x i8> @bitcast_float_to_i8(<vscale x 4 x float> %v) {
-; CHECK-LABEL: bitcast_float_to_i8:
+define <vscale x 16 x i8> @bitcast_nxv4f32_to_nxv16i8(<vscale x 4 x float> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4f32_to_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 4 x float> %v to <vscale x 16 x i8>
   ret <vscale x 16 x i8> %bc
 }
 
-define <vscale x 16 x i8> @bitcast_double_to_i8(<vscale x 2 x double> %v) {
-; CHECK-LABEL: bitcast_double_to_i8:
+define <vscale x 16 x i8> @bitcast_nxv2f64_to_nxv16i8(<vscale x 2 x double> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2f64_to_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 2 x double> %v to <vscale x 16 x i8>
   ret <vscale x 16 x i8> %bc
 }
 
-define <vscale x 8 x i16> @bitcast_i8_to_i16(<vscale x 16 x i8> %v) {
-; CHECK-LABEL: bitcast_i8_to_i16:
+define <vscale x 16 x i8> @bitcast_nxv8bf16_to_nxv16i8(<vscale x 8 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8bf16_to_nxv16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 16 x i8>
+  ret <vscale x 16 x i8> %bc
+}
+
+;
+; bitcast to nxv8i16
+;
+
+define <vscale x 8 x i16> @bitcast_nxv16i8_to_nxv8i16(<vscale x 16 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_nxv16i8_to_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 16 x i8> %v to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %bc
 }
 
-define <vscale x 8 x i16> @bitcast_i32_to_i16(<vscale x 4 x i32> %v) {
-; CHECK-LABEL: bitcast_i32_to_i16:
+define <vscale x 8 x i16> @bitcast_nxv4i32_to_nxv8i16(<vscale x 4 x i32> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4i32_to_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 4 x i32> %v to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %bc
 }
 
-define <vscale x 8 x i16> @bitcast_i64_to_i16(<vscale x 2 x i64> %v) {
-; CHECK-LABEL: bitcast_i64_to_i16:
+define <vscale x 8 x i16> @bitcast_nxv2i64_to_nxv8i16(<vscale x 2 x i64> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2i64_to_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 2 x i64> %v to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %bc
 }
 
-define <vscale x 8 x i16> @bitcast_half_to_i16(<vscale x 8 x half> %v) {
-; CHECK-LABEL: bitcast_half_to_i16:
+define <vscale x 8 x i16> @bitcast_nxv8f16_to_nxv8i16(<vscale x 8 x half> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8f16_to_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 8 x half> %v to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %bc
 }
 
-define <vscale x 8 x i16> @bitcast_float_to_i16(<vscale x 4 x float> %v) {
-; CHECK-LABEL: bitcast_float_to_i16:
+define <vscale x 8 x i16> @bitcast_nxv4f32_to_nxv8i16(<vscale x 4 x float> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4f32_to_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 4 x float> %v to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %bc
 }
 
-define <vscale x 8 x i16> @bitcast_double_to_i16(<vscale x 2 x double> %v) {
-; CHECK-LABEL: bitcast_double_to_i16:
+define <vscale x 8 x i16> @bitcast_nxv2f64_to_nxv8i16(<vscale x 2 x double> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2f64_to_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 2 x double> %v to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %bc
 }
 
-define <vscale x 4 x i32> @bitcast_i8_to_i32(<vscale x 16 x i8> %v) {
-; CHECK-LABEL: bitcast_i8_to_i32:
+define <vscale x 8 x i16> @bitcast_nxv8bf16_to_nxv8i16(<vscale x 8 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8bf16_to_nxv8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %bc
+}
+
+;
+; bitcast to nxv4i32
+;
+
+define <vscale x 4 x i32> @bitcast_nxv16i8_to_nxv4i32(<vscale x 16 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_nxv16i8_to_nxv4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 16 x i8> %v to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %bc
 }
 
-define <vscale x 4 x i32> @bitcast_i16_to_i32(<vscale x 8 x i16> %v) {
-; CHECK-LABEL: bitcast_i16_to_i32:
+define <vscale x 4 x i32> @bitcast_nxv8i16_to_nxv4i32(<vscale x 8 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8i16_to_nxv4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 8 x i16> %v to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %bc
 }
 
-define <vscale x 4 x i32> @bitcast_i64_to_i32(<vscale x 2 x i64> %v) {
-; CHECK-LABEL: bitcast_i64_to_i32:
+define <vscale x 4 x i32> @bitcast_nxv2i64_to_nxv4i32(<vscale x 2 x i64> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2i64_to_nxv4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 2 x i64> %v to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %bc
 }
 
-define <vscale x 4 x i32> @bitcast_half_to_i32(<vscale x 8 x half> %v) {
-; CHECK-LABEL: bitcast_half_to_i32:
+define <vscale x 4 x i32> @bitcast_nxv8f16_to_nxv4i32(<vscale x 8 x half> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8f16_to_nxv4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 8 x half> %v to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %bc
 }
 
-define <vscale x 4 x i32> @bitcast_float_to_i32(<vscale x 4 x float> %v) {
-; CHECK-LABEL: bitcast_float_to_i32:
+define <vscale x 4 x i32> @bitcast_nxv4f32_to_nxv4i32(<vscale x 4 x float> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4f32_to_nxv4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 4 x float> %v to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %bc
 }
 
-define <vscale x 4 x i32> @bitcast_double_to_i32(<vscale x 2 x double> %v) {
-; CHECK-LABEL: bitcast_double_to_i32:
+define <vscale x 4 x i32> @bitcast_nxv2f64_to_nxv4i32(<vscale x 2 x double> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2f64_to_nxv4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 2 x double> %v to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %bc
 }
 
-define <vscale x 2 x i64> @bitcast_i8_to_i64(<vscale x 16 x i8> %v) {
-; CHECK-LABEL: bitcast_i8_to_i64:
+define <vscale x 4 x i32> @bitcast_nxv8bf16_to_nxv4i32(<vscale x 8 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8bf16_to_nxv4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %bc
+}
+
+;
+; bitcast to nxv2i64
+;
+
+define <vscale x 2 x i64> @bitcast_nxv16i8_to_nxv2i64(<vscale x 16 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_nxv16i8_to_nxv2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 16 x i8> %v to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %bc
 }
 
-define <vscale x 2 x i64> @bitcast_i16_to_i64(<vscale x 8 x i16> %v) {
-; CHECK-LABEL: bitcast_i16_to_i64:
+define <vscale x 2 x i64> @bitcast_nxv8i16_to_nxv2i64(<vscale x 8 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8i16_to_nxv2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 8 x i16> %v to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %bc
 }
 
-define <vscale x 2 x i64> @bitcast_i32_to_i64(<vscale x 4 x i32> %v) {
-; CHECK-LABEL: bitcast_i32_to_i64:
+define <vscale x 2 x i64> @bitcast_nxv4i32_to_nxv2i64(<vscale x 4 x i32> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4i32_to_nxv2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 4 x i32> %v to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %bc
 }
 
-define <vscale x 2 x i64> @bitcast_half_to_i64(<vscale x 8 x half> %v) {
-; CHECK-LABEL: bitcast_half_to_i64:
+define <vscale x 2 x i64> @bitcast_nxv8f16_to_nxv2i64(<vscale x 8 x half> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8f16_to_nxv2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 8 x half> %v to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %bc
 }
 
-define <vscale x 2 x i64> @bitcast_float_to_i64(<vscale x 4 x float> %v) {
-; CHECK-LABEL: bitcast_float_to_i64:
+define <vscale x 2 x i64> @bitcast_nxv4f32_to_nxv2i64(<vscale x 4 x float> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4f32_to_nxv2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 4 x float> %v to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %bc
 }
 
-define <vscale x 2 x i64> @bitcast_double_to_i64(<vscale x 2 x double> %v) {
-; CHECK-LABEL: bitcast_double_to_i64:
+define <vscale x 2 x i64> @bitcast_nxv2f64_to_nxv2i64(<vscale x 2 x double> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2f64_to_nxv2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 2 x double> %v to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %bc
 }
 
-define <vscale x 8 x half> @bitcast_i8_to_half(<vscale x 16 x i8> %v) {
-; CHECK-LABEL: bitcast_i8_to_half:
+define <vscale x 2 x i64> @bitcast_nxv8bf16_to_nxv2i64(<vscale x 8 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8bf16_to_nxv2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %bc
+}
+
+;
+; bitcast to nxv8f16
+;
+
+define <vscale x 8 x half> @bitcast_nxv16i8_to_nxv8f16(<vscale x 16 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_nxv16i8_to_nxv8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 16 x i8> %v to <vscale x 8 x half>
   ret <vscale x 8 x half> %bc
 }
 
-define <vscale x 8 x half> @bitcast_i16_to_half(<vscale x 8 x i16> %v) {
-; CHECK-LABEL: bitcast_i16_to_half:
+define <vscale x 8 x half> @bitcast_nxv8i16_to_nxv8f16(<vscale x 8 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8i16_to_nxv8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 8 x i16> %v to <vscale x 8 x half>
   ret <vscale x 8 x half> %bc
 }
 
-define <vscale x 8 x half> @bitcast_i32_to_half(<vscale x 4 x i32> %v) {
-; CHECK-LABEL: bitcast_i32_to_half:
+define <vscale x 8 x half> @bitcast_nxv4i32_to_nxv8f16(<vscale x 4 x i32> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4i32_to_nxv8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 4 x i32> %v to <vscale x 8 x half>
   ret <vscale x 8 x half> %bc
 }
 
-define <vscale x 8 x half> @bitcast_i64_to_half(<vscale x 2 x i64> %v) {
-; CHECK-LABEL: bitcast_i64_to_half:
+define <vscale x 8 x half> @bitcast_nxv2i64_to_nxv8f16(<vscale x 2 x i64> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2i64_to_nxv8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 2 x i64> %v to <vscale x 8 x half>
   ret <vscale x 8 x half> %bc
 }
 
-define <vscale x 8 x half> @bitcast_float_to_half(<vscale x 4 x float> %v) {
-; CHECK-LABEL: bitcast_float_to_half:
+define <vscale x 8 x half> @bitcast_nxv4f32_to_nxv8f16(<vscale x 4 x float> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4f32_to_nxv8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 4 x float> %v to <vscale x 8 x half>
   ret <vscale x 8 x half> %bc
 }
 
-define <vscale x 8 x half> @bitcast_double_to_half(<vscale x 2 x double> %v) {
-; CHECK-LABEL: bitcast_double_to_half:
+define <vscale x 8 x half> @bitcast_nxv2f64_to_nxv8f16(<vscale x 2 x double> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2f64_to_nxv8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 2 x double> %v to <vscale x 8 x half>
   ret <vscale x 8 x half> %bc
 }
 
-define <vscale x 4 x float> @bitcast_i8_to_float(<vscale x 16 x i8> %v) {
-; CHECK-LABEL: bitcast_i8_to_float:
+define <vscale x 8 x half> @bitcast_nxv8bf16_to_nxv8f16(<vscale x 8 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8bf16_to_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 8 x half>
+  ret <vscale x 8 x half> %bc
+}
+
+;
+; bitcast to nxv4f32
+;
+
+define <vscale x 4 x float> @bitcast_nxv16i8_to_nxv4f32(<vscale x 16 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_nxv16i8_to_nxv4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 16 x i8> %v to <vscale x 4 x float>
   ret <vscale x 4 x float> %bc
 }
 
-define <vscale x 4 x float> @bitcast_i16_to_float(<vscale x 8 x i16> %v) {
-; CHECK-LABEL: bitcast_i16_to_float:
+define <vscale x 4 x float> @bitcast_nxv8i16_to_nxv4f32(<vscale x 8 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8i16_to_nxv4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 8 x i16> %v to <vscale x 4 x float>
   ret <vscale x 4 x float> %bc
 }
 
-define <vscale x 4 x float> @bitcast_i32_to_float(<vscale x 4 x i32> %v) {
-; CHECK-LABEL: bitcast_i32_to_float:
+define <vscale x 4 x float> @bitcast_nxv4i32_to_nxv4f32(<vscale x 4 x i32> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4i32_to_nxv4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 4 x i32> %v to <vscale x 4 x float>
   ret <vscale x 4 x float> %bc
 }
 
-define <vscale x 4 x float> @bitcast_i64_to_float(<vscale x 2 x i64> %v) {
-; CHECK-LABEL: bitcast_i64_to_float:
+define <vscale x 4 x float> @bitcast_nxv2i64_to_nxv4f32(<vscale x 2 x i64> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2i64_to_nxv4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 2 x i64> %v to <vscale x 4 x float>
   ret <vscale x 4 x float> %bc
 }
 
-define <vscale x 4 x float> @bitcast_half_to_float(<vscale x 8 x half> %v) {
-; CHECK-LABEL: bitcast_half_to_float:
+define <vscale x 4 x float> @bitcast_nxv8f16_to_nxv4f32(<vscale x 8 x half> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8f16_to_nxv4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 8 x half> %v to <vscale x 4 x float>
   ret <vscale x 4 x float> %bc
 }
 
-define <vscale x 4 x float> @bitcast_double_to_float(<vscale x 2 x double> %v) {
-; CHECK-LABEL: bitcast_double_to_float:
+define <vscale x 4 x float> @bitcast_nxv2f64_to_nxv4f32(<vscale x 2 x double> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2f64_to_nxv4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 2 x double> %v to <vscale x 4 x float>
   ret <vscale x 4 x float> %bc
 }
 
-define <vscale x 2 x double> @bitcast_i8_to_double(<vscale x 16 x i8> %v) {
-; CHECK-LABEL: bitcast_i8_to_double:
+define <vscale x 4 x float> @bitcast_nxv8bf16_to_nxv4f32(<vscale x 8 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8bf16_to_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 4 x float>
+  ret <vscale x 4 x float> %bc
+}
+
+;
+; bitcast to nxv2f64
+;
+
+define <vscale x 2 x double> @bitcast_nxv16i8_to_nxv2f64(<vscale x 16 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_nxv16i8_to_nxv2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 16 x i8> %v to <vscale x 2 x double>
   ret <vscale x 2 x double> %bc
 }
 
-define <vscale x 2 x double> @bitcast_i16_to_double(<vscale x 8 x i16> %v) {
-; CHECK-LABEL: bitcast_i16_to_double:
+define <vscale x 2 x double> @bitcast_nxv8i16_to_nxv2f64(<vscale x 8 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8i16_to_nxv2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 8 x i16> %v to <vscale x 2 x double>
   ret <vscale x 2 x double> %bc
 }
 
-define <vscale x 2 x double> @bitcast_i32_to_double(<vscale x 4 x i32> %v) {
-; CHECK-LABEL: bitcast_i32_to_double:
+define <vscale x 2 x double> @bitcast_nxv4i32_to_nxv2f64(<vscale x 4 x i32> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4i32_to_nxv2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 4 x i32> %v to <vscale x 2 x double>
   ret <vscale x 2 x double> %bc
 }
 
-define <vscale x 2 x double> @bitcast_i64_to_double(<vscale x 2 x i64> %v) {
-; CHECK-LABEL: bitcast_i64_to_double:
+define <vscale x 2 x double> @bitcast_nxv2i64_to_nxv2f64(<vscale x 2 x i64> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2i64_to_nxv2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 2 x i64> %v to <vscale x 2 x double>
   ret <vscale x 2 x double> %bc
 }
 
-define <vscale x 2 x double> @bitcast_half_to_double(<vscale x 8 x half> %v) {
-; CHECK-LABEL: bitcast_half_to_double:
+define <vscale x 2 x double> @bitcast_nxv8f16_to_nxv2f64(<vscale x 8 x half> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8f16_to_nxv2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 8 x half> %v to <vscale x 2 x double>
   ret <vscale x 2 x double> %bc
 }
 
-define <vscale x 2 x double> @bitcast_float_to_double(<vscale x 4 x float> %v) {
-; CHECK-LABEL: bitcast_float_to_double:
+define <vscale x 2 x double> @bitcast_nxv4f32_to_nxv2f64(<vscale x 4 x float> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4f32_to_nxv2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 4 x float> %v to <vscale x 2 x double>
   ret <vscale x 2 x double> %bc
 }
 
-define <vscale x 16 x i8> @bitcast_bfloat_to_i8(<vscale x 8 x bfloat> %v) #0 {
-; CHECK-LABEL: bitcast_bfloat_to_i8:
+define <vscale x 2 x double> @bitcast_nxv8bf16_to_nxv2f64(<vscale x 8 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8bf16_to_nxv2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 16 x i8>
-  ret <vscale x 16 x i8> %bc
+  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 2 x double>
+  ret <vscale x 2 x double> %bc
 }
 
-define <vscale x 8 x i16> @bitcast_bfloat_to_i16(<vscale x 8 x bfloat> %v) #0 {
-; CHECK-LABEL: bitcast_bfloat_to_i16:
+;
+; bitcast to nxv8bf16
+;
+
+define <vscale x 8 x bfloat> @bitcast_nxv16i8_to_nxv8bf16(<vscale x 16 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_nxv16i8_to_nxv8bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 8 x i16>
-  ret <vscale x 8 x i16> %bc
+  %bc = bitcast <vscale x 16 x i8> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
 }
 
-define <vscale x 4 x i32> @bitcast_bfloat_to_i32(<vscale x 8 x bfloat> %v) #0 {
-; CHECK-LABEL: bitcast_bfloat_to_i32:
+define <vscale x 8 x bfloat> @bitcast_nxv8i16_to_nxv8bf16(<vscale x 8 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8i16_to_nxv8bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 4 x i32>
-  ret <vscale x 4 x i32> %bc
+  %bc = bitcast <vscale x 8 x i16> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
 }
 
-define <vscale x 2 x i64> @bitcast_bfloat_to_i64(<vscale x 8 x bfloat> %v) #0 {
-; CHECK-LABEL: bitcast_bfloat_to_i64:
+define <vscale x 8 x bfloat> @bitcast_nxv4i32_to_nxv8bf16(<vscale x 4 x i32> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4i32_to_nxv8bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 2 x i64>
-  ret <vscale x 2 x i64> %bc
+  %bc = bitcast <vscale x 4 x i32> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
 }
 
-define <vscale x 8 x half> @bitcast_bfloat_to_half(<vscale x 8 x bfloat> %v) #0 {
-; CHECK-LABEL: bitcast_bfloat_to_half:
+define <vscale x 8 x bfloat> @bitcast_nxv2i64_to_nxv8bf16(<vscale x 2 x i64> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2i64_to_nxv8bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 8 x half>
-  ret <vscale x 8 x half> %bc
+  %bc = bitcast <vscale x 2 x i64> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
 }
 
-define <vscale x 4 x float> @bitcast_bfloat_to_float(<vscale x 8 x bfloat> %v) #0 {
-; CHECK-LABEL: bitcast_bfloat_to_float:
+define <vscale x 8 x bfloat> @bitcast_nxv8f16_to_nxv8bf16(<vscale x 8 x half> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8f16_to_nxv8bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 4 x float>
-  ret <vscale x 4 x float> %bc
+  %bc = bitcast <vscale x 8 x half> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
 }
 
-define <vscale x 2 x double> @bitcast_bfloat_to_double(<vscale x 8 x bfloat> %v) #0 {
-; CHECK-LABEL: bitcast_bfloat_to_double:
+define <vscale x 8 x bfloat> @bitcast_nxv4f32_to_nxv8bf16(<vscale x 4 x float> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4f32_to_nxv8bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 2 x double>
-  ret <vscale x 2 x double> %bc
+  %bc = bitcast <vscale x 4 x float> %v to <vscale x 8 x bfloat>
+  ret <vscale x 8 x bfloat> %bc
 }
 
-define <vscale x 8 x bfloat> @bitcast_i8_to_bfloat(<vscale x 16 x i8> %v) #0 {
-; CHECK-LABEL: bitcast_i8_to_bfloat:
+define <vscale x 8 x bfloat> @bitcast_nxv2f64_to_nxv8bf16(<vscale x 2 x double> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2f64_to_nxv8bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %bc = bitcast <vscale x 16 x i8> %v to <vscale x 8 x bfloat>
+  %bc = bitcast <vscale x 2 x double> %v to <vscale x 8 x bfloat>
   ret <vscale x 8 x bfloat> %bc
 }
 
-define <vscale x 8 x bfloat> @bitcast_i16_to_bfloat(<vscale x 8 x i16> %v) #0 {
-; CHECK-LABEL: bitcast_i16_to_bfloat:
+;
+; bitcast to nxv8i8
+;
+
+define <vscale x 8 x i8> @bitcast_nxv4i16_to_nxv8i8(<vscale x 4 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4i16_to_nxv8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %bc = bitcast <vscale x 8 x i16> %v to <vscale x 8 x bfloat>
-  ret <vscale x 8 x bfloat> %bc
+  %bc = bitcast <vscale x 4 x i16> %v to <vscale x 8 x i8>
+  ret <vscale x 8 x i8> %bc
 }
 
-define <vscale x 8 x bfloat> @bitcast_i32_to_bfloat(<vscale x 4 x i32> %v) #0 {
-; CHECK-LABEL: bitcast_i32_to_bfloat:
+define <vscale x 8 x i8> @bitcast_nxv2i32_to_nxv8i8(<vscale x 2 x i32> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2i32_to_nxv8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    st1w { z0.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %bc = bitcast <vscale x 4 x i32> %v to <vscale x 8 x bfloat>
-  ret <vscale x 8 x bfloat> %bc
+  %bc = bitcast <vscale x 2 x i32> %v to <vscale x 8 x i8>
+  ret <vscale x 8 x i8> %bc
 }
 
-define <vscale x 8 x bfloat> @bitcast_i64_to_bfloat(<vscale x 2 x i64> %v) #0 {
-; CHECK-LABEL: bitcast_i64_to_bfloat:
+define <vscale x 8 x i8> @bitcast_nxv4f16_to_nxv8i8(<vscale x 4 x half> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4f16_to_nxv8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %bc = bitcast <vscale x 2 x i64> %v to <vscale x 8 x bfloat>
-  ret <vscale x 8 x bfloat> %bc
+  %bc = bitcast <vscale x 4 x half> %v to <vscale x 8 x i8>
+  ret <vscale x 8 x i8> %bc
 }
 
-define <vscale x 8 x bfloat> @bitcast_half_to_bfloat(<vscale x 8 x half> %v) #0 {
-; CHECK-LABEL: bitcast_half_to_bfloat:
+define <vscale x 8 x i8> @bitcast_nxv2f32_to_nxv8i8(<vscale x 2 x float> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2f32_to_nxv8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    st1w { z0.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %bc = bitcast <vscale x 8 x half> %v to <vscale x 8 x bfloat>
-  ret <vscale x 8 x bfloat> %bc
+  %bc = bitcast <vscale x 2 x float> %v to <vscale x 8 x i8>
+  ret <vscale x 8 x i8> %bc
 }
 
-define <vscale x 8 x bfloat> @bitcast_float_to_bfloat(<vscale x 4 x float> %v) #0 {
-; CHECK-LABEL: bitcast_float_to_bfloat:
+define <vscale x 8 x i8> @bitcast_nxv4bf16_to_nxv8i8(<vscale x 4 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4bf16_to_nxv8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %bc = bitcast <vscale x 4 x float> %v to <vscale x 8 x bfloat>
-  ret <vscale x 8 x bfloat> %bc
+  %bc = bitcast <vscale x 4 x bfloat> %v to <vscale x 8 x i8>
+  ret <vscale x 8 x i8> %bc
 }
 
-define <vscale x 8 x bfloat> @bitcast_double_to_bfloat(<vscale x 2 x double> %v) #0 {
-; CHECK-LABEL: bitcast_double_to_bfloat:
+;
+; bitcast to nxv4i16
+;
+
+define <vscale x 4 x i16> @bitcast_nxv8i8_to_nxv4i16(<vscale x 8 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8i8_to_nxv4i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    st1b { z0.h }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %bc = bitcast <vscale x 2 x double> %v to <vscale x 8 x bfloat>
-  ret <vscale x 8 x bfloat> %bc
+  %bc = bitcast <vscale x 8 x i8> %v to <vscale x 4 x i16>
+  ret <vscale x 4 x i16> %bc
 }
 
-define <vscale x 2 x i16> @bitcast_short2_half_to_i16(<vscale x 2 x half> %v) {
-; CHECK-LABEL: bitcast_short2_half_to_i16:
+define <vscale x 4 x i16> @bitcast_nxv2i32_to_nxv4i16(<vscale x 2 x i32> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2i32_to_nxv4i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    st1w { z0.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %bc = bitcast <vscale x 2 x half> %v to <vscale x 2 x i16>
-  ret <vscale x 2 x i16> %bc
+  %bc = bitcast <vscale x 2 x i32> %v to <vscale x 4 x i16>
+  ret <vscale x 4 x i16> %bc
 }
 
-define <vscale x 4 x i16> @bitcast_short4_half_to_i16(<vscale x 4 x half> %v) {
-; CHECK-LABEL: bitcast_short4_half_to_i16:
+define <vscale x 4 x i16> @bitcast_nxv4f16_to_nxv4i16(<vscale x 4 x half> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4f16_to_nxv4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 4 x half> %v to <vscale x 4 x i16>
   ret <vscale x 4 x i16> %bc
 }
 
-define <vscale x 2 x i16> @bitcast_short2_bfloat_to_i16(<vscale x 2 x bfloat> %v) #0 {
-; CHECK-LABEL: bitcast_short2_bfloat_to_i16:
+; TODO: Invalid code generation because the bitcast must change the in-register
+; layout when casting between unpacked scalable vector types.
+define <vscale x 4 x i16> @bitcast_nxv2f32_to_nxv4i16(<vscale x 2 x float> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2f32_to_nxv4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %bc = bitcast <vscale x 2 x bfloat> %v to <vscale x 2 x i16>
-  ret <vscale x 2 x i16> %bc
+  %bc = bitcast <vscale x 2 x float> %v to <vscale x 4 x i16>
+  ret <vscale x 4 x i16> %bc
 }
 
-define <vscale x 4 x i16> @bitcast_short4_bfloat_to_i16(<vscale x 4 x bfloat> %v) #0 {
-; CHECK-LABEL: bitcast_short4_bfloat_to_i16:
+define <vscale x 4 x i16> @bitcast_nxv4bf16_to_nxv4i16(<vscale x 4 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4bf16_to_nxv4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 4 x bfloat> %v to <vscale x 4 x i16>
   ret <vscale x 4 x i16> %bc
 }
 
-define <vscale x 2 x half> @bitcast_short2_i16_to_half(<vscale x 2 x i16> %v) {
-; CHECK-LABEL: bitcast_short2_i16_to_half:
+;
+; bitcast to nxv2i32
+;
+
+define <vscale x 2 x i32> @bitcast_nxv8i8_to_nxv2i32(<vscale x 8 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8i8_to_nxv2i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    st1b { z0.h }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
-  %bc = bitcast <vscale x 2 x i16> %v to <vscale x 2 x half>
-  ret <vscale x 2 x half> %bc
+  %bc = bitcast <vscale x 8 x i8> %v to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %bc
 }
 
-define <vscale x 4 x half> @bitcast_short4_i16_to_half(<vscale x 4 x i16> %v) {
-; CHECK-LABEL: bitcast_short4_i16_to_half:
+define <vscale x 2 x i32> @bitcast_nxv4i16_to_nxv2i32(<vscale x 4 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4i16_to_nxv2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x i16> %v to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %bc
+}
+
+; TODO: Invalid code generation because the bitcast must change the in-register
+; layout when casting between unpacked scalable vector types.
+define <vscale x 2 x i32> @bitcast_nxv4f16_to_nxv2i32(<vscale x 4 x half> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4f16_to_nxv2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x half> %v to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %bc
+}
+
+define <vscale x 2 x i32> @bitcast_nxv2f32_to_nxv2i32(<vscale x 2 x float> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2f32_to_nxv2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x float> %v to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %bc
+}
+
+; TODO: Invalid code generation because the bitcast must change the in-register
+; layout when casting between unpacked scalable vector types.
+define <vscale x 2 x i32> @bitcast_nxv4bf16_to_nxv2i32(<vscale x 4 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4bf16_to_nxv2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x bfloat> %v to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %bc
+}
+
+;
+; bitcast to nxv4f16
+;
+
+define <vscale x 4 x half> @bitcast_nxv8i8_to_nxv4f16(<vscale x 8 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8i8_to_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    st1b { z0.h }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x i8> %v to <vscale x 4 x half>
+  ret <vscale x 4 x half> %bc
+}
+
+define <vscale x 4 x half> @bitcast_nxv4i16_to_nxv4f16(<vscale x 4 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4i16_to_nxv4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 4 x i16> %v to <vscale x 4 x half>
   ret <vscale x 4 x half> %bc
 }
 
-define <vscale x 2 x bfloat> @bitcast_short2_i16_to_bfloat(<vscale x 2 x i16> %v) #0 {
-; CHECK-LABEL: bitcast_short2_i16_to_bfloat:
+; TODO: Invalid code generation because the bitcast must change the in-register
+; layout when casting between unpacked scalable vector types.
+define <vscale x 4 x half> @bitcast_nxv2i32_to_nxv4f16(<vscale x 2 x i32> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2i32_to_nxv4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
-  %bc = bitcast <vscale x 2 x i16> %v to <vscale x 2 x bfloat>
-  ret <vscale x 2 x bfloat> %bc
+  %bc = bitcast <vscale x 2 x i32> %v to <vscale x 4 x half>
+  ret <vscale x 4 x half> %bc
+}
+
+; TODO: Invalid code generation because the bitcast must change the in-register
+; layout when casting between unpacked scalable vector types.
+define <vscale x 4 x half> @bitcast_nxv2f32_to_nxv4f16(<vscale x 2 x float> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2f32_to_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x float> %v to <vscale x 4 x half>
+  ret <vscale x 4 x half> %bc
+}
+
+define <vscale x 4 x half> @bitcast_nxv4bf16_to_nxv4f16(<vscale x 4 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4bf16_to_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x bfloat> %v to <vscale x 4 x half>
+  ret <vscale x 4 x half> %bc
+}
+
+;
+; bitcast to nxv2f32
+;
+
+define <vscale x 2 x float> @bitcast_nxv8i8_to_nxv2f32(<vscale x 8 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8i8_to_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    st1b { z0.h }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x i8> %v to <vscale x 2 x float>
+  ret <vscale x 2 x float> %bc
 }
 
-define <vscale x 4 x bfloat> @bitcast_short4_i16_to_bfloat(<vscale x 4 x i16> %v) #0 {
-; CHECK-LABEL: bitcast_short4_i16_to_bfloat:
+; TODO: Invalid code generation because the bitcast must change the in-register
+; layout when casting between unpacked scalable vector types.
+define <vscale x 2 x float> @bitcast_nxv4i16_to_nxv2f32(<vscale x 4 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4i16_to_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x i16> %v to <vscale x 2 x float>
+  ret <vscale x 2 x float> %bc
+}
+
+define <vscale x 2 x float> @bitcast_nxv2i32_to_nxv2f32(<vscale x 2 x i32> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2i32_to_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x i32> %v to <vscale x 2 x float>
+  ret <vscale x 2 x float> %bc
+}
+
+; TODO: Invalid code generation because the bitcast must change the in-register
+; layout when casting between unpacked scalable vector types.
+define <vscale x 2 x float> @bitcast_nxv4f16_to_nxv2f32(<vscale x 4 x half> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4f16_to_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x half> %v to <vscale x 2 x float>
+  ret <vscale x 2 x float> %bc
+}
+
+; TODO: Invalid code generation because the bitcast must change the in-register
+; layout when casting between unpacked scalable vector types.
+define <vscale x 2 x float> @bitcast_nxv4bf16_to_nxv2f32(<vscale x 4 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4bf16_to_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x bfloat> %v to <vscale x 2 x float>
+  ret <vscale x 2 x float> %bc
+}
+
+;
+; bitcast to nxv4bf16
+;
+
+define <vscale x 4 x bfloat> @bitcast_nxv8i8_to_nxv4bf16(<vscale x 8 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8i8_to_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    st1b { z0.h }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x i8> %v to <vscale x 4 x bfloat>
+  ret <vscale x 4 x bfloat> %bc
+}
+
+define <vscale x 4 x bfloat> @bitcast_nxv4i16_to_nxv4bf16(<vscale x 4 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4i16_to_nxv4bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ret
   %bc = bitcast <vscale x 4 x i16> %v to <vscale x 4 x bfloat>
   ret <vscale x 4 x bfloat> %bc
 }
 
+; TODO: Invalid code generation because the bitcast must change the in-register
+; layout when casting between unpacked scalable vector types.
+define <vscale x 4 x bfloat> @bitcast_nxv2i32_to_nxv4bf16(<vscale x 2 x i32> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2i32_to_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x i32> %v to <vscale x 4 x bfloat>
+  ret <vscale x 4 x bfloat> %bc
+}
+
+define <vscale x 4 x bfloat> @bitcast_nxv4f16_to_nxv4bf16(<vscale x 4 x half> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4f16_to_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x half> %v to <vscale x 4 x bfloat>
+  ret <vscale x 4 x bfloat> %bc
+}
+
+; TODO: Invalid code generation because the bitcast must change the in-register
+; layout when casting between unpacked scalable vector types.
+define <vscale x 4 x bfloat> @bitcast_nxv2f32_to_nxv4bf16(<vscale x 2 x float> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2f32_to_nxv4bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x float> %v to <vscale x 4 x bfloat>
+  ret <vscale x 4 x bfloat> %bc
+}
+
+;
+; bitcast to nxv4i8
+;
+
+define <vscale x 4 x i8> @bitcast_nxv2i16_to_nxv4i8(<vscale x 2 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2i16_to_nxv4i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    st1h { z0.d }, p0, [sp, #3, mul vl]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1b { z0.s }, p0/z, [sp, #3, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x i16> %v to <vscale x 4 x i8>
+  ret <vscale x 4 x i8> %bc
+}
+
+define <vscale x 4 x i8> @bitcast_nxv2f16_to_nxv4i8(<vscale x 2 x half> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2f16_to_nxv4i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    st1h { z0.d }, p0, [sp, #3, mul vl]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1b { z0.s }, p0/z, [sp, #3, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x half> %v to <vscale x 4 x i8>
+  ret <vscale x 4 x i8> %bc
+}
+
+define <vscale x 4 x i8> @bitcast_nxv2bf16_to_nxv4i8(<vscale x 2 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2bf16_to_nxv4i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    st1h { z0.d }, p0, [sp, #3, mul vl]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1b { z0.s }, p0/z, [sp, #3, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x bfloat> %v to <vscale x 4 x i8>
+  ret <vscale x 4 x i8> %bc
+}
+
+;
+; bitcast to nxv2i16
+;
+
+define <vscale x 2 x i16> @bitcast_nxv4i8_to_nxv2i16(<vscale x 4 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4i8_to_nxv2i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    st1b { z0.s }, p0, [sp, #3, mul vl]
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [sp, #3, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x i8> %v to <vscale x 2 x i16>
+  ret <vscale x 2 x i16> %bc
+}
+
+define <vscale x 2 x i16> @bitcast_nxv2f16_to_nxv2i16(<vscale x 2 x half> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2f16_to_nxv2i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x half> %v to <vscale x 2 x i16>
+  ret <vscale x 2 x i16> %bc
+}
+
+define <vscale x 2 x i16> @bitcast_nxv2bf16_to_nxv2i16(<vscale x 2 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2bf16_to_nxv2i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x bfloat> %v to <vscale x 2 x i16>
+  ret <vscale x 2 x i16> %bc
+}
+
+;
+; bitcast to nxv2f16
+;
+
+define <vscale x 2 x half> @bitcast_nxv4i8_to_nxv2f16(<vscale x 4 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4i8_to_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    st1b { z0.s }, p0, [sp, #3, mul vl]
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [sp, #3, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x i8> %v to <vscale x 2 x half>
+  ret <vscale x 2 x half> %bc
+}
+
+define <vscale x 2 x half> @bitcast_nxv2i16_to_nxv2f16(<vscale x 2 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2i16_to_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x i16> %v to <vscale x 2 x half>
+  ret <vscale x 2 x half> %bc
+}
+
+define <vscale x 2 x half> @bitcast_nxv2bf16_to_nxv2f16(<vscale x 2 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2bf16_to_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x bfloat> %v to <vscale x 2 x half>
+  ret <vscale x 2 x half> %bc
+}
+
+;
+; bitcast to nxv2bf16
+;
+
+define <vscale x 2 x bfloat> @bitcast_nxv4i8_to_nxv2bf16(<vscale x 4 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4i8_to_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    st1b { z0.s }, p0, [sp, #3, mul vl]
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1h { z0.d }, p0/z, [sp, #3, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x i8> %v to <vscale x 2 x bfloat>
+  ret <vscale x 2 x bfloat> %bc
+}
+
+define <vscale x 2 x bfloat> @bitcast_nxv2i16_to_nxv2bf16(<vscale x 2 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2i16_to_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x i16> %v to <vscale x 2 x bfloat>
+  ret <vscale x 2 x bfloat> %bc
+}
+
+define <vscale x 2 x bfloat> @bitcast_nxv2f16_to_nxv2bf16(<vscale x 2 x half> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2f16_to_nxv2bf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x half> %v to <vscale x 2 x bfloat>
+  ret <vscale x 2 x bfloat> %bc
+}
+
+;
+; Other
+;
+
 define <vscale x 2 x i32> @bitcast_short_float_to_i32(<vscale x 2 x double> %v) #0 {
 ; CHECK-LABEL: bitcast_short_float_to_i32:
 ; CHECK:       // %bb.0:
@@ -537,6 +1049,8 @@ define <vscale x 2 x double> @bitcast_short_i32_to_float(<vscale x 2 x i64> %v)
   ret <vscale x 2 x double> %extended
 }
 
+; TODO: Invalid code generation because the bitcast must change the in-register
+; layout when casting between unpacked scalable vector types.
 define <vscale x 2 x float> @bitcast_short_half_to_float(<vscale x 4 x half> %v) #0 {
 ; CHECK-LABEL: bitcast_short_half_to_float:
 ; CHECK:       // %bb.0:
@@ -549,4 +1063,4 @@ define <vscale x 2 x float> @bitcast_short_half_to_float(<vscale x 4 x half> %v)
 }
 
 ; +bf16 is required for the bfloat version.
-attributes #0 = { "target-features"="+sve,+bf16" }
+attributes #0 = { nounwind "target-features"="+sve,+bf16" }


        


More information about the llvm-commits mailing list