[llvm] d883542 - [SelectionDAG] Remove invalid TypeSize conversion from PromoteIntRes_BITCAST.

Wed Jun 8 02:31:37 PDT 2022

Author: Paul Walker
Date: 2022-06-08T10:30:07+01:00
New Revision: d88354213cbbbbb55b4e2c00b86432fe7acb21c3

URL: https://github.com/llvm/llvm-project/commit/d88354213cbbbbb55b4e2c00b86432fe7acb21c3
DIFF: https://github.com/llvm/llvm-project/commit/d88354213cbbbbb55b4e2c00b86432fe7acb21c3.diff

LOG: [SelectionDAG] Remove invalid TypeSize conversion from PromoteIntRes_BITCAST.

Extend the TypeWidenVector case of PromoteIntRes_BITCAST to work
with TypeSize directly rather than silently casting to unsigned.

To accomplish this I've extended TypeSize with an interface that
essentially allows TypeSize division when both operands have the
same number of dimensions.

There still exist combinations of scalable vector bitcasts that
cause compiler crashes. I call these out by adding "is missing"
entries to sve-bitcast.ll.

Depends on D126957.
Fixes: #55114

Differential Revision: https://reviews.llvm.org/D127126

Added: 
    

Modified: 
    llvm/include/llvm/Support/TypeSize.h
    llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
    llvm/test/CodeGen/AArch64/sve-bitcast.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h
index 96f33c1e3d8cc..0b40e970e8c94 100644

--- a/llvm/include/llvm/Support/TypeSize.h
+++ b/llvm/include/llvm/Support/TypeSize.h
@@ -373,6 +373,20 @@ class LinearPolySize : public UnivariateLinearPolyBase<LeafTy> {
         isScalable()));
   }
 
+  /// Returns true if there exists a value X where RHS.multiplyCoefficientBy(X)
+  /// will result in a value whose size matches our own.
+  bool hasKnownScalarFactor(const LinearPolySize &RHS) const {
+    return isScalable() == RHS.isScalable() &&
+           getKnownMinValue() % RHS.getKnownMinValue() == 0;
+  }
+
+  /// Returns a value X where RHS.multiplyCoefficientBy(X) will result in a
+  /// value whose size matches our own.
+  ScalarTy getKnownScalarFactor(const LinearPolySize &RHS) const {
+    assert(hasKnownScalarFactor(RHS) && "Expected RHS to be a known factor!");
+    return getKnownMinValue() / RHS.getKnownMinValue();
+  }
+
   /// Printing function.
   void print(raw_ostream &OS) const {
     if (isScalable())

diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 123ccfd3550f9..a4198c4329f78 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -453,13 +453,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
     // as the widened input type would be a legal type, we can widen the bitcast
     // and handle the promotion after.
     if (NOutVT.isVector()) {
-      unsigned WidenInSize = NInVT.getSizeInBits();
-      unsigned OutSize = OutVT.getSizeInBits();
-      if (WidenInSize % OutSize == 0) {
-        unsigned Scale = WidenInSize / OutSize;
-        EVT WideOutVT = EVT::getVectorVT(*DAG.getContext(),
-                                         OutVT.getVectorElementType(),
-                                         OutVT.getVectorNumElements() * Scale);
+      TypeSize WidenInSize = NInVT.getSizeInBits();
+      TypeSize OutSize = OutVT.getSizeInBits();
+      if (WidenInSize.hasKnownScalarFactor(OutSize)) {
+        unsigned Scale = WidenInSize.getKnownScalarFactor(OutSize);
+        EVT WideOutVT =
+            EVT::getVectorVT(*DAG.getContext(), OutVT.getVectorElementType(),
+                             OutVT.getVectorElementCount() * Scale);
         if (isTypeLegal(WideOutVT)) {
           InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp));
           InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp,

diff  --git a/llvm/test/CodeGen/AArch64/sve-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-bitcast.ll
index 1b161f47664d3..0227b89e5a303 100644
--- a/llvm/test/CodeGen/AArch64/sve-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/sve-bitcast.ll
@@ -518,6 +518,15 @@ define <vscale x 8 x i8> @bitcast_nxv2i32_to_nxv8i8(<vscale x 2 x i32> %v) #0 {
   ret <vscale x 8 x i8> %bc
 }
 
+define <vscale x 8 x i8> @bitcast_nxv1i64_to_nxv8i8(<vscale x 1 x i64> %v) #0 {
+; CHECK-LABEL: bitcast_nxv1i64_to_nxv8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 1 x i64> %v to <vscale x 8 x i8>
+  ret <vscale x 8 x i8> %bc
+}
+
 define <vscale x 8 x i8> @bitcast_nxv4f16_to_nxv8i8(<vscale x 4 x half> %v) #0 {
 ; CHECK-LABEL: bitcast_nxv4f16_to_nxv8i8:
 ; CHECK:       // %bb.0:
@@ -550,6 +559,15 @@ define <vscale x 8 x i8> @bitcast_nxv2f32_to_nxv8i8(<vscale x 2 x float> %v) #0
   ret <vscale x 8 x i8> %bc
 }
 
+define <vscale x 8 x i8> @bitcast_nxv1f64_to_nxv8i8(<vscale x 1 x double> %v) #0 {
+; CHECK-LABEL: bitcast_nxv1f64_to_nxv8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 1 x double> %v to <vscale x 8 x i8>
+  ret <vscale x 8 x i8> %bc
+}
+
 define <vscale x 8 x i8> @bitcast_nxv4bf16_to_nxv8i8(<vscale x 4 x bfloat> %v) #0 {
 ; CHECK-LABEL: bitcast_nxv4bf16_to_nxv8i8:
 ; CHECK:       // %bb.0:
@@ -602,6 +620,15 @@ define <vscale x 4 x i16> @bitcast_nxv2i32_to_nxv4i16(<vscale x 2 x i32> %v) #0
   ret <vscale x 4 x i16> %bc
 }
 
+define <vscale x 4 x i16> @bitcast_nxv1i64_to_nxv4i16(<vscale x 1 x i64> %v) #0 {
+; CHECK-LABEL: bitcast_nxv1i64_to_nxv4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 1 x i64> %v to <vscale x 4 x i16>
+  ret <vscale x 4 x i16> %bc
+}
+
 define <vscale x 4 x i16> @bitcast_nxv4f16_to_nxv4i16(<vscale x 4 x half> %v) #0 {
 ; CHECK-LABEL: bitcast_nxv4f16_to_nxv4i16:
 ; CHECK:       // %bb.0:
@@ -626,6 +653,15 @@ define <vscale x 4 x i16> @bitcast_nxv2f32_to_nxv4i16(<vscale x 2 x float> %v) #
   ret <vscale x 4 x i16> %bc
 }
 
+define <vscale x 4 x i16> @bitcast_nxv1f64_to_nxv4i16(<vscale x 1 x double> %v) #0 {
+; CHECK-LABEL: bitcast_nxv1f64_to_nxv4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 1 x double> %v to <vscale x 4 x i16>
+  ret <vscale x 4 x i16> %bc
+}
+
 define <vscale x 4 x i16> @bitcast_nxv4bf16_to_nxv4i16(<vscale x 4 x bfloat> %v) #0 {
 ; CHECK-LABEL: bitcast_nxv4bf16_to_nxv4i16:
 ; CHECK:       // %bb.0:
@@ -670,6 +706,15 @@ define <vscale x 2 x i32> @bitcast_nxv4i16_to_nxv2i32(<vscale x 4 x i16> %v) #0
   ret <vscale x 2 x i32> %bc
 }
 
+define <vscale x 2 x i32> @bitcast_nxv1i64_to_nxv2i32(<vscale x 1 x i64> %v) #0 {
+; CHECK-LABEL: bitcast_nxv1i64_to_nxv2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 1 x i64> %v to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %bc
+}
+
 define <vscale x 2 x i32> @bitcast_nxv4f16_to_nxv2i32(<vscale x 4 x half> %v) #0 {
 ; CHECK-LABEL: bitcast_nxv4f16_to_nxv2i32:
 ; CHECK:       // %bb.0:
@@ -694,6 +739,15 @@ define <vscale x 2 x i32> @bitcast_nxv2f32_to_nxv2i32(<vscale x 2 x float> %v) #
   ret <vscale x 2 x i32> %bc
 }
 
+define <vscale x 2 x i32> @bitcast_nxv1f64_to_nxv2i32(<vscale x 1 x double> %v) #0 {
+; CHECK-LABEL: bitcast_nxv1f64_to_nxv2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 1 x double> %v to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %bc
+}
+
 define <vscale x 2 x i32> @bitcast_nxv4bf16_to_nxv2i32(<vscale x 4 x bfloat> %v) #0 {
 ; CHECK-LABEL: bitcast_nxv4bf16_to_nxv2i32:
 ; CHECK:       // %bb.0:
@@ -710,6 +764,72 @@ define <vscale x 2 x i32> @bitcast_nxv4bf16_to_nxv2i32(<vscale x 4 x bfloat> %v)
   ret <vscale x 2 x i32> %bc
 }
 
+;
+; bitcast to nxv1i64
+;
+
+define <vscale x 1 x i64> @bitcast_nxv8i8_to_nxv1i64(<vscale x 8 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_nxv8i8_to_nxv1i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 8 x i8> %v to <vscale x 1 x i64>
+  ret <vscale x 1 x i64> %bc
+}
+
+define <vscale x 1 x i64> @bitcast_nxv4i16_to_nxv1i64(<vscale x 4 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4i16_to_nxv1i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x i16> %v to <vscale x 1 x i64>
+  ret <vscale x 1 x i64> %bc
+}
+
+define <vscale x 1 x i64> @bitcast_nxv2i32_to_nxv1i64(<vscale x 2 x i32> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2i32_to_nxv1i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x i32> %v to <vscale x 1 x i64>
+  ret <vscale x 1 x i64> %bc
+}
+
+define <vscale x 1 x i64> @bitcast_nxv4f16_to_nxv1i64(<vscale x 4 x half> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4f16_to_nxv1i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x half> %v to <vscale x 1 x i64>
+  ret <vscale x 1 x i64> %bc
+}
+
+define <vscale x 1 x i64> @bitcast_nxv2f32_to_nxv1i64(<vscale x 2 x float> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2f32_to_nxv1i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x float> %v to <vscale x 1 x i64>
+  ret <vscale x 1 x i64> %bc
+}
+
+define <vscale x 1 x i64> @bitcast_nxv1f64_to_nxv1i64(<vscale x 1 x double> %v) #0 {
+; CHECK-LABEL: bitcast_nxv1f64_to_nxv1i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 1 x double> %v to <vscale x 1 x i64>
+  ret <vscale x 1 x i64> %bc
+}
+
+define <vscale x 1 x i64> @bitcast_nxv4bf16_to_nxv1i64(<vscale x 4 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4bf16_to_nxv1i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x bfloat> %v to <vscale x 1 x i64>
+  ret <vscale x 1 x i64> %bc
+}
+
 ;
 ; bitcast to nxv4f16
 ;
@@ -754,6 +874,8 @@ define <vscale x 4 x half> @bitcast_nxv2i32_to_nxv4f16(<vscale x 2 x i32> %v) #0
   ret <vscale x 4 x half> %bc
 }
 
+; @bitcast_nxv1i64_to_nxv4f16 is missing
+
 define <vscale x 4 x half> @bitcast_nxv2f32_to_nxv4f16(<vscale x 2 x float> %v) #0 {
 ; CHECK-LABEL: bitcast_nxv2f32_to_nxv4f16:
 ; CHECK:       // %bb.0:
@@ -770,6 +892,8 @@ define <vscale x 4 x half> @bitcast_nxv2f32_to_nxv4f16(<vscale x 2 x float> %v)
   ret <vscale x 4 x half> %bc
 }
 
+; @bitcast_nxv1f64_to_nxv4f16 is missing
+
 define <vscale x 4 x half> @bitcast_nxv4bf16_to_nxv4f16(<vscale x 4 x bfloat> %v) #0 {
 ; CHECK-LABEL: bitcast_nxv4bf16_to_nxv4f16:
 ; CHECK:       // %bb.0:
@@ -822,6 +946,8 @@ define <vscale x 2 x float> @bitcast_nxv2i32_to_nxv2f32(<vscale x 2 x i32> %v) #
   ret <vscale x 2 x float> %bc
 }
 
+; @bitcast_nxv1i64_to_nxv2f32 is missing
+
 define <vscale x 2 x float> @bitcast_nxv4f16_to_nxv2f32(<vscale x 4 x half> %v) #0 {
 ; CHECK-LABEL: bitcast_nxv4f16_to_nxv2f32:
 ; CHECK:       // %bb.0:
@@ -838,6 +964,8 @@ define <vscale x 2 x float> @bitcast_nxv4f16_to_nxv2f32(<vscale x 4 x half> %v)
   ret <vscale x 2 x float> %bc
 }
 
+; @bitcast_nxv1f64_to_nxv2f32 is missing
+
 define <vscale x 2 x float> @bitcast_nxv4bf16_to_nxv2f32(<vscale x 4 x bfloat> %v) #0 {
 ; CHECK-LABEL: bitcast_nxv4bf16_to_nxv2f32:
 ; CHECK:       // %bb.0:
@@ -854,6 +982,18 @@ define <vscale x 2 x float> @bitcast_nxv4bf16_to_nxv2f32(<vscale x 4 x bfloat> %
   ret <vscale x 2 x float> %bc
 }
 
+;
+; bitcast to nxv1f64
+;
+
+; @bitcast_nxv8i8_to_nxv1f64 is missing
+; @bitcast_nxv4i16_to_nxv1f64 is missing
+; @bitcast_nxv2i32_to_nxv1f64 is missing
+; @bitcast_nxv1i64_to_nxv1f64 is missing
+; @bitcast_nxv4f16_to_nxv1f64 is missing
+; @bitcast_nxv2f32_to_nxv1f64 is missing
+; @bitcast_nxv4bf16_to_nxv1f64 is missing
+
 ;
 ; bitcast to nxv4bf16
 ;
@@ -898,6 +1038,8 @@ define <vscale x 4 x bfloat> @bitcast_nxv2i32_to_nxv4bf16(<vscale x 2 x i32> %v)
   ret <vscale x 4 x bfloat> %bc
 }
 
+; @bitcast_nxv1i64_to_nxv4bf16 is missing
+
 define <vscale x 4 x bfloat> @bitcast_nxv4f16_to_nxv4bf16(<vscale x 4 x half> %v) #0 {
 ; CHECK-LABEL: bitcast_nxv4f16_to_nxv4bf16:
 ; CHECK:       // %bb.0:
@@ -922,6 +1064,8 @@ define <vscale x 4 x bfloat> @bitcast_nxv2f32_to_nxv4bf16(<vscale x 2 x float> %
   ret <vscale x 4 x bfloat> %bc
 }
 
+; @bitcast_nxv1f64_to_nxv4bf16 is missing
+
 ;
 ; bitcast to nxv4i8
 ;
@@ -942,6 +1086,16 @@ define <vscale x 4 x i8> @bitcast_nxv2i16_to_nxv4i8(<vscale x 2 x i16> %v) #0 {
   ret <vscale x 4 x i8> %bc
 }
 
+define <vscale x 4 x i8> @bitcast_nxv1i32_to_nxv4i8(<vscale x 1 x i32> %v) #0 {
+; CHECK-LABEL: bitcast_nxv1i32_to_nxv4i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 1 x i32> %v to <vscale x 4 x i8>
+  ret <vscale x 4 x i8> %bc
+}
+
 define <vscale x 4 x i8> @bitcast_nxv2f16_to_nxv4i8(<vscale x 2 x half> %v) #0 {
 ; CHECK-LABEL: bitcast_nxv2f16_to_nxv4i8:
 ; CHECK:       // %bb.0:
@@ -958,6 +1112,8 @@ define <vscale x 4 x i8> @bitcast_nxv2f16_to_nxv4i8(<vscale x 2 x half> %v) #0 {
   ret <vscale x 4 x i8> %bc
 }
 
+; @bitcast_nxv1f32_to_nxv4i8 is missing
+
 define <vscale x 4 x i8> @bitcast_nxv2bf16_to_nxv4i8(<vscale x 2 x bfloat> %v) #0 {
 ; CHECK-LABEL: bitcast_nxv2bf16_to_nxv4i8:
 ; CHECK:       // %bb.0:
@@ -994,6 +1150,16 @@ define <vscale x 2 x i16> @bitcast_nxv4i8_to_nxv2i16(<vscale x 4 x i8> %v) #0 {
   ret <vscale x 2 x i16> %bc
 }
 
+define <vscale x 2 x i16> @bitcast_nxv1i32_to_nxv2i16(<vscale x 1 x i32> %v) #0 {
+; CHECK-LABEL: bitcast_nxv1i32_to_nxv2i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 1 x i32> %v to <vscale x 2 x i16>
+  ret <vscale x 2 x i16> %bc
+}
+
 define <vscale x 2 x i16> @bitcast_nxv2f16_to_nxv2i16(<vscale x 2 x half> %v) #0 {
 ; CHECK-LABEL: bitcast_nxv2f16_to_nxv2i16:
 ; CHECK:       // %bb.0:
@@ -1002,6 +1168,8 @@ define <vscale x 2 x i16> @bitcast_nxv2f16_to_nxv2i16(<vscale x 2 x half> %v) #0
   ret <vscale x 2 x i16> %bc
 }
 
+; @bitcast_nxv1f32_to_nxv2i16 is missing
+
 define <vscale x 2 x i16> @bitcast_nxv2bf16_to_nxv2i16(<vscale x 2 x bfloat> %v) #0 {
 ; CHECK-LABEL: bitcast_nxv2bf16_to_nxv2i16:
 ; CHECK:       // %bb.0:
@@ -1010,6 +1178,64 @@ define <vscale x 2 x i16> @bitcast_nxv2bf16_to_nxv2i16(<vscale x 2 x bfloat> %v)
   ret <vscale x 2 x i16> %bc
 }
 
+;
+; bitcast to nxv1i32
+;
+
+define <vscale x 1 x i32> @bitcast_nxv4i8_to_nxv1i32(<vscale x 4 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_nxv4i8_to_nxv1i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 4 x i8> %v to <vscale x 1 x i32>
+  ret <vscale x 1 x i32> %bc
+}
+
+define <vscale x 1 x i32> @bitcast_nxv2i16_to_nxv1i32(<vscale x 2 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2i16_to_nxv1i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x i16> %v to <vscale x 1 x i32>
+  ret <vscale x 1 x i32> %bc
+}
+
+define <vscale x 1 x i32> @bitcast_nxv2f16_to_nxv1i32(<vscale x 2 x half> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2f16_to_nxv1i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    st1h { z0.d }, p0, [sp]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x half> %v to <vscale x 1 x i32>
+  ret <vscale x 1 x i32> %bc
+}
+
+; @bitcast_nxv1f32_to_nxv1i32 is missing
+
+define <vscale x 1 x i32> @bitcast_nxv2bf16_to_nxv1i32(<vscale x 2 x bfloat> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2bf16_to_nxv1i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    st1h { z0.d }, p0, [sp]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x bfloat> %v to <vscale x 1 x i32>
+  ret <vscale x 1 x i32> %bc
+}
+
 ;
 ; bitcast to nxv2f16
 ;
@@ -1038,6 +1264,9 @@ define <vscale x 2 x half> @bitcast_nxv2i16_to_nxv2f16(<vscale x 2 x i16> %v) #0
   ret <vscale x 2 x half> %bc
 }
 
+; @bitcast_nxv1i32_to_nxv2f16 is missing
+; @bitcast_nxv1f32_to_nxv2f16 is missing
+
 define <vscale x 2 x half> @bitcast_nxv2bf16_to_nxv2f16(<vscale x 2 x bfloat> %v) #0 {
 ; CHECK-LABEL: bitcast_nxv2bf16_to_nxv2f16:
 ; CHECK:       // %bb.0:
@@ -1074,6 +1303,8 @@ define <vscale x 2 x bfloat> @bitcast_nxv2i16_to_nxv2bf16(<vscale x 2 x i16> %v)
   ret <vscale x 2 x bfloat> %bc
 }
 
+; @bitcast_nxv1i32_to_nxv2bf16 is missing
+
 define <vscale x 2 x bfloat> @bitcast_nxv2f16_to_nxv2bf16(<vscale x 2 x half> %v) #0 {
 ; CHECK-LABEL: bitcast_nxv2f16_to_nxv2bf16:
 ; CHECK:       // %bb.0:
@@ -1082,6 +1313,44 @@ define <vscale x 2 x bfloat> @bitcast_nxv2f16_to_nxv2bf16(<vscale x 2 x half> %v
   ret <vscale x 2 x bfloat> %bc
 }
 
+; @bitcast_nxv1f32_to_nxv2bf16 is missing
+
+;
+; bitcast to nxv2i8
+;
+
+define <vscale x 2 x i8> @bitcast_nxv1i16_to_nxv2i8(<vscale x 1 x i16> %v) #0 {
+; CHECK-LABEL: bitcast_nxv1i16_to_nxv2i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 1 x i16> %v to <vscale x 2 x i8>
+  ret <vscale x 2 x i8> %bc
+}
+
+; @bitcast_nxv1f16_to_nxv2i8 is missing
+; @bitcast_nxv1bf16_to_nxv2i8 is missing
+
+;
+; bitcast to nxv1i16
+;
+
+define <vscale x 1 x i16> @bitcast_nxv2i8_to_nxv1i16(<vscale x 2 x i8> %v) #0 {
+; CHECK-LABEL: bitcast_nxv2i8_to_nxv1i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    ret
+  %bc = bitcast <vscale x 2 x i8> %v to <vscale x 1 x i16>
+  ret <vscale x 1 x i16> %bc
+}
+
+; @bitcast_nxv1f16_to_nxv1i16 is missing
+; @bitcast_nxv1bf16_to_nxv1i16 is missing
+
 ;
 ; Other
 ;