[llvm] 3d65f82 - [SVE] Expand scalable vector ISD::BITCASTs when targeting big-endian.
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 10 04:03:13 PDT 2023
Author: Paul Walker
Date: 2023-08-10T11:02:01Z
New Revision: 3d65f8211f8d2ebf584bc2ac08a6a2f098130d79
URL: https://github.com/llvm/llvm-project/commit/3d65f8211f8d2ebf584bc2ac08a6a2f098130d79
DIFF: https://github.com/llvm/llvm-project/commit/3d65f8211f8d2ebf584bc2ac08a6a2f098130d79.diff
LOG: [SVE] Expand scalable vector ISD::BITCASTs when targeting big-endian.
Whilst sub-optimal, it's better than the current selection failure.
Fixes: #64406
Differential Revision: https://reviews.llvm.org/D157406
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-bitcast.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a04188903748fc..4d322cecaf8df2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1337,6 +1337,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::AVGFLOORU, VT, Custom);
setOperationAction(ISD::AVGCEILS, VT, Custom);
setOperationAction(ISD::AVGCEILU, VT, Custom);
+
+ if (!Subtarget->isLittleEndian())
+ setOperationAction(ISD::BITCAST, VT, Expand);
}
// Illegal unpacked integer vector types.
@@ -1486,6 +1489,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
setCondCodeAction(ISD::SETONE, VT, Expand);
+
+ if (!Subtarget->isLittleEndian())
+ setOperationAction(ISD::BITCAST, VT, Expand);
}
for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
@@ -1495,6 +1501,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
+
+ if (!Subtarget->isLittleEndian())
+ setOperationAction(ISD::BITCAST, VT, Expand);
}
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
diff --git a/llvm/test/CodeGen/AArch64/sve-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-bitcast.ll
index 40d57e4a451ca8..5f8fcb3d56e4b2 100644
--- a/llvm/test/CodeGen/AArch64/sve-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/sve-bitcast.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
-; RUN: not --crash llc -mtriple=aarch64_be < %s
+; RUN: llc -mtriple=aarch64_be < %s | FileCheck %s --check-prefix=CHECK_BE
;
; bitcast to nxv16i8
@@ -10,6 +10,18 @@ define <vscale x 16 x i8> @bitcast_nxv8i16_to_nxv16i8(<vscale x 8 x i16> %v) #0
; CHECK-LABEL: bitcast_nxv8i16_to_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8i16_to_nxv16i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: ld1b { z0.b }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x i16> %v to <vscale x 16 x i8>
ret <vscale x 16 x i8> %bc
}
@@ -18,6 +30,18 @@ define <vscale x 16 x i8> @bitcast_nxv4i32_to_nxv16i8(<vscale x 4 x i32> %v) #0
; CHECK-LABEL: bitcast_nxv4i32_to_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4i32_to_nxv16i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: ld1b { z0.b }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x i32> %v to <vscale x 16 x i8>
ret <vscale x 16 x i8> %bc
}
@@ -26,6 +50,18 @@ define <vscale x 16 x i8> @bitcast_nxv2i64_to_nxv16i8(<vscale x 2 x i64> %v) #0
; CHECK-LABEL: bitcast_nxv2i64_to_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i64_to_nxv16i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: ld1b { z0.b }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i64> %v to <vscale x 16 x i8>
ret <vscale x 16 x i8> %bc
}
@@ -34,6 +70,18 @@ define <vscale x 16 x i8> @bitcast_nxv8f16_to_nxv16i8(<vscale x 8 x half> %v) #0
; CHECK-LABEL: bitcast_nxv8f16_to_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8f16_to_nxv16i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: ld1b { z0.b }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x half> %v to <vscale x 16 x i8>
ret <vscale x 16 x i8> %bc
}
@@ -42,6 +90,18 @@ define <vscale x 16 x i8> @bitcast_nxv4f32_to_nxv16i8(<vscale x 4 x float> %v) #
; CHECK-LABEL: bitcast_nxv4f32_to_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4f32_to_nxv16i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: ld1b { z0.b }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x float> %v to <vscale x 16 x i8>
ret <vscale x 16 x i8> %bc
}
@@ -50,6 +110,18 @@ define <vscale x 16 x i8> @bitcast_nxv2f64_to_nxv16i8(<vscale x 2 x double> %v)
; CHECK-LABEL: bitcast_nxv2f64_to_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2f64_to_nxv16i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: ld1b { z0.b }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x double> %v to <vscale x 16 x i8>
ret <vscale x 16 x i8> %bc
}
@@ -58,6 +130,18 @@ define <vscale x 16 x i8> @bitcast_nxv8bf16_to_nxv16i8(<vscale x 8 x bfloat> %v)
; CHECK-LABEL: bitcast_nxv8bf16_to_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8bf16_to_nxv16i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: ld1b { z0.b }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 16 x i8>
ret <vscale x 16 x i8> %bc
}
@@ -70,6 +154,18 @@ define <vscale x 8 x i16> @bitcast_nxv16i8_to_nxv8i16(<vscale x 16 x i8> %v) #0
; CHECK-LABEL: bitcast_nxv16i8_to_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv16i8_to_nxv8i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 16 x i8> %v to <vscale x 8 x i16>
ret <vscale x 8 x i16> %bc
}
@@ -78,6 +174,18 @@ define <vscale x 8 x i16> @bitcast_nxv4i32_to_nxv8i16(<vscale x 4 x i32> %v) #0
; CHECK-LABEL: bitcast_nxv4i32_to_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4i32_to_nxv8i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x i32> %v to <vscale x 8 x i16>
ret <vscale x 8 x i16> %bc
}
@@ -86,6 +194,18 @@ define <vscale x 8 x i16> @bitcast_nxv2i64_to_nxv8i16(<vscale x 2 x i64> %v) #0
; CHECK-LABEL: bitcast_nxv2i64_to_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i64_to_nxv8i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i64> %v to <vscale x 8 x i16>
ret <vscale x 8 x i16> %bc
}
@@ -94,6 +214,17 @@ define <vscale x 8 x i16> @bitcast_nxv8f16_to_nxv8i16(<vscale x 8 x half> %v) #0
; CHECK-LABEL: bitcast_nxv8f16_to_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8f16_to_nxv8i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x half> %v to <vscale x 8 x i16>
ret <vscale x 8 x i16> %bc
}
@@ -102,6 +233,18 @@ define <vscale x 8 x i16> @bitcast_nxv4f32_to_nxv8i16(<vscale x 4 x float> %v) #
; CHECK-LABEL: bitcast_nxv4f32_to_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4f32_to_nxv8i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x float> %v to <vscale x 8 x i16>
ret <vscale x 8 x i16> %bc
}
@@ -110,6 +253,18 @@ define <vscale x 8 x i16> @bitcast_nxv2f64_to_nxv8i16(<vscale x 2 x double> %v)
; CHECK-LABEL: bitcast_nxv2f64_to_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2f64_to_nxv8i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x double> %v to <vscale x 8 x i16>
ret <vscale x 8 x i16> %bc
}
@@ -118,6 +273,17 @@ define <vscale x 8 x i16> @bitcast_nxv8bf16_to_nxv8i16(<vscale x 8 x bfloat> %v)
; CHECK-LABEL: bitcast_nxv8bf16_to_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8bf16_to_nxv8i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 8 x i16>
ret <vscale x 8 x i16> %bc
}
@@ -130,6 +296,18 @@ define <vscale x 4 x i32> @bitcast_nxv16i8_to_nxv4i32(<vscale x 16 x i8> %v) #0
; CHECK-LABEL: bitcast_nxv16i8_to_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv16i8_to_nxv4i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 16 x i8> %v to <vscale x 4 x i32>
ret <vscale x 4 x i32> %bc
}
@@ -138,6 +316,18 @@ define <vscale x 4 x i32> @bitcast_nxv8i16_to_nxv4i32(<vscale x 8 x i16> %v) #0
; CHECK-LABEL: bitcast_nxv8i16_to_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8i16_to_nxv4i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x i16> %v to <vscale x 4 x i32>
ret <vscale x 4 x i32> %bc
}
@@ -146,6 +336,18 @@ define <vscale x 4 x i32> @bitcast_nxv2i64_to_nxv4i32(<vscale x 2 x i64> %v) #0
; CHECK-LABEL: bitcast_nxv2i64_to_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i64_to_nxv4i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i64> %v to <vscale x 4 x i32>
ret <vscale x 4 x i32> %bc
}
@@ -154,6 +356,18 @@ define <vscale x 4 x i32> @bitcast_nxv8f16_to_nxv4i32(<vscale x 8 x half> %v) #0
; CHECK-LABEL: bitcast_nxv8f16_to_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8f16_to_nxv4i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x half> %v to <vscale x 4 x i32>
ret <vscale x 4 x i32> %bc
}
@@ -162,6 +376,17 @@ define <vscale x 4 x i32> @bitcast_nxv4f32_to_nxv4i32(<vscale x 4 x float> %v) #
; CHECK-LABEL: bitcast_nxv4f32_to_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4f32_to_nxv4i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x float> %v to <vscale x 4 x i32>
ret <vscale x 4 x i32> %bc
}
@@ -170,6 +395,18 @@ define <vscale x 4 x i32> @bitcast_nxv2f64_to_nxv4i32(<vscale x 2 x double> %v)
; CHECK-LABEL: bitcast_nxv2f64_to_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2f64_to_nxv4i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x double> %v to <vscale x 4 x i32>
ret <vscale x 4 x i32> %bc
}
@@ -178,6 +415,18 @@ define <vscale x 4 x i32> @bitcast_nxv8bf16_to_nxv4i32(<vscale x 8 x bfloat> %v)
; CHECK-LABEL: bitcast_nxv8bf16_to_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8bf16_to_nxv4i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 4 x i32>
ret <vscale x 4 x i32> %bc
}
@@ -190,6 +439,18 @@ define <vscale x 2 x i64> @bitcast_nxv16i8_to_nxv2i64(<vscale x 16 x i8> %v) #0
; CHECK-LABEL: bitcast_nxv16i8_to_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv16i8_to_nxv2i64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 16 x i8> %v to <vscale x 2 x i64>
ret <vscale x 2 x i64> %bc
}
@@ -198,6 +459,18 @@ define <vscale x 2 x i64> @bitcast_nxv8i16_to_nxv2i64(<vscale x 8 x i16> %v) #0
; CHECK-LABEL: bitcast_nxv8i16_to_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8i16_to_nxv2i64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x i16> %v to <vscale x 2 x i64>
ret <vscale x 2 x i64> %bc
}
@@ -206,6 +479,18 @@ define <vscale x 2 x i64> @bitcast_nxv4i32_to_nxv2i64(<vscale x 4 x i32> %v) #0
; CHECK-LABEL: bitcast_nxv4i32_to_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4i32_to_nxv2i64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x i32> %v to <vscale x 2 x i64>
ret <vscale x 2 x i64> %bc
}
@@ -214,6 +499,18 @@ define <vscale x 2 x i64> @bitcast_nxv8f16_to_nxv2i64(<vscale x 8 x half> %v) #0
; CHECK-LABEL: bitcast_nxv8f16_to_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8f16_to_nxv2i64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x half> %v to <vscale x 2 x i64>
ret <vscale x 2 x i64> %bc
}
@@ -222,6 +519,18 @@ define <vscale x 2 x i64> @bitcast_nxv4f32_to_nxv2i64(<vscale x 4 x float> %v) #
; CHECK-LABEL: bitcast_nxv4f32_to_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4f32_to_nxv2i64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x float> %v to <vscale x 2 x i64>
ret <vscale x 2 x i64> %bc
}
@@ -230,6 +539,17 @@ define <vscale x 2 x i64> @bitcast_nxv2f64_to_nxv2i64(<vscale x 2 x double> %v)
; CHECK-LABEL: bitcast_nxv2f64_to_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2f64_to_nxv2i64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x double> %v to <vscale x 2 x i64>
ret <vscale x 2 x i64> %bc
}
@@ -238,6 +558,18 @@ define <vscale x 2 x i64> @bitcast_nxv8bf16_to_nxv2i64(<vscale x 8 x bfloat> %v)
; CHECK-LABEL: bitcast_nxv8bf16_to_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8bf16_to_nxv2i64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 2 x i64>
ret <vscale x 2 x i64> %bc
}
@@ -250,6 +582,18 @@ define <vscale x 8 x half> @bitcast_nxv16i8_to_nxv8f16(<vscale x 16 x i8> %v) #0
; CHECK-LABEL: bitcast_nxv16i8_to_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv16i8_to_nxv8f16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 16 x i8> %v to <vscale x 8 x half>
ret <vscale x 8 x half> %bc
}
@@ -258,6 +602,17 @@ define <vscale x 8 x half> @bitcast_nxv8i16_to_nxv8f16(<vscale x 8 x i16> %v) #0
; CHECK-LABEL: bitcast_nxv8i16_to_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8i16_to_nxv8f16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x i16> %v to <vscale x 8 x half>
ret <vscale x 8 x half> %bc
}
@@ -266,6 +621,18 @@ define <vscale x 8 x half> @bitcast_nxv4i32_to_nxv8f16(<vscale x 4 x i32> %v) #0
; CHECK-LABEL: bitcast_nxv4i32_to_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4i32_to_nxv8f16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x i32> %v to <vscale x 8 x half>
ret <vscale x 8 x half> %bc
}
@@ -274,6 +641,18 @@ define <vscale x 8 x half> @bitcast_nxv2i64_to_nxv8f16(<vscale x 2 x i64> %v) #0
; CHECK-LABEL: bitcast_nxv2i64_to_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i64_to_nxv8f16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i64> %v to <vscale x 8 x half>
ret <vscale x 8 x half> %bc
}
@@ -282,6 +661,18 @@ define <vscale x 8 x half> @bitcast_nxv4f32_to_nxv8f16(<vscale x 4 x float> %v)
; CHECK-LABEL: bitcast_nxv4f32_to_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4f32_to_nxv8f16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x float> %v to <vscale x 8 x half>
ret <vscale x 8 x half> %bc
}
@@ -290,6 +681,18 @@ define <vscale x 8 x half> @bitcast_nxv2f64_to_nxv8f16(<vscale x 2 x double> %v)
; CHECK-LABEL: bitcast_nxv2f64_to_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2f64_to_nxv8f16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x double> %v to <vscale x 8 x half>
ret <vscale x 8 x half> %bc
}
@@ -298,6 +701,17 @@ define <vscale x 8 x half> @bitcast_nxv8bf16_to_nxv8f16(<vscale x 8 x bfloat> %v
; CHECK-LABEL: bitcast_nxv8bf16_to_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8bf16_to_nxv8f16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 8 x half>
ret <vscale x 8 x half> %bc
}
@@ -310,6 +724,18 @@ define <vscale x 4 x float> @bitcast_nxv16i8_to_nxv4f32(<vscale x 16 x i8> %v) #
; CHECK-LABEL: bitcast_nxv16i8_to_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv16i8_to_nxv4f32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 16 x i8> %v to <vscale x 4 x float>
ret <vscale x 4 x float> %bc
}
@@ -318,6 +744,18 @@ define <vscale x 4 x float> @bitcast_nxv8i16_to_nxv4f32(<vscale x 8 x i16> %v) #
; CHECK-LABEL: bitcast_nxv8i16_to_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8i16_to_nxv4f32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x i16> %v to <vscale x 4 x float>
ret <vscale x 4 x float> %bc
}
@@ -326,6 +764,17 @@ define <vscale x 4 x float> @bitcast_nxv4i32_to_nxv4f32(<vscale x 4 x i32> %v) #
; CHECK-LABEL: bitcast_nxv4i32_to_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4i32_to_nxv4f32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x i32> %v to <vscale x 4 x float>
ret <vscale x 4 x float> %bc
}
@@ -334,6 +783,18 @@ define <vscale x 4 x float> @bitcast_nxv2i64_to_nxv4f32(<vscale x 2 x i64> %v) #
; CHECK-LABEL: bitcast_nxv2i64_to_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i64_to_nxv4f32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i64> %v to <vscale x 4 x float>
ret <vscale x 4 x float> %bc
}
@@ -342,6 +803,18 @@ define <vscale x 4 x float> @bitcast_nxv8f16_to_nxv4f32(<vscale x 8 x half> %v)
; CHECK-LABEL: bitcast_nxv8f16_to_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8f16_to_nxv4f32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x half> %v to <vscale x 4 x float>
ret <vscale x 4 x float> %bc
}
@@ -350,6 +823,18 @@ define <vscale x 4 x float> @bitcast_nxv2f64_to_nxv4f32(<vscale x 2 x double> %v
; CHECK-LABEL: bitcast_nxv2f64_to_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2f64_to_nxv4f32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x double> %v to <vscale x 4 x float>
ret <vscale x 4 x float> %bc
}
@@ -358,6 +843,18 @@ define <vscale x 4 x float> @bitcast_nxv8bf16_to_nxv4f32(<vscale x 8 x bfloat> %
; CHECK-LABEL: bitcast_nxv8bf16_to_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8bf16_to_nxv4f32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 4 x float>
ret <vscale x 4 x float> %bc
}
@@ -370,6 +867,18 @@ define <vscale x 2 x double> @bitcast_nxv16i8_to_nxv2f64(<vscale x 16 x i8> %v)
; CHECK-LABEL: bitcast_nxv16i8_to_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv16i8_to_nxv2f64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 16 x i8> %v to <vscale x 2 x double>
ret <vscale x 2 x double> %bc
}
@@ -378,6 +887,18 @@ define <vscale x 2 x double> @bitcast_nxv8i16_to_nxv2f64(<vscale x 8 x i16> %v)
; CHECK-LABEL: bitcast_nxv8i16_to_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8i16_to_nxv2f64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x i16> %v to <vscale x 2 x double>
ret <vscale x 2 x double> %bc
}
@@ -386,6 +907,18 @@ define <vscale x 2 x double> @bitcast_nxv4i32_to_nxv2f64(<vscale x 4 x i32> %v)
; CHECK-LABEL: bitcast_nxv4i32_to_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4i32_to_nxv2f64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x i32> %v to <vscale x 2 x double>
ret <vscale x 2 x double> %bc
}
@@ -394,6 +927,17 @@ define <vscale x 2 x double> @bitcast_nxv2i64_to_nxv2f64(<vscale x 2 x i64> %v)
; CHECK-LABEL: bitcast_nxv2i64_to_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i64_to_nxv2f64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i64> %v to <vscale x 2 x double>
ret <vscale x 2 x double> %bc
}
@@ -402,6 +946,18 @@ define <vscale x 2 x double> @bitcast_nxv8f16_to_nxv2f64(<vscale x 8 x half> %v)
; CHECK-LABEL: bitcast_nxv8f16_to_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8f16_to_nxv2f64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x half> %v to <vscale x 2 x double>
ret <vscale x 2 x double> %bc
}
@@ -410,6 +966,18 @@ define <vscale x 2 x double> @bitcast_nxv4f32_to_nxv2f64(<vscale x 4 x float> %v
; CHECK-LABEL: bitcast_nxv4f32_to_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4f32_to_nxv2f64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x float> %v to <vscale x 2 x double>
ret <vscale x 2 x double> %bc
}
@@ -418,6 +986,18 @@ define <vscale x 2 x double> @bitcast_nxv8bf16_to_nxv2f64(<vscale x 8 x bfloat>
; CHECK-LABEL: bitcast_nxv8bf16_to_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8bf16_to_nxv2f64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x bfloat> %v to <vscale x 2 x double>
ret <vscale x 2 x double> %bc
}
@@ -430,6 +1010,18 @@ define <vscale x 8 x bfloat> @bitcast_nxv16i8_to_nxv8bf16(<vscale x 16 x i8> %v)
; CHECK-LABEL: bitcast_nxv16i8_to_nxv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv16i8_to_nxv8bf16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 16 x i8> %v to <vscale x 8 x bfloat>
ret <vscale x 8 x bfloat> %bc
}
@@ -438,6 +1030,17 @@ define <vscale x 8 x bfloat> @bitcast_nxv8i16_to_nxv8bf16(<vscale x 8 x i16> %v)
; CHECK-LABEL: bitcast_nxv8i16_to_nxv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8i16_to_nxv8bf16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x i16> %v to <vscale x 8 x bfloat>
ret <vscale x 8 x bfloat> %bc
}
@@ -446,6 +1049,18 @@ define <vscale x 8 x bfloat> @bitcast_nxv4i32_to_nxv8bf16(<vscale x 4 x i32> %v)
; CHECK-LABEL: bitcast_nxv4i32_to_nxv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4i32_to_nxv8bf16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x i32> %v to <vscale x 8 x bfloat>
ret <vscale x 8 x bfloat> %bc
}
@@ -454,6 +1069,18 @@ define <vscale x 8 x bfloat> @bitcast_nxv2i64_to_nxv8bf16(<vscale x 2 x i64> %v)
; CHECK-LABEL: bitcast_nxv2i64_to_nxv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i64_to_nxv8bf16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i64> %v to <vscale x 8 x bfloat>
ret <vscale x 8 x bfloat> %bc
}
@@ -462,6 +1089,17 @@ define <vscale x 8 x bfloat> @bitcast_nxv8f16_to_nxv8bf16(<vscale x 8 x half> %v
; CHECK-LABEL: bitcast_nxv8f16_to_nxv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8f16_to_nxv8bf16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x half> %v to <vscale x 8 x bfloat>
ret <vscale x 8 x bfloat> %bc
}
@@ -470,6 +1108,18 @@ define <vscale x 8 x bfloat> @bitcast_nxv4f32_to_nxv8bf16(<vscale x 4 x float> %
; CHECK-LABEL: bitcast_nxv4f32_to_nxv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4f32_to_nxv8bf16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x float> %v to <vscale x 8 x bfloat>
ret <vscale x 8 x bfloat> %bc
}
@@ -478,6 +1128,18 @@ define <vscale x 8 x bfloat> @bitcast_nxv2f64_to_nxv8bf16(<vscale x 2 x double>
; CHECK-LABEL: bitcast_nxv2f64_to_nxv8bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2f64_to_nxv8bf16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x double> %v to <vscale x 8 x bfloat>
ret <vscale x 8 x bfloat> %bc
}
@@ -498,6 +1160,18 @@ define <vscale x 8 x i8> @bitcast_nxv4i16_to_nxv8i8(<vscale x 4 x i16> %v) #0 {
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4i16_to_nxv8i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x i16> %v to <vscale x 8 x i8>
ret <vscale x 8 x i8> %bc
}
@@ -514,6 +1188,18 @@ define <vscale x 8 x i8> @bitcast_nxv2i32_to_nxv8i8(<vscale x 2 x i32> %v) #0 {
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i32_to_nxv8i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i32> %v to <vscale x 8 x i8>
ret <vscale x 8 x i8> %bc
}
@@ -523,6 +1209,19 @@ define <vscale x 8 x i8> @bitcast_nxv1i64_to_nxv8i8(<vscale x 1 x i64> %v) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv1i64_to_nxv8i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: ld1b { z0.b }, p0/z, [sp]
+; CHECK_BE-NEXT: uunpklo z0.h, z0.b
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 1 x i64> %v to <vscale x 8 x i8>
ret <vscale x 8 x i8> %bc
}
@@ -539,6 +1238,18 @@ define <vscale x 8 x i8> @bitcast_nxv4f16_to_nxv8i8(<vscale x 4 x half> %v) #0 {
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4f16_to_nxv8i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x half> %v to <vscale x 8 x i8>
ret <vscale x 8 x i8> %bc
}
@@ -555,6 +1266,18 @@ define <vscale x 8 x i8> @bitcast_nxv2f32_to_nxv8i8(<vscale x 2 x float> %v) #0
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2f32_to_nxv8i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x float> %v to <vscale x 8 x i8>
ret <vscale x 8 x i8> %bc
}
@@ -564,6 +1287,19 @@ define <vscale x 8 x i8> @bitcast_nxv1f64_to_nxv8i8(<vscale x 1 x double> %v) #0
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv1f64_to_nxv8i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: ld1b { z0.b }, p0/z, [sp]
+; CHECK_BE-NEXT: uunpklo z0.h, z0.b
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 1 x double> %v to <vscale x 8 x i8>
ret <vscale x 8 x i8> %bc
}
@@ -580,6 +1316,18 @@ define <vscale x 8 x i8> @bitcast_nxv4bf16_to_nxv8i8(<vscale x 4 x bfloat> %v) #
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4bf16_to_nxv8i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x bfloat> %v to <vscale x 8 x i8>
ret <vscale x 8 x i8> %bc
}
@@ -600,6 +1348,18 @@ define <vscale x 4 x i16> @bitcast_nxv8i8_to_nxv4i16(<vscale x 8 x i8> %v) #0 {
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8i8_to_nxv4i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x i8> %v to <vscale x 4 x i16>
ret <vscale x 4 x i16> %bc
}
@@ -616,6 +1376,18 @@ define <vscale x 4 x i16> @bitcast_nxv2i32_to_nxv4i16(<vscale x 2 x i32> %v) #0
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i32_to_nxv4i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i32> %v to <vscale x 4 x i16>
ret <vscale x 4 x i16> %bc
}
@@ -625,6 +1397,19 @@ define <vscale x 4 x i16> @bitcast_nxv1i64_to_nxv4i16(<vscale x 1 x i64> %v) #0
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv1i64_to_nxv4i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: uunpklo z0.s, z0.h
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 1 x i64> %v to <vscale x 4 x i16>
ret <vscale x 4 x i16> %bc
}
@@ -633,6 +1418,18 @@ define <vscale x 4 x i16> @bitcast_nxv4f16_to_nxv4i16(<vscale x 4 x half> %v) #0
; CHECK-LABEL: bitcast_nxv4f16_to_nxv4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4f16_to_nxv4i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x half> %v to <vscale x 4 x i16>
ret <vscale x 4 x i16> %bc
}
@@ -649,6 +1446,18 @@ define <vscale x 4 x i16> @bitcast_nxv2f32_to_nxv4i16(<vscale x 2 x float> %v) #
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2f32_to_nxv4i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x float> %v to <vscale x 4 x i16>
ret <vscale x 4 x i16> %bc
}
@@ -658,6 +1467,19 @@ define <vscale x 4 x i16> @bitcast_nxv1f64_to_nxv4i16(<vscale x 1 x double> %v)
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv1f64_to_nxv4i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: uunpklo z0.s, z0.h
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 1 x double> %v to <vscale x 4 x i16>
ret <vscale x 4 x i16> %bc
}
@@ -666,6 +1488,18 @@ define <vscale x 4 x i16> @bitcast_nxv4bf16_to_nxv4i16(<vscale x 4 x bfloat> %v)
; CHECK-LABEL: bitcast_nxv4bf16_to_nxv4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4bf16_to_nxv4i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x bfloat> %v to <vscale x 4 x i16>
ret <vscale x 4 x i16> %bc
}
@@ -686,6 +1520,18 @@ define <vscale x 2 x i32> @bitcast_nxv8i8_to_nxv2i32(<vscale x 8 x i8> %v) #0 {
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8i8_to_nxv2i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x i8> %v to <vscale x 2 x i32>
ret <vscale x 2 x i32> %bc
}
@@ -702,6 +1548,18 @@ define <vscale x 2 x i32> @bitcast_nxv4i16_to_nxv2i32(<vscale x 4 x i16> %v) #0
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4i16_to_nxv2i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x i16> %v to <vscale x 2 x i32>
ret <vscale x 2 x i32> %bc
}
@@ -711,6 +1569,19 @@ define <vscale x 2 x i32> @bitcast_nxv1i64_to_nxv2i32(<vscale x 1 x i64> %v) #0
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv1i64_to_nxv2i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: uunpklo z0.d, z0.s
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 1 x i64> %v to <vscale x 2 x i32>
ret <vscale x 2 x i32> %bc
}
@@ -727,6 +1598,18 @@ define <vscale x 2 x i32> @bitcast_nxv4f16_to_nxv2i32(<vscale x 4 x half> %v) #0
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4f16_to_nxv2i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x half> %v to <vscale x 2 x i32>
ret <vscale x 2 x i32> %bc
}
@@ -735,6 +1618,18 @@ define <vscale x 2 x i32> @bitcast_nxv2f32_to_nxv2i32(<vscale x 2 x float> %v) #
; CHECK-LABEL: bitcast_nxv2f32_to_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2f32_to_nxv2i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x float> %v to <vscale x 2 x i32>
ret <vscale x 2 x i32> %bc
}
@@ -744,6 +1639,19 @@ define <vscale x 2 x i32> @bitcast_nxv1f64_to_nxv2i32(<vscale x 1 x double> %v)
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv1f64_to_nxv2i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: uunpklo z0.d, z0.s
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 1 x double> %v to <vscale x 2 x i32>
ret <vscale x 2 x i32> %bc
}
@@ -760,6 +1668,18 @@ define <vscale x 2 x i32> @bitcast_nxv4bf16_to_nxv2i32(<vscale x 4 x bfloat> %v)
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4bf16_to_nxv2i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x bfloat> %v to <vscale x 2 x i32>
ret <vscale x 2 x i32> %bc
}
@@ -773,6 +1693,19 @@ define <vscale x 1 x i64> @bitcast_nxv8i8_to_nxv1i64(<vscale x 8 x i8> %v) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8i8_to_nxv1i64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: uzp1 z0.b, z0.b, z0.b
+; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x i8> %v to <vscale x 1 x i64>
ret <vscale x 1 x i64> %bc
}
@@ -782,6 +1715,19 @@ define <vscale x 1 x i64> @bitcast_nxv4i16_to_nxv1i64(<vscale x 4 x i16> %v) #0
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4i16_to_nxv1i64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x i16> %v to <vscale x 1 x i64>
ret <vscale x 1 x i64> %bc
}
@@ -791,6 +1737,19 @@ define <vscale x 1 x i64> @bitcast_nxv2i32_to_nxv1i64(<vscale x 2 x i32> %v) #0
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i32_to_nxv1i64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i32> %v to <vscale x 1 x i64>
ret <vscale x 1 x i64> %bc
}
@@ -800,6 +1759,24 @@ define <vscale x 1 x i64> @bitcast_nxv4f16_to_nxv1i64(<vscale x 4 x half> %v) #0
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4f16_to_nxv1i64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-3
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ptrue p1.s
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp]
+; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #2, mul vl]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp, #2, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #3
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x half> %v to <vscale x 1 x i64>
ret <vscale x 1 x i64> %bc
}
@@ -809,6 +1786,23 @@ define <vscale x 1 x i64> @bitcast_nxv2f32_to_nxv1i64(<vscale x 2 x float> %v) #
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2f32_to_nxv1i64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-3
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ptrue p1.d
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp]
+; CHECK_BE-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp, #2, mul vl]
+; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp, #2, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #3
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x float> %v to <vscale x 1 x i64>
ret <vscale x 1 x i64> %bc
}
@@ -817,6 +1811,17 @@ define <vscale x 1 x i64> @bitcast_nxv1f64_to_nxv1i64(<vscale x 1 x double> %v)
; CHECK-LABEL: bitcast_nxv1f64_to_nxv1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv1f64_to_nxv1i64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 1 x double> %v to <vscale x 1 x i64>
ret <vscale x 1 x i64> %bc
}
@@ -826,6 +1831,24 @@ define <vscale x 1 x i64> @bitcast_nxv4bf16_to_nxv1i64(<vscale x 4 x bfloat> %v)
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4bf16_to_nxv1i64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-3
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ptrue p1.s
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp]
+; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #2, mul vl]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp, #2, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #3
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x bfloat> %v to <vscale x 1 x i64>
ret <vscale x 1 x i64> %bc
}
@@ -846,6 +1869,18 @@ define <vscale x 4 x half> @bitcast_nxv8i8_to_nxv4f16(<vscale x 8 x i8> %v) #0 {
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8i8_to_nxv4f16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x i8> %v to <vscale x 4 x half>
ret <vscale x 4 x half> %bc
}
@@ -854,6 +1889,18 @@ define <vscale x 4 x half> @bitcast_nxv4i16_to_nxv4f16(<vscale x 4 x i16> %v) #0
; CHECK-LABEL: bitcast_nxv4i16_to_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4i16_to_nxv4f16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x i16> %v to <vscale x 4 x half>
ret <vscale x 4 x half> %bc
}
@@ -870,6 +1917,18 @@ define <vscale x 4 x half> @bitcast_nxv2i32_to_nxv4f16(<vscale x 2 x i32> %v) #0
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i32_to_nxv4f16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i32> %v to <vscale x 4 x half>
ret <vscale x 4 x half> %bc
}
@@ -879,6 +1938,19 @@ define <vscale x 4 x half> @bitcast_nxv1i64_to_nxv4f16(<vscale x 1 x i64> %v) #0
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv1i64_to_nxv4f16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: uunpklo z0.s, z0.h
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 1 x i64> %v to <vscale x 4 x half>
ret <vscale x 4 x half> %bc
}
@@ -895,6 +1967,18 @@ define <vscale x 4 x half> @bitcast_nxv2f32_to_nxv4f16(<vscale x 2 x float> %v)
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2f32_to_nxv4f16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x float> %v to <vscale x 4 x half>
ret <vscale x 4 x half> %bc
}
@@ -904,6 +1988,19 @@ define <vscale x 4 x half> @bitcast_nxv1f64_to_nxv4f16(<vscale x 1 x double> %v)
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv1f64_to_nxv4f16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: uunpklo z0.s, z0.h
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 1 x double> %v to <vscale x 4 x half>
ret <vscale x 4 x half> %bc
}
@@ -912,6 +2009,17 @@ define <vscale x 4 x half> @bitcast_nxv4bf16_to_nxv4f16(<vscale x 4 x bfloat> %v
; CHECK-LABEL: bitcast_nxv4bf16_to_nxv4f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4bf16_to_nxv4f16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x bfloat> %v to <vscale x 4 x half>
ret <vscale x 4 x half> %bc
}
@@ -932,6 +2040,18 @@ define <vscale x 2 x float> @bitcast_nxv8i8_to_nxv2f32(<vscale x 8 x i8> %v) #0
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8i8_to_nxv2f32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x i8> %v to <vscale x 2 x float>
ret <vscale x 2 x float> %bc
}
@@ -948,6 +2068,18 @@ define <vscale x 2 x float> @bitcast_nxv4i16_to_nxv2f32(<vscale x 4 x i16> %v) #
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4i16_to_nxv2f32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x i16> %v to <vscale x 2 x float>
ret <vscale x 2 x float> %bc
}
@@ -956,6 +2088,18 @@ define <vscale x 2 x float> @bitcast_nxv2i32_to_nxv2f32(<vscale x 2 x i32> %v) #
; CHECK-LABEL: bitcast_nxv2i32_to_nxv2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i32_to_nxv2f32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i32> %v to <vscale x 2 x float>
ret <vscale x 2 x float> %bc
}
@@ -965,6 +2109,19 @@ define <vscale x 2 x float> @bitcast_nxv1i64_to_nxv2f32(<vscale x 1 x i64> %v) #
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv1i64_to_nxv2f32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: uunpklo z0.d, z0.s
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 1 x i64> %v to <vscale x 2 x float>
ret <vscale x 2 x float> %bc
}
@@ -981,6 +2138,18 @@ define <vscale x 2 x float> @bitcast_nxv4f16_to_nxv2f32(<vscale x 4 x half> %v)
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4f16_to_nxv2f32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x half> %v to <vscale x 2 x float>
ret <vscale x 2 x float> %bc
}
@@ -990,6 +2159,19 @@ define <vscale x 2 x float> @bitcast_nxv1f64_to_nxv2f32(<vscale x 1 x double> %v
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv1f64_to_nxv2f32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: uunpklo z0.d, z0.s
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 1 x double> %v to <vscale x 2 x float>
ret <vscale x 2 x float> %bc
}
@@ -1006,6 +2188,18 @@ define <vscale x 2 x float> @bitcast_nxv4bf16_to_nxv2f32(<vscale x 4 x bfloat> %
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4bf16_to_nxv2f32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x bfloat> %v to <vscale x 2 x float>
ret <vscale x 2 x float> %bc
}
@@ -1019,6 +2213,19 @@ define <vscale x 1 x double> @bitcast_nxv8i8_to_nxv1f64(<vscale x 8 x i8> %v) #0
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8i8_to_nxv1f64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: uzp1 z0.b, z0.b, z0.b
+; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x i8> %v to <vscale x 1 x double>
ret <vscale x 1 x double> %bc
}
@@ -1028,6 +2235,19 @@ define <vscale x 1 x double> @bitcast_nxv4i16_to_nxv1f64(<vscale x 4 x i16> %v)
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4i16_to_nxv1f64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x i16> %v to <vscale x 1 x double>
ret <vscale x 1 x double> %bc
}
@@ -1037,6 +2257,19 @@ define <vscale x 1 x double> @bitcast_nxv2i32_to_nxv1f64(<vscale x 2 x i32> %v)
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i32_to_nxv1f64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i32> %v to <vscale x 1 x double>
ret <vscale x 1 x double> %bc
}
@@ -1045,6 +2278,17 @@ define <vscale x 1 x double> @bitcast_nxv1i64_to_nxv1f64(<vscale x 1 x i64> %v)
; CHECK-LABEL: bitcast_nxv1i64_to_nxv1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv1i64_to_nxv1f64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 1 x i64> %v to <vscale x 1 x double>
ret <vscale x 1 x double> %bc
}
@@ -1054,6 +2298,24 @@ define <vscale x 1 x double> @bitcast_nxv4f16_to_nxv1f64(<vscale x 4 x half> %v)
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4f16_to_nxv1f64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-3
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ptrue p1.s
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp]
+; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #2, mul vl]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp, #2, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #3
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x half> %v to <vscale x 1 x double>
ret <vscale x 1 x double> %bc
}
@@ -1063,6 +2325,23 @@ define <vscale x 1 x double> @bitcast_nxv2f32_to_nxv1f64(<vscale x 2 x float> %v
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2f32_to_nxv1f64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-3
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ptrue p1.d
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp]
+; CHECK_BE-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp, #2, mul vl]
+; CHECK_BE-NEXT: ld1d { z0.d }, p1/z, [sp, #2, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #3
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x float> %v to <vscale x 1 x double>
ret <vscale x 1 x double> %bc
}
@@ -1072,6 +2351,24 @@ define <vscale x 1 x double> @bitcast_nxv4bf16_to_nxv1f64(<vscale x 4 x bfloat>
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4bf16_to_nxv1f64:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-3
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ptrue p1.s
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp]
+; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #2, mul vl]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp, #2, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #3
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x bfloat> %v to <vscale x 1 x double>
ret <vscale x 1 x double> %bc
}
@@ -1092,6 +2389,18 @@ define <vscale x 4 x bfloat> @bitcast_nxv8i8_to_nxv4bf16(<vscale x 8 x i8> %v) #
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv8i8_to_nxv4bf16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 8 x i8> %v to <vscale x 4 x bfloat>
ret <vscale x 4 x bfloat> %bc
}
@@ -1100,6 +2409,18 @@ define <vscale x 4 x bfloat> @bitcast_nxv4i16_to_nxv4bf16(<vscale x 4 x i16> %v)
; CHECK-LABEL: bitcast_nxv4i16_to_nxv4bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4i16_to_nxv4bf16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x i16> %v to <vscale x 4 x bfloat>
ret <vscale x 4 x bfloat> %bc
}
@@ -1116,6 +2437,18 @@ define <vscale x 4 x bfloat> @bitcast_nxv2i32_to_nxv4bf16(<vscale x 2 x i32> %v)
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i32_to_nxv4bf16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i32> %v to <vscale x 4 x bfloat>
ret <vscale x 4 x bfloat> %bc
}
@@ -1125,6 +2458,19 @@ define <vscale x 4 x bfloat> @bitcast_nxv1i64_to_nxv4bf16(<vscale x 1 x i64> %v)
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv1i64_to_nxv4bf16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: uunpklo z0.s, z0.h
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 1 x i64> %v to <vscale x 4 x bfloat>
ret <vscale x 4 x bfloat> %bc
}
@@ -1133,6 +2479,17 @@ define <vscale x 4 x bfloat> @bitcast_nxv4f16_to_nxv4bf16(<vscale x 4 x half> %v
; CHECK-LABEL: bitcast_nxv4f16_to_nxv4bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4f16_to_nxv4bf16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x half> %v to <vscale x 4 x bfloat>
ret <vscale x 4 x bfloat> %bc
}
@@ -1149,6 +2506,18 @@ define <vscale x 4 x bfloat> @bitcast_nxv2f32_to_nxv4bf16(<vscale x 2 x float> %
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2f32_to_nxv4bf16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x float> %v to <vscale x 4 x bfloat>
ret <vscale x 4 x bfloat> %bc
}
@@ -1158,6 +2527,19 @@ define <vscale x 4 x bfloat> @bitcast_nxv1f64_to_nxv4bf16(<vscale x 1 x double>
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv1f64_to_nxv4bf16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: uunpklo z0.s, z0.h
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 1 x double> %v to <vscale x 4 x bfloat>
ret <vscale x 4 x bfloat> %bc
}
@@ -1178,6 +2560,18 @@ define <vscale x 4 x i8> @bitcast_nxv2i16_to_nxv4i8(<vscale x 2 x i16> %v) #0 {
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i16_to_nxv4i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1b { z0.s }, p0/z, [sp, #3, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i16> %v to <vscale x 4 x i8>
ret <vscale x 4 x i8> %bc
}
@@ -1188,6 +2582,20 @@ define <vscale x 4 x i8> @bitcast_nxv1i32_to_nxv4i8(<vscale x 1 x i32> %v) #0 {
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv1i32_to_nxv4i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: ld1b { z0.b }, p0/z, [sp]
+; CHECK_BE-NEXT: uunpklo z0.h, z0.b
+; CHECK_BE-NEXT: uunpklo z0.s, z0.h
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 1 x i32> %v to <vscale x 4 x i8>
ret <vscale x 4 x i8> %bc
}
@@ -1204,6 +2612,18 @@ define <vscale x 4 x i8> @bitcast_nxv2f16_to_nxv4i8(<vscale x 2 x half> %v) #0 {
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2f16_to_nxv4i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1b { z0.s }, p0/z, [sp, #3, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x half> %v to <vscale x 4 x i8>
ret <vscale x 4 x i8> %bc
}
@@ -1222,6 +2642,18 @@ define <vscale x 4 x i8> @bitcast_nxv2bf16_to_nxv4i8(<vscale x 2 x bfloat> %v) #
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2bf16_to_nxv4i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1b { z0.s }, p0/z, [sp, #3, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x bfloat> %v to <vscale x 4 x i8>
ret <vscale x 4 x i8> %bc
}
@@ -1242,6 +2674,18 @@ define <vscale x 2 x i16> @bitcast_nxv4i8_to_nxv2i16(<vscale x 4 x i8> %v) #0 {
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4i8_to_nxv2i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1b { z0.s }, p0, [sp, #3, mul vl]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1h { z0.d }, p0/z, [sp, #3, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x i8> %v to <vscale x 2 x i16>
ret <vscale x 2 x i16> %bc
}
@@ -1252,6 +2696,20 @@ define <vscale x 2 x i16> @bitcast_nxv1i32_to_nxv2i16(<vscale x 1 x i32> %v) #0
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv1i32_to_nxv2i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1w { z0.s }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: uunpklo z0.s, z0.h
+; CHECK_BE-NEXT: uunpklo z0.d, z0.s
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 1 x i32> %v to <vscale x 2 x i16>
ret <vscale x 2 x i16> %bc
}
@@ -1260,6 +2718,18 @@ define <vscale x 2 x i16> @bitcast_nxv2f16_to_nxv2i16(<vscale x 2 x half> %v) #0
; CHECK-LABEL: bitcast_nxv2f16_to_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2f16_to_nxv2i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x half> %v to <vscale x 2 x i16>
ret <vscale x 2 x i16> %bc
}
@@ -1270,6 +2740,18 @@ define <vscale x 2 x i16> @bitcast_nxv2bf16_to_nxv2i16(<vscale x 2 x bfloat> %v)
; CHECK-LABEL: bitcast_nxv2bf16_to_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2bf16_to_nxv2i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x bfloat> %v to <vscale x 2 x i16>
ret <vscale x 2 x i16> %bc
}
@@ -1284,6 +2766,20 @@ define <vscale x 1 x i32> @bitcast_nxv4i8_to_nxv1i32(<vscale x 4 x i8> %v) #0 {
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4i8_to_nxv1i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: uzp1 z0.b, z0.b, z0.b
+; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x i8> %v to <vscale x 1 x i32>
ret <vscale x 1 x i32> %bc
}
@@ -1294,6 +2790,20 @@ define <vscale x 1 x i32> @bitcast_nxv2i16_to_nxv1i32(<vscale x 2 x i16> %v) #0
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i16_to_nxv1i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i16> %v to <vscale x 1 x i32>
ret <vscale x 1 x i32> %bc
}
@@ -1310,6 +2820,21 @@ define <vscale x 1 x i32> @bitcast_nxv2f16_to_nxv1i32(<vscale x 2 x half> %v) #0
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2f16_to_nxv1i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-2
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1h { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #2
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x half> %v to <vscale x 1 x i32>
ret <vscale x 1 x i32> %bc
}
@@ -1328,6 +2853,21 @@ define <vscale x 1 x i32> @bitcast_nxv2bf16_to_nxv1i32(<vscale x 2 x bfloat> %v)
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2bf16_to_nxv1i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-2
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1h { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: ld1w { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #2
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x bfloat> %v to <vscale x 1 x i32>
ret <vscale x 1 x i32> %bc
}
@@ -1348,6 +2888,18 @@ define <vscale x 2 x half> @bitcast_nxv4i8_to_nxv2f16(<vscale x 4 x i8> %v) #0 {
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4i8_to_nxv2f16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1b { z0.s }, p0, [sp, #3, mul vl]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1h { z0.d }, p0/z, [sp, #3, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x i8> %v to <vscale x 2 x half>
ret <vscale x 2 x half> %bc
}
@@ -1356,6 +2908,18 @@ define <vscale x 2 x half> @bitcast_nxv2i16_to_nxv2f16(<vscale x 2 x i16> %v) #0
; CHECK-LABEL: bitcast_nxv2i16_to_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i16_to_nxv2f16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i16> %v to <vscale x 2 x half>
ret <vscale x 2 x half> %bc
}
@@ -1367,6 +2931,17 @@ define <vscale x 2 x half> @bitcast_nxv2bf16_to_nxv2f16(<vscale x 2 x bfloat> %v
; CHECK-LABEL: bitcast_nxv2bf16_to_nxv2f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2bf16_to_nxv2f16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl]
+; CHECK_BE-NEXT: ld1h { z0.d }, p0/z, [sp, #3, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x bfloat> %v to <vscale x 2 x half>
ret <vscale x 2 x half> %bc
}
@@ -1397,6 +2972,18 @@ define <vscale x 2 x bfloat> @bitcast_nxv4i8_to_nxv2bf16(<vscale x 4 x i8> %v) #
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv4i8_to_nxv2bf16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: st1b { z0.s }, p0, [sp, #3, mul vl]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1h { z0.d }, p0/z, [sp, #3, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 4 x i8> %v to <vscale x 2 x bfloat>
ret <vscale x 2 x bfloat> %bc
}
@@ -1405,6 +2992,18 @@ define <vscale x 2 x bfloat> @bitcast_nxv2i16_to_nxv2bf16(<vscale x 2 x i16> %v)
; CHECK-LABEL: bitcast_nxv2i16_to_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i16_to_nxv2bf16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i16> %v to <vscale x 2 x bfloat>
ret <vscale x 2 x bfloat> %bc
}
@@ -1415,6 +3014,17 @@ define <vscale x 2 x bfloat> @bitcast_nxv2f16_to_nxv2bf16(<vscale x 2 x half> %v
; CHECK-LABEL: bitcast_nxv2f16_to_nxv2bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2f16_to_nxv2bf16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl]
+; CHECK_BE-NEXT: ld1h { z0.d }, p0/z, [sp, #3, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x half> %v to <vscale x 2 x bfloat>
ret <vscale x 2 x bfloat> %bc
}
@@ -1432,6 +3042,21 @@ define <vscale x 2 x i8> @bitcast_nxv1i16_to_nxv2i8(<vscale x 1 x i16> %v) #0 {
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv1i16_to_nxv2i8:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: ld1b { z0.b }, p0/z, [sp]
+; CHECK_BE-NEXT: uunpklo z0.h, z0.b
+; CHECK_BE-NEXT: uunpklo z0.s, z0.h
+; CHECK_BE-NEXT: uunpklo z0.d, z0.s
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 1 x i16> %v to <vscale x 2 x i8>
ret <vscale x 2 x i8> %bc
}
@@ -1450,6 +3075,21 @@ define <vscale x 1 x i16> @bitcast_nxv2i8_to_nxv1i16(<vscale x 2 x i8> %v) #0 {
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_nxv2i8_to_nxv1i16:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK_BE-NEXT: ptrue p0.b
+; CHECK_BE-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK_BE-NEXT: uzp1 z0.b, z0.b, z0.b
+; CHECK_BE-NEXT: st1b { z0.b }, p0, [sp]
+; CHECK_BE-NEXT: ptrue p0.h
+; CHECK_BE-NEXT: ld1h { z0.h }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%bc = bitcast <vscale x 2 x i8> %v to <vscale x 1 x i16>
ret <vscale x 1 x i16> %bc
}
@@ -1483,6 +3123,19 @@ define <vscale x 2 x i32> @bitcast_short_float_to_i32(<vscale x 2 x double> %v)
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcvt z0.s, p0/m, z0.d
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_short_float_to_i32:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ptrue p1.s
+; CHECK_BE-NEXT: fcvt z0.s, p0/m, z0.d
+; CHECK_BE-NEXT: st1w { z0.s }, p1, [sp]
+; CHECK_BE-NEXT: ld1d { z0.d }, p0/z, [sp]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%trunc = fptrunc <vscale x 2 x double> %v to <vscale x 2 x float>
%bitcast = bitcast <vscale x 2 x float> %trunc to <vscale x 2 x i32>
ret <vscale x 2 x i32> %bitcast
@@ -1494,6 +3147,19 @@ define <vscale x 2 x double> @bitcast_short_i32_to_float(<vscale x 2 x i64> %v)
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: fcvt z0.d, p0/m, z0.s
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_short_i32_to_float:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ptrue p1.s
+; CHECK_BE-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK_BE-NEXT: ld1w { z0.s }, p1/z, [sp]
+; CHECK_BE-NEXT: fcvt z0.d, p0/m, z0.s
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%trunc = trunc <vscale x 2 x i64> %v to <vscale x 2 x i32>
%bitcast = bitcast <vscale x 2 x i32> %trunc to <vscale x 2 x float>
%extended = fpext <vscale x 2 x float> %bitcast to <vscale x 2 x double>
@@ -1513,6 +3179,19 @@ define <vscale x 2 x float> @bitcast_short_half_to_float(<vscale x 4 x half> %v)
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; CHECK_BE-LABEL: bitcast_short_half_to_float:
+; CHECK_BE: // %bb.0:
+; CHECK_BE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK_BE-NEXT: addvl sp, sp, #-1
+; CHECK_BE-NEXT: ptrue p0.s
+; CHECK_BE-NEXT: fadd z0.h, p0/m, z0.h, z0.h
+; CHECK_BE-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK_BE-NEXT: ptrue p0.d
+; CHECK_BE-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl]
+; CHECK_BE-NEXT: addvl sp, sp, #1
+; CHECK_BE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK_BE-NEXT: ret
%add = fadd <vscale x 4 x half> %v, %v
%bitcast = bitcast <vscale x 4 x half> %add to <vscale x 2 x float>
ret <vscale x 2 x float> %bitcast
More information about the llvm-commits mailing list