[llvm] 43b2df0 - [LegalizeTypes][AArch64] Use scalar_to_vector to eliminate bitcast
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 14 08:34:37 PDT 2023
Author: zhongyunde
Date: 2023-06-14T23:33:02+08:00
New Revision: 43b2df03e842f109121ff60e6b62097b5a6c41ea
URL: https://github.com/llvm/llvm-project/commit/43b2df03e842f109121ff60e6b62097b5a6c41ea
DIFF: https://github.com/llvm/llvm-project/commit/43b2df03e842f109121ff60e6b62097b5a6c41ea.diff
LOG: [LegalizeTypes][AArch64] Use scalar_to_vector to eliminate bitcast
```
Legalize t3: v2i16 = bitcast i32
with (v2i16 extract_subvector (v4i16 bitcast (v2i32 scalar_to_vector (i32 in))), 0)
```
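For illustration, the IR pattern this transform targets is a plain scalar-to-short-vector bitcast, as in the tests added below. A minimal standalone sketch (the RUN line is illustrative; the checked-in tests carry the authoritative ones):
```
; RUN: llc -mtriple=aarch64 -o - %s
define <2 x i16> @bitcast_i32_to_v2i16(i32 %word) {
  ; Before this patch such bitcasts could be legalized through a stack
  ; slot (see the str/ldr pair removed from aarch64-load-ext.ll below);
  ; now lane zero is materialized with fmov via scalar_to_vector.
  %ret = bitcast i32 %word to <2 x i16>
  ret <2 x i16> %ret
}
```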
Fix https://github.com/llvm/llvm-project/issues/61638
NOTE: We don't change getPreferredVectorAction the way X86 does, as that
would affect too many existing test cases.
Reviewed By: dmgreen, paulwalker-arm, efriedma
Differential Revision: https://reviews.llvm.org/D147678
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
llvm/test/CodeGen/AArch64/neon-bitcast.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8bf75e4aee5f0..41b8e991e75ab 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1228,6 +1228,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i8, Custom);
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v2i8, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v2i16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v4i8, Custom);
+
setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
@@ -23019,6 +23023,23 @@ static void replaceBoolVectorBitcast(SDNode *N,
Results.push_back(DAG.getZExtOrTrunc(VectorBits, DL, VT));
}
+static void CustomNonLegalBITCASTResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG, EVT ExtendVT,
+ EVT CastVT) {
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // Use SCALAR_TO_VECTOR for lane zero
+ SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtendVT, Op);
+ SDValue CastVal = DAG.getNode(ISD::BITCAST, DL, CastVT, Vec);
+ SDValue IdxZero = DAG.getVectorIdxConstant(0, DL);
+ Results.push_back(
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, CastVal, IdxZero));
+ return;
+}
+
void AArch64TargetLowering::ReplaceBITCASTResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
SDLoc DL(N);
@@ -23026,6 +23047,21 @@ void AArch64TargetLowering::ReplaceBITCASTResults(
EVT VT = N->getValueType(0);
EVT SrcVT = Op.getValueType();
+ if (VT == MVT::v2i16 && SrcVT == MVT::i32) {
+ CustomNonLegalBITCASTResults(N, Results, DAG, MVT::v2i32, MVT::v4i16);
+ return;
+ }
+
+ if (VT == MVT::v4i8 && SrcVT == MVT::i32) {
+ CustomNonLegalBITCASTResults(N, Results, DAG, MVT::v2i32, MVT::v8i8);
+ return;
+ }
+
+ if (VT == MVT::v2i8 && SrcVT == MVT::i16) {
+ CustomNonLegalBITCASTResults(N, Results, DAG, MVT::v4i16, MVT::v8i8);
+ return;
+ }
+
if (VT.isScalableVector() && !isTypeLegal(VT) && isTypeLegal(SrcVT)) {
assert(!VT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
"Expected fp->int bitcast!");
diff --git a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
index bc95cfd7d28d9..804c1e7cfc363 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
@@ -446,25 +446,16 @@ define <4 x i32> @anyext_v4i32(ptr %a, ptr %b) {
define <4 x i8> @bitcast(i32 %0) {
; CHECK-LE-LABEL: bitcast:
; CHECK-LE: // %bb.0:
-; CHECK-LE-NEXT: sub sp, sp, #16
-; CHECK-LE-NEXT: .cfi_def_cfa_offset 16
-; CHECK-LE-NEXT: str w0, [sp, #12]
-; CHECK-LE-NEXT: ldr s0, [sp, #12]
-; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-LE-NEXT: add sp, sp, #16
+; CHECK-LE-NEXT: fmov s0, w0
+; CHECK-LE-NEXT: zip1 v0.8b, v0.8b, v0.8b
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: bitcast:
; CHECK-BE: // %bb.0:
-; CHECK-BE-NEXT: sub sp, sp, #16
-; CHECK-BE-NEXT: .cfi_def_cfa_offset 16
-; CHECK-BE-NEXT: str w0, [sp, #12]
-; CHECK-BE-NEXT: ldr s0, [sp, #12]
+; CHECK-BE-NEXT: fmov s0, w0
; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
-; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
-; CHECK-BE-NEXT: add sp, sp, #16
+; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
+; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
; CHECK-BE-NEXT: ret
%2 = bitcast i32 %0 to <4 x i8>
ret <4 x i8> %2
diff --git a/llvm/test/CodeGen/AArch64/neon-bitcast.ll b/llvm/test/CodeGen/AArch64/neon-bitcast.ll
index 2b060f436c79a..bfd59f3d813c8 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitcast.ll
@@ -514,3 +514,66 @@ define <16 x i8> @test_v2f64_to_v16i8(<2 x double> %in) nounwind{
ret <16 x i8> %val
}
+define <2 x i16> @bitcast_i32_to_v2i16(i32 %word) {
+; CHECK-LE-LABEL: bitcast_i32_to_v2i16:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: fmov s0, w0
+; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: bitcast_i32_to_v2i16:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: fmov s0, w0
+; CHECK-BE-NEXT: rev32 v0.4h, v0.4h
+; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-BE-NEXT: rev64 v0.2s, v0.2s
+; CHECK-BE-NEXT: ret
+ %ret = bitcast i32 %word to <2 x i16>
+ ret <2 x i16> %ret
+}
+
+define <4 x i8> @bitcast_i32_to_v4i8(i32 %word) {
+; CHECK-LE-LABEL: bitcast_i32_to_v4i8:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: fmov s0, w0
+; CHECK-LE-NEXT: zip1 v0.8b, v0.8b, v0.8b
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: bitcast_i32_to_v4i8:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: fmov s0, w0
+; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
+; CHECK-BE-NEXT: zip1 v0.8b, v0.8b, v0.8b
+; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
+; CHECK-BE-NEXT: ret
+ %ret = bitcast i32 %word to <4 x i8>
+ ret <4 x i8> %ret
+}
+
+; TODO: Eliminate redundant moving back and forth between gpr and vectors
+define <2 x i8> @bitcast_i16_to_v2i8(i16 %word) {
+; CHECK-LE-LABEL: bitcast_i16_to_v2i8:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: fmov s0, w0
+; CHECK-LE-NEXT: umov w8, v0.b[0]
+; CHECK-LE-NEXT: umov w9, v0.b[1]
+; CHECK-LE-NEXT: fmov s0, w8
+; CHECK-LE-NEXT: mov v0.s[1], w9
+; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: bitcast_i16_to_v2i8:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: fmov s0, w0
+; CHECK-BE-NEXT: rev16 v0.16b, v0.16b
+; CHECK-BE-NEXT: umov w8, v0.b[0]
+; CHECK-BE-NEXT: umov w9, v0.b[1]
+; CHECK-BE-NEXT: fmov s0, w8
+; CHECK-BE-NEXT: mov v0.s[1], w9
+; CHECK-BE-NEXT: rev64 v0.2s, v0.2s
+; CHECK-BE-NEXT: ret
+ %ret = bitcast i16 %word to <2 x i8>
+ ret <2 x i8> %ret
+}
+
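As the TODO in bitcast_i16_to_v2i8 above notes, the i16 -> v2i8 case still bounces between GPRs and vector registers. A standalone reproducer, in case someone wants to pick that up (RUN line again illustrative):
```
; RUN: llc -mtriple=aarch64 -o - %s
define <2 x i8> @bitcast_i16_to_v2i8(i16 %word) {
  ; Currently lowered as fmov + two umov lane reads + fmov/mov re-insert;
  ; ideally the umov/fmov round trip would fold away.
  %ret = bitcast i16 %word to <2 x i8>
  ret <2 x i8> %ret
}
```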