[llvm] 0fb3d42 - [AArch64][GlobalISel] Refactor BITCAST Legalization (#80505)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 21 05:24:49 PST 2024
Author: chuongg3
Date: 2024-02-21T13:24:45Z
New Revision: 0fb3d4296f3a3ebe36661643155f4ee35a3167b7
URL: https://github.com/llvm/llvm-project/commit/0fb3d4296f3a3ebe36661643155f4ee35a3167b7
DIFF: https://github.com/llvm/llvm-project/commit/0fb3d4296f3a3ebe36661643155f4ee35a3167b7.diff
LOG: [AArch64][GlobalISel] Refactor BITCAST Legalization (#80505)
Ensure BITCAST is only legal for types with the same amount of bits.
Enable BITCAST to work with non-legal vector types as well.
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
llvm/test/CodeGen/AArch64/bitcast.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index f001d8a1672972..2beb9919418fc9 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -338,6 +338,11 @@ class LegalizerHelper {
unsigned TypeIdx,
LLT NarrowTy);
+ // Fewer-elements legalization for G_BITCAST: the source is narrowed along
+ // with the destination so each piece keeps the same size in bits as NarrowTy
+ LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy);
+
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 044cd3d2d426ec..30f12bf5cca586 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4677,11 +4677,44 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
case G_FPOWI:
return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
+ case G_BITCAST:
+ return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
default:
return UnableToLegalize;
}
}
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx,
+ LLT NarrowTy) {
+ assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
+ "Not a bitcast operation");
+
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+
+ unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
+ LLT SrcNarrowTy =
+ LLT::fixed_vector(NarrowTy.getSizeInBits() / SrcScalSize, SrcScalSize);
+
+ // Split the Src Reg into smaller registers of type SrcNarrowTy
+ SmallVector<Register> SrcVRegs, BitcastVRegs;
+ if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
+ return UnableToLegalize;
+
+ // Build new smaller bitcast instructions
+ // Leftover types are not supported yet; that support still needs to be added
+ for (unsigned i = 0; i < SrcVRegs.size(); i++)
+ BitcastVRegs.push_back(
+ MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));
+
+ MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
@@ -5366,6 +5399,27 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_BITCAST: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
+ if (coefficient % DstTy.getNumElements() != 0)
+ return UnableToLegalize;
+
+ coefficient = coefficient / DstTy.getNumElements();
+
+ LLT NewTy = SrcTy.changeElementCount(
+ ElementCount::get(coefficient, MoreTy.isScalable()));
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, NewTy, 1);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
default:
return UnableToLegalize;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 261078cd4bd7d0..60e046bc6cf407 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -740,12 +740,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// Casts for 32 and 64-bit width type are just copies.
// Same for 128-bit width type, except they are on the FPR bank.
getActionDefinitionsBuilder(G_BITCAST)
- // FIXME: This is wrong since G_BITCAST is not allowed to change the
- // number of bits but it's what the previous code described and fixing
- // it breaks tests.
- .legalForCartesianProduct({s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
- v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
- v2p0});
+ // Keep 32-bit bitcasts legal to avoid regressions in some tests
+ .legalForCartesianProduct({s32, v2s16, v4s8})
+ .legalForCartesianProduct({s64, v8s8, v4s16, v2s32})
+ .legalForCartesianProduct({s128, v16s8, v8s16, v4s32, v2s64, v2p0})
+ .moreElementsToNextPow2(0)
+ .clampNumElements(0, v8s8, v16s8)
+ .clampNumElements(0, v4s16, v8s16)
+ .clampNumElements(0, v2s32, v4s32)
+ .lower();
getActionDefinitionsBuilder(G_VASTART).legalFor({p0});
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 381897b1835deb..d87704cf45d5d5 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -127,8 +127,8 @@
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
#
# DEBUG-NEXT: G_BITCAST (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. the first uncovered type index: 2, OK
-# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
#
# DEBUG-NEXT: G_FREEZE (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll
index bac9b48a4087b0..a5551285f2788d 100644
--- a/llvm/test/CodeGen/AArch64/bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast.ll
@@ -10,15 +10,6 @@
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_i32_v2i16
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v2i16_v4i8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v4i8_v2i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v4i64_v8i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v4i64_v16i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v8i32_v4i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v8i32_v16i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v8i64_v16i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v16i16_v4i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v16i16_v8i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v16i32_v8i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v3i32_v6i16
define <4 x i16> @foo1(<2 x i32> %a) {
; CHECK-SD-LABEL: foo1:
@@ -74,9 +65,9 @@ define i32 @bitcast_v4i8_i32(<4 x i8> %a, <4 x i8> %b){
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
- %c = add <4 x i8> %a, %b
- %d = bitcast <4 x i8> %c to i32
- ret i32 %d
+ %c = add <4 x i8> %a, %b
+ %d = bitcast <4 x i8> %c to i32
+ ret i32 %d
}
define <4 x i8> @bitcast_i32_v4i8(i32 %a, i32 %b){
@@ -86,9 +77,9 @@ define <4 x i8> @bitcast_i32_v4i8(i32 %a, i32 %b){
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: zip1 v0.8b, v0.8b, v0.8b
; CHECK-NEXT: ret
- %c = add i32 %a, %b
- %d = bitcast i32 %c to <4 x i8>
- ret <4 x i8> %d
+ %c = add i32 %a, %b
+ %d = bitcast i32 %c to <4 x i8>
+ ret <4 x i8> %d
}
define i32 @bitcast_v2i16_i32(<2 x i16> %a, <2 x i16> %b){
@@ -104,9 +95,9 @@ define i32 @bitcast_v2i16_i32(<2 x i16> %a, <2 x i16> %b){
; CHECK-NEXT: ldr w0, [sp, #12]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
- %c = add <2 x i16> %a, %b
- %d = bitcast <2 x i16> %c to i32
- ret i32 %d
+ %c = add <2 x i16> %a, %b
+ %d = bitcast <2 x i16> %c to i32
+ ret i32 %d
}
define <2 x i16> @bitcast_i32_v2i16(i32 %a, i32 %b){
@@ -117,9 +108,9 @@ define <2 x i16> @bitcast_i32_v2i16(i32 %a, i32 %b){
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
- %c = add i32 %a, %b
- %d = bitcast i32 %c to <2 x i16>
- ret <2 x i16> %d
+ %c = add i32 %a, %b
+ %d = bitcast i32 %c to <2 x i16>
+ ret <2 x i16> %d
}
define i64 @bitcast_v8i8_i64(<8 x i8> %a, <8 x i8> %b){
@@ -128,9 +119,9 @@ define i64 @bitcast_v8i8_i64(<8 x i8> %a, <8 x i8> %b){
; CHECK-NEXT: add v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %c = add <8 x i8> %a, %b
- %d = bitcast <8 x i8> %c to i64
- ret i64 %d
+ %c = add <8 x i8> %a, %b
+ %d = bitcast <8 x i8> %c to i64
+ ret i64 %d
}
define <8 x i8> @bitcast_i64_v8i8(i64 %a, i64 %b){
@@ -139,9 +130,9 @@ define <8 x i8> @bitcast_i64_v8i8(i64 %a, i64 %b){
; CHECK-NEXT: add x8, x0, x1
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
- %c = add i64 %a, %b
- %d = bitcast i64 %c to <8 x i8>
- ret <8 x i8> %d
+ %c = add i64 %a, %b
+ %d = bitcast i64 %c to <8 x i8>
+ ret <8 x i8> %d
}
define i64 @bitcast_v4i16_i64(<4 x i16> %a, <4 x i16> %b){
@@ -150,9 +141,9 @@ define i64 @bitcast_v4i16_i64(<4 x i16> %a, <4 x i16> %b){
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %c = add <4 x i16> %a, %b
- %d = bitcast <4 x i16> %c to i64
- ret i64 %d
+ %c = add <4 x i16> %a, %b
+ %d = bitcast <4 x i16> %c to i64
+ ret i64 %d
}
define <4 x i16> @bitcast_i64_v4i16(i64 %a, i64 %b){
@@ -161,9 +152,9 @@ define <4 x i16> @bitcast_i64_v4i16(i64 %a, i64 %b){
; CHECK-NEXT: add x8, x0, x1
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
- %c = add i64 %a, %b
- %d = bitcast i64 %c to <4 x i16>
- ret <4 x i16> %d
+ %c = add i64 %a, %b
+ %d = bitcast i64 %c to <4 x i16>
+ ret <4 x i16> %d
}
define i64 @bitcast_v2i32_i64(<2 x i32> %a, <2 x i32> %b){
@@ -172,9 +163,9 @@ define i64 @bitcast_v2i32_i64(<2 x i32> %a, <2 x i32> %b){
; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
- %c = add <2 x i32> %a, %b
- %d = bitcast <2 x i32> %c to i64
- ret i64 %d
+ %c = add <2 x i32> %a, %b
+ %d = bitcast <2 x i32> %c to i64
+ ret i64 %d
}
define <2 x i32> @bitcast_i64_v2i32(i64 %a, i64 %b){
@@ -183,9 +174,9 @@ define <2 x i32> @bitcast_i64_v2i32(i64 %a, i64 %b){
; CHECK-NEXT: add x8, x0, x1
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
- %c = add i64 %a, %b
- %d = bitcast i64 %c to <2 x i32>
- ret <2 x i32> %d
+ %c = add i64 %a, %b
+ %d = bitcast i64 %c to <2 x i32>
+ ret <2 x i32> %d
}
; ===== Legal Vector Types =====
@@ -195,9 +186,9 @@ define <4 x i16> @bitcast_v2i32_v4i16(<2 x i32> %a, <2 x i32> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
- %c = add <2 x i32> %a, %b
- %d = bitcast <2 x i32> %c to <4 x i16>
- ret <4 x i16> %d
+ %c = add <2 x i32> %a, %b
+ %d = bitcast <2 x i32> %c to <4 x i16>
+ ret <4 x i16> %d
}
define <4 x i32> @bitcast_v2i64_v4i32(<2 x i64> %a, <2 x i64> %b){
@@ -205,9 +196,9 @@ define <4 x i32> @bitcast_v2i64_v4i32(<2 x i64> %a, <2 x i64> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
- %c = add <2 x i64> %a, %b
- %d = bitcast <2 x i64> %c to <4 x i32>
- ret <4 x i32> %d
+ %c = add <2 x i64> %a, %b
+ %d = bitcast <2 x i64> %c to <4 x i32>
+ ret <4 x i32> %d
}
define <8 x i8> @bitcast_v2i32_v8i8(<2 x i32> %a, <2 x i32> %b){
@@ -215,9 +206,9 @@ define <8 x i8> @bitcast_v2i32_v8i8(<2 x i32> %a, <2 x i32> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
- %c = add <2 x i32> %a, %b
- %d = bitcast <2 x i32> %c to <8 x i8>
- ret <8 x i8> %d
+ %c = add <2 x i32> %a, %b
+ %d = bitcast <2 x i32> %c to <8 x i8>
+ ret <8 x i8> %d
}
define <8 x i16> @bitcast_v2i64_v8i16(<2 x i64> %a, <2 x i64> %b){
@@ -225,9 +216,9 @@ define <8 x i16> @bitcast_v2i64_v8i16(<2 x i64> %a, <2 x i64> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
- %c = add <2 x i64> %a, %b
- %d = bitcast <2 x i64> %c to <8 x i16>
- ret <8 x i16> %d
+ %c = add <2 x i64> %a, %b
+ %d = bitcast <2 x i64> %c to <8 x i16>
+ ret <8 x i16> %d
}
define <16 x i8> @bitcast_v2i64_v16i8(<2 x i64> %a, <2 x i64> %b){
@@ -235,9 +226,9 @@ define <16 x i8> @bitcast_v2i64_v16i8(<2 x i64> %a, <2 x i64> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
- %c = add <2 x i64> %a, %b
- %d = bitcast <2 x i64> %c to <16 x i8>
- ret <16 x i8> %d
+ %c = add <2 x i64> %a, %b
+ %d = bitcast <2 x i64> %c to <16 x i8>
+ ret <16 x i8> %d
}
define <2 x i32> @bitcast_v4i16_v2i32(<4 x i16> %a, <4 x i16> %b){
@@ -245,9 +236,9 @@ define <2 x i32> @bitcast_v4i16_v2i32(<4 x i16> %a, <4 x i16> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
- %c = add <4 x i16> %a, %b
- %d = bitcast <4 x i16> %c to <2 x i32>
- ret <2 x i32> %d
+ %c = add <4 x i16> %a, %b
+ %d = bitcast <4 x i16> %c to <2 x i32>
+ ret <2 x i32> %d
}
define <2 x i64> @bitcast_v4i32_v2i64(<4 x i32> %a, <4 x i32> %b){
@@ -255,9 +246,9 @@ define <2 x i64> @bitcast_v4i32_v2i64(<4 x i32> %a, <4 x i32> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
- %c = add <4 x i32> %a, %b
- %d = bitcast <4 x i32> %c to <2 x i64>
- ret <2 x i64> %d
+ %c = add <4 x i32> %a, %b
+ %d = bitcast <4 x i32> %c to <2 x i64>
+ ret <2 x i64> %d
}
define <8 x i8> @bitcast_v4i16_v8i8(<4 x i16> %a, <4 x i16> %b){
@@ -265,9 +256,9 @@ define <8 x i8> @bitcast_v4i16_v8i8(<4 x i16> %a, <4 x i16> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
- %c = add <4 x i16> %a, %b
- %d = bitcast <4 x i16> %c to <8 x i8>
- ret <8 x i8> %d
+ %c = add <4 x i16> %a, %b
+ %d = bitcast <4 x i16> %c to <8 x i8>
+ ret <8 x i8> %d
}
define <8 x i16> @bitcast_v4i32_v8i16(<4 x i32> %a, <4 x i32> %b){
@@ -275,9 +266,9 @@ define <8 x i16> @bitcast_v4i32_v8i16(<4 x i32> %a, <4 x i32> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
- %c = add <4 x i32> %a, %b
- %d = bitcast <4 x i32> %c to <8 x i16>
- ret <8 x i16> %d
+ %c = add <4 x i32> %a, %b
+ %d = bitcast <4 x i32> %c to <8 x i16>
+ ret <8 x i16> %d
}
define <16 x i8> @bitcast_v4i32_v16i8(<4 x i32> %a, <4 x i32> %b){
@@ -285,9 +276,9 @@ define <16 x i8> @bitcast_v4i32_v16i8(<4 x i32> %a, <4 x i32> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
- %c = add <4 x i32> %a, %b
- %d = bitcast <4 x i32> %c to <16 x i8>
- ret <16 x i8> %d
+ %c = add <4 x i32> %a, %b
+ %d = bitcast <4 x i32> %c to <16 x i8>
+ ret <16 x i8> %d
}
define <2 x i32> @bitcast_v8i8_v2i32(<8 x i8> %a, <8 x i8> %b){
@@ -295,9 +286,9 @@ define <2 x i32> @bitcast_v8i8_v2i32(<8 x i8> %a, <8 x i8> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
- %c = add <8 x i8> %a, %b
- %d = bitcast <8 x i8> %c to <2 x i32>
- ret <2 x i32> %d
+ %c = add <8 x i8> %a, %b
+ %d = bitcast <8 x i8> %c to <2 x i32>
+ ret <2 x i32> %d
}
define <2 x i64> @bitcast_v8i16_v2i64(<8 x i16> %a, <8 x i16> %b){
@@ -305,9 +296,9 @@ define <2 x i64> @bitcast_v8i16_v2i64(<8 x i16> %a, <8 x i16> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
- %c = add <8 x i16> %a, %b
- %d = bitcast <8 x i16> %c to <2 x i64>
- ret <2 x i64> %d
+ %c = add <8 x i16> %a, %b
+ %d = bitcast <8 x i16> %c to <2 x i64>
+ ret <2 x i64> %d
}
define <4 x i16> @bitcast_v8i8_v4i16(<8 x i8> %a, <8 x i8> %b){
@@ -315,9 +306,9 @@ define <4 x i16> @bitcast_v8i8_v4i16(<8 x i8> %a, <8 x i8> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
- %c = add <8 x i8> %a, %b
- %d = bitcast <8 x i8> %c to <4 x i16>
- ret <4 x i16> %d
+ %c = add <8 x i8> %a, %b
+ %d = bitcast <8 x i8> %c to <4 x i16>
+ ret <4 x i16> %d
}
define <4 x i32> @bitcast_v8i16_v4i32(<8 x i16> %a, <8 x i16> %b){
@@ -325,9 +316,9 @@ define <4 x i32> @bitcast_v8i16_v4i32(<8 x i16> %a, <8 x i16> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
- %c = add <8 x i16> %a, %b
- %d = bitcast <8 x i16> %c to <4 x i32>
- ret <4 x i32> %d
+ %c = add <8 x i16> %a, %b
+ %d = bitcast <8 x i16> %c to <4 x i32>
+ ret <4 x i32> %d
}
define <16 x i8> @bitcast_v8i16_v16i8(<8 x i16> %a, <8 x i16> %b){
@@ -335,9 +326,9 @@ define <16 x i8> @bitcast_v8i16_v16i8(<8 x i16> %a, <8 x i16> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
- %c = add <8 x i16> %a, %b
- %d = bitcast <8 x i16> %c to <16 x i8>
- ret <16 x i8> %d
+ %c = add <8 x i16> %a, %b
+ %d = bitcast <8 x i16> %c to <16 x i8>
+ ret <16 x i8> %d
}
define <2 x i64> @bitcast_v16i8_v2i64(<16 x i8> %a, <16 x i8> %b){
@@ -345,9 +336,9 @@ define <2 x i64> @bitcast_v16i8_v2i64(<16 x i8> %a, <16 x i8> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %c = add <16 x i8> %a, %b
- %d = bitcast <16 x i8> %c to <2 x i64>
- ret <2 x i64> %d
+ %c = add <16 x i8> %a, %b
+ %d = bitcast <16 x i8> %c to <2 x i64>
+ ret <2 x i64> %d
}
define <4 x i32> @bitcast_v16i8_v4i32(<16 x i8> %a, <16 x i8> %b){
@@ -355,9 +346,9 @@ define <4 x i32> @bitcast_v16i8_v4i32(<16 x i8> %a, <16 x i8> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %c = add <16 x i8> %a, %b
- %d = bitcast <16 x i8> %c to <4 x i32>
- ret <4 x i32> %d
+ %c = add <16 x i8> %a, %b
+ %d = bitcast <16 x i8> %c to <4 x i32>
+ ret <4 x i32> %d
}
define <8 x i16> @bitcast_v16i8_v8i16(<16 x i8> %a, <16 x i8> %b){
@@ -365,9 +356,9 @@ define <8 x i16> @bitcast_v16i8_v8i16(<16 x i8> %a, <16 x i8> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %c = add <16 x i8> %a, %b
- %d = bitcast <16 x i8> %c to <8 x i16>
- ret <8 x i16> %d
+ %c = add <16 x i8> %a, %b
+ %d = bitcast <16 x i8> %c to <8 x i16>
+ ret <8 x i16> %d
}
; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
@@ -387,9 +378,9 @@ define <4 x i8> @bitcast_v2i16_v4i8(<2 x i16> %a, <2 x i16> %b){
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
- %c = add <2 x i16> %a, %b
- %d = bitcast <2 x i16> %c to <4 x i8>
- ret <4 x i8> %d
+ %c = add <2 x i16> %a, %b
+ %d = bitcast <2 x i16> %c to <4 x i8>
+ ret <4 x i8> %d
}
define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){
@@ -407,101 +398,177 @@ define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
- %c = add <4 x i8> %a, %b
- %d = bitcast <4 x i8> %c to <2 x i16>
- ret <2 x i16> %d
+ %c = add <4 x i8> %a, %b
+ %d = bitcast <4 x i8> %c to <2 x i16>
+ ret <2 x i16> %d
}
define <8 x i32> @bitcast_v4i64_v8i32(<4 x i64> %a, <4 x i64> %b){
-; CHECK-LABEL: bitcast_v4i64_v8i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add v1.2d, v1.2d, v3.2d
-; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT: ret
- %c = add <4 x i64> %a, %b
- %d = bitcast <4 x i64> %c to <8 x i32>
- ret <8 x i32> %d
+; CHECK-SD-LABEL: bitcast_v4i64_v8i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v1.2d, v1.2d, v3.2d
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v4i64_v8i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-GI-NEXT: add v1.2d, v1.2d, v3.2d
+; CHECK-GI-NEXT: ret
+ %c = add <4 x i64> %a, %b
+ %d = bitcast <4 x i64> %c to <8 x i32>
+ ret <8 x i32> %d
}
define <16 x i16> @bitcast_v4i64_v16i16(<4 x i64> %a, <4 x i64> %b){
-; CHECK-LABEL: bitcast_v4i64_v16i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add v1.2d, v1.2d, v3.2d
-; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT: ret
- %c = add <4 x i64> %a, %b
- %d = bitcast <4 x i64> %c to <16 x i16>
- ret <16 x i16> %d
+; CHECK-SD-LABEL: bitcast_v4i64_v16i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v1.2d, v1.2d, v3.2d
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v4i64_v16i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-GI-NEXT: add v1.2d, v1.2d, v3.2d
+; CHECK-GI-NEXT: ret
+ %c = add <4 x i64> %a, %b
+ %d = bitcast <4 x i64> %c to <16 x i16>
+ ret <16 x i16> %d
}
define <4 x i64> @bitcast_v8i32_v4i64(<8 x i32> %a, <8 x i32> %b){
-; CHECK-LABEL: bitcast_v8i32_v4i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: ret
- %c = add <8 x i32> %a, %b
- %d = bitcast <8 x i32> %c to <4 x i64>
- ret <4 x i64> %d
+; CHECK-SD-LABEL: bitcast_v8i32_v4i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v1.4s, v1.4s, v3.4s
+; CHECK-SD-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v8i32_v4i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: add v1.4s, v1.4s, v3.4s
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: mov d3, v1.d[1]
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: fmov x9, d3
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: mov v1.d[1], x9
+; CHECK-GI-NEXT: ret
+ %c = add <8 x i32> %a, %b
+ %d = bitcast <8 x i32> %c to <4 x i64>
+ ret <4 x i64> %d
}
define <16 x i16> @bitcast_v8i32_v16i16(<8 x i32> %a, <8 x i32> %b){
-; CHECK-LABEL: bitcast_v8i32_v16i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: ret
- %c = add <8 x i32> %a, %b
- %d = bitcast <8 x i32> %c to <16 x i16>
- ret <16 x i16> %d
+; CHECK-SD-LABEL: bitcast_v8i32_v16i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v1.4s, v1.4s, v3.4s
+; CHECK-SD-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v8i32_v16i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT: add v1.4s, v1.4s, v3.4s
+; CHECK-GI-NEXT: ret
+ %c = add <8 x i32> %a, %b
+ %d = bitcast <8 x i32> %c to <16 x i16>
+ ret <16 x i16> %d
}
define <16 x i32> @bitcast_v8i64_v16i32(<8 x i64> %a, <8 x i64> %b){
-; CHECK-LABEL: bitcast_v8i64_v16i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add v2.2d, v2.2d, v6.2d
-; CHECK-NEXT: add v0.2d, v0.2d, v4.2d
-; CHECK-NEXT: add v1.2d, v1.2d, v5.2d
-; CHECK-NEXT: add v3.2d, v3.2d, v7.2d
-; CHECK-NEXT: ret
- %c = add <8 x i64> %a, %b
- %d = bitcast <8 x i64> %c to <16 x i32>
- ret <16 x i32> %d
+; CHECK-SD-LABEL: bitcast_v8i64_v16i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v2.2d, v2.2d, v6.2d
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v4.2d
+; CHECK-SD-NEXT: add v1.2d, v1.2d, v5.2d
+; CHECK-SD-NEXT: add v3.2d, v3.2d, v7.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v8i64_v16i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.2d, v0.2d, v4.2d
+; CHECK-GI-NEXT: add v1.2d, v1.2d, v5.2d
+; CHECK-GI-NEXT: add v2.2d, v2.2d, v6.2d
+; CHECK-GI-NEXT: add v3.2d, v3.2d, v7.2d
+; CHECK-GI-NEXT: ret
+ %c = add <8 x i64> %a, %b
+ %d = bitcast <8 x i64> %c to <16 x i32>
+ ret <16 x i32> %d
}
define <4 x i64> @bitcast_v16i16_v4i64(<16 x i16> %a, <16 x i16> %b){
-; CHECK-LABEL: bitcast_v16i16_v4i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add v1.8h, v1.8h, v3.8h
-; CHECK-NEXT: add v0.8h, v0.8h, v2.8h
-; CHECK-NEXT: ret
- %c = add <16 x i16> %a, %b
- %d = bitcast <16 x i16> %c to <4 x i64>
- ret <4 x i64> %d
+; CHECK-SD-LABEL: bitcast_v16i16_v4i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v1.8h, v1.8h, v3.8h
+; CHECK-SD-NEXT: add v0.8h, v0.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v16i16_v4i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT: add v1.8h, v1.8h, v3.8h
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: mov d3, v1.d[1]
+; CHECK-GI-NEXT: fmov x8, d2
+; CHECK-GI-NEXT: fmov x9, d3
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: mov v1.d[1], x9
+; CHECK-GI-NEXT: ret
+ %c = add <16 x i16> %a, %b
+ %d = bitcast <16 x i16> %c to <4 x i64>
+ ret <4 x i64> %d
}
define <8 x i32> @bitcast_v16i16_v8i32(<16 x i16> %a, <16 x i16> %b){
-; CHECK-LABEL: bitcast_v16i16_v8i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add v1.8h, v1.8h, v3.8h
-; CHECK-NEXT: add v0.8h, v0.8h, v2.8h
-; CHECK-NEXT: ret
- %c = add <16 x i16> %a, %b
- %d = bitcast <16 x i16> %c to <8 x i32>
- ret <8 x i32> %d
+; CHECK-SD-LABEL: bitcast_v16i16_v8i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v1.8h, v1.8h, v3.8h
+; CHECK-SD-NEXT: add v0.8h, v0.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v16i16_v8i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT: add v1.8h, v1.8h, v3.8h
+; CHECK-GI-NEXT: ret
+ %c = add <16 x i16> %a, %b
+ %d = bitcast <16 x i16> %c to <8 x i32>
+ ret <8 x i32> %d
}
define <8 x i64> @bitcast_v16i32_v8i64(<16 x i32> %a, <16 x i32> %b){
-; CHECK-LABEL: bitcast_v16i32_v8i64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add v2.4s, v2.4s, v6.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v4.4s
-; CHECK-NEXT: add v1.4s, v1.4s, v5.4s
-; CHECK-NEXT: add v3.4s, v3.4s, v7.4s
-; CHECK-NEXT: ret
- %c = add <16 x i32> %a, %b
- %d = bitcast <16 x i32> %c to <8 x i64>
- ret <8 x i64> %d
+; CHECK-SD-LABEL: bitcast_v16i32_v8i64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add v2.4s, v2.4s, v6.4s
+; CHECK-SD-NEXT: add v0.4s, v0.4s, v4.4s
+; CHECK-SD-NEXT: add v1.4s, v1.4s, v5.4s
+; CHECK-SD-NEXT: add v3.4s, v3.4s, v7.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v16i32_v8i64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.4s, v0.4s, v4.4s
+; CHECK-GI-NEXT: add v1.4s, v1.4s, v5.4s
+; CHECK-GI-NEXT: add v2.4s, v2.4s, v6.4s
+; CHECK-GI-NEXT: add v3.4s, v3.4s, v7.4s
+; CHECK-GI-NEXT: mov d4, v0.d[1]
+; CHECK-GI-NEXT: mov d5, v1.d[1]
+; CHECK-GI-NEXT: mov d6, v2.d[1]
+; CHECK-GI-NEXT: mov d7, v3.d[1]
+; CHECK-GI-NEXT: fmov x8, d4
+; CHECK-GI-NEXT: fmov x9, d5
+; CHECK-GI-NEXT: fmov x10, d6
+; CHECK-GI-NEXT: fmov x11, d7
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: mov v1.d[1], x9
+; CHECK-GI-NEXT: mov v2.d[1], x10
+; CHECK-GI-NEXT: mov v3.d[1], x11
+; CHECK-GI-NEXT: ret
+ %c = add <16 x i32> %a, %b
+ %d = bitcast <16 x i32> %c to <8 x i64>
+ ret <8 x i64> %d
}
; ===== Vectors with Non-Pow 2 Widths =====
@@ -511,7 +578,7 @@ define <6 x i16> @bitcast_v3i32_v6i16(<3 x i32> %a, <3 x i32> %b){
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
- %c = add <3 x i32> %a, %b
- %d = bitcast <3 x i32> %c to <6 x i16>
- ret <6 x i16> %d
+ %c = add <3 x i32> %a, %b
+ %d = bitcast <3 x i32> %c to <6 x i16>
+ ret <6 x i16> %d
}
More information about the llvm-commits
mailing list