[llvm] 3508f12 - [PowerPC][GISel] Add initial GlobalISel support for vector functions.
Amy Kwan via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 27 06:24:14 PDT 2023
Author: Amy Kwan
Date: 2023-03-27T08:23:05-05:00
New Revision: 3508f123353c0a145ee79cebb972f46fcb97bf1e
URL: https://github.com/llvm/llvm-project/commit/3508f123353c0a145ee79cebb972f46fcb97bf1e
DIFF: https://github.com/llvm/llvm-project/commit/3508f123353c0a145ee79cebb972f46fcb97bf1e.diff
LOG: [PowerPC][GISel] Add initial GlobalISel support for vector functions.
This patch adds the initial support for vector functions and register banks
within GlobalISel. With this patch, we are able to support simple functions that
return vectors, and also functions that perform simple operations.
This patch also:
- Legalizes vector types for G_AND, G_OR, G_XOR, G_ADD, G_SUB, G_BITCAST, G_FADD, G_FSUB
- Introduce initial support for bitcasting (that will need to be extended upon)
- Add various different test cases to test vector support within GlobalISel
Differential Revision: https://reviews.llvm.org/D137785
Added:
llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-arithmentic-vec.ll
llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-logical-vec.ll
llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-ret-and-bitcast-vec.ll
Modified:
llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td
llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def
llvm/test/CodeGen/PowerPC/GlobalISel/float-arithmetic.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
index 97f3e0963d7b5..24f02a3def06b 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
@@ -113,6 +113,10 @@ static const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank *RB) {
if (Ty.getSizeInBits() == 64)
return &PPC::F8RCRegClass;
}
+ if (RB->getID() == PPC::VECRegBankID) {
+ if (Ty.getSizeInBits() == 128)
+ return &PPC::VSRCRegClass;
+ }
if (RB->getID() == PPC::CRRegBankID) {
if (Ty.getSizeInBits() == 1)
return &PPC::CRBITRCRegClass;
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
index 2e0d12c4e633b..6b24c2a07f681 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
@@ -10,12 +10,33 @@
//===----------------------------------------------------------------------===//
#include "PPCLegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "ppc-legalinfo"
using namespace llvm;
using namespace LegalizeActions;
+using namespace LegalizeMutations;
+using namespace LegalityPredicates;
+
+static LegalityPredicate isRegisterType(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ unsigned TypeSize = QueryTy.getSizeInBits();
+
+ if (TypeSize % 32 == 1 || TypeSize > 128)
+ return false;
+
+ // Check if this is a legal PowerPC vector type.
+ if (QueryTy.isVector()) {
+ const int EltSize = QueryTy.getElementType().getSizeInBits();
+ return (EltSize == 8 || EltSize == 16 || EltSize == 32 || EltSize == 64);
+ }
+
+ return true;
+ };
+}
PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) {
using namespace TargetOpcode;
@@ -25,6 +46,10 @@ PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) {
const LLT S16 = LLT::scalar(16);
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
+ const LLT V16S8 = LLT::fixed_vector(16, 8);
+ const LLT V8S16 = LLT::fixed_vector(8, 16);
+ const LLT V4S32 = LLT::fixed_vector(4, 32);
+ const LLT V2S64 = LLT::fixed_vector(2, 64);
getActionDefinitionsBuilder(G_IMPLICIT_DEF).legalFor({S64});
getActionDefinitionsBuilder(G_CONSTANT)
.legalFor({S32, S64})
@@ -33,14 +58,18 @@ PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) {
.legalForCartesianProduct({S64}, {S1, S8, S16, S32})
.clampScalar(0, S64, S64);
getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
- .legalFor({S64})
- .clampScalar(0, S64, S64);
+ .legalFor({S64, V4S32})
+ .clampScalar(0, S64, S64)
+ .bitcastIf(typeIsNot(0, V4S32), changeTo(0, V4S32));
getActionDefinitionsBuilder({G_ADD, G_SUB})
- .legalFor({S64})
+ .legalFor({S64, V16S8, V8S16, V4S32, V2S64})
.clampScalar(0, S64, S64);
+ getActionDefinitionsBuilder(G_BITCAST)
+ .legalIf(all(isRegisterType(0), isRegisterType(1)))
+ .lower();
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV})
- .legalFor({S32, S64});
+ .legalFor({S32, S64, V4S32, V2S64});
getActionDefinitionsBuilder(G_FCMP).legalForCartesianProduct({S1},
{S32, S64});
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
index 0ae44ecc52b22..25587b39b97fa 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
@@ -48,6 +48,14 @@ PPCRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
case PPC::VSSRCRegClassID:
case PPC::F4RCRegClassID:
return getRegBank(PPC::FPRRegBankID);
+ case PPC::VSRCRegClassID:
+ case PPC::VRRCRegClassID:
+ case PPC::VRRC_with_sub_64_in_SPILLTOVSRRCRegClassID:
+ case PPC::VSRC_with_sub_64_in_SPILLTOVSRRCRegClassID:
+ case PPC::SPILLTOVSRRCRegClassID:
+ case PPC::VSLRCRegClassID:
+ case PPC::VSLRC_with_sub_64_in_SPILLTOVSRRCRegClassID:
+ return getRegBank(PPC::VECRegBankID);
case PPC::CRRCRegClassID:
case PPC::CRBITRCRegClassID:
return getRegBank(PPC::CRRegBankID);
@@ -90,11 +98,21 @@ PPCRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// Extension ops.
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
- case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_ANYEXT: {
assert(NumOperands <= 3 &&
"This code is for instructions with 3 or less operands");
- OperandsMapping = getValueMapping(PMI_GPR64);
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ unsigned Size = Ty.getSizeInBits();
+ switch (Size) {
+ case 128:
+ OperandsMapping = getValueMapping(PMI_VEC128);
+ break;
+ default:
+ OperandsMapping = getValueMapping(PMI_GPR64);
+ break;
+ }
break;
+ }
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
@@ -102,8 +120,19 @@ PPCRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
Register SrcReg = MI.getOperand(1).getReg();
unsigned Size = getSizeInBits(SrcReg, MRI, TRI);
- assert((Size == 32 || Size == 64) && "Unsupported floating point types!\n");
- OperandsMapping = getValueMapping(Size == 32 ? PMI_FPR32 : PMI_FPR64);
+ assert((Size == 32 || Size == 64 || Size == 128) &&
+ "Unsupported floating point types!\n");
+ switch (Size) {
+ case 32:
+ OperandsMapping = getValueMapping(PMI_FPR32);
+ break;
+ case 64:
+ OperandsMapping = getValueMapping(PMI_FPR64);
+ break;
+ case 128:
+ OperandsMapping = getValueMapping(PMI_VEC128);
+ break;
+ }
break;
}
case TargetOpcode::G_FCMP: {
@@ -185,6 +214,23 @@ PPCRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OperandsMapping = getOperandsMapping(OpdsMapping);
break;
}
+ case TargetOpcode::G_BITCAST: {
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ unsigned DstSize = DstTy.getSizeInBits();
+
+ bool DstIsGPR = !DstTy.isVector();
+ bool SrcIsGPR = !SrcTy.isVector();
+ // TODO: Currently, only vector and GPR register banks are handled.
+ // This needs to be extended to handle floating point register
+ // banks in the future.
+ const RegisterBank &DstRB = DstIsGPR ? PPC::GPRRegBank : PPC::VECRegBank;
+ const RegisterBank &SrcRB = SrcIsGPR ? PPC::GPRRegBank : PPC::VECRegBank;
+
+ return getInstructionMapping(
+ MappingID, Cost, getCopyMapping(DstRB.getID(), SrcRB.getID(), DstSize),
+ NumOperands);
+ }
default:
return getInvalidInstructionMapping();
}
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
index 885bdcb758165..c2a16c92ba85d 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
@@ -32,7 +32,8 @@ class PPCGenRegisterBankInfo : public RegisterBankInfo {
PMI_GPR64 = 2,
PMI_FPR32 = 3,
PMI_FPR64 = 4,
- PMI_CR = 5,
+ PMI_VEC128 = 5,
+ PMI_CR = 6,
PMI_Min = PMI_GPR32,
};
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td
index 16f3bd8cf4a75..f2237d825cb4a 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td
@@ -15,5 +15,7 @@
def GPRRegBank : RegisterBank<"GPR", [G8RC, G8RC_NOX0]>;
/// Floating point Registers
def FPRRegBank : RegisterBank<"FPR", [VSSRC]>;
+/// Vector Registers
+def VECRegBank : RegisterBank<"VEC", [VSRC]>;
/// Condition Registers
def CRRegBank : RegisterBank<"CR", [CRRC]>;
diff --git a/llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def b/llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def
index f7e79ae71ebd1..eff4432206e10 100644
--- a/llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def
+++ b/llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def
@@ -22,7 +22,9 @@ RegisterBankInfo::PartialMapping PPCGenRegisterBankInfo::PartMappings[]{
{0, 32, PPC::FPRRegBank},
// 3: FPR 64-bit value
{0, 64, PPC::FPRRegBank},
- // 4: CR 4-bit value
+ // 4: 128-bit vector (VSX, Altivec)
+ {0, 128, PPC::VECRegBank},
+ // 5: CR 4-bit value
{0, 4, PPC::CRRegBank},
};
@@ -57,7 +59,11 @@ RegisterBankInfo::ValueMapping PPCGenRegisterBankInfo::ValMappings[]{
{&PPCGenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
{&PPCGenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
{&PPCGenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
- // 13: CR 4-bit value.
+ // 13: 128-bit vector.
+ {&PPCGenRegisterBankInfo::PartMappings[PMI_VEC128 - PMI_Min], 1},
+ {&PPCGenRegisterBankInfo::PartMappings[PMI_VEC128 - PMI_Min], 1},
+ {&PPCGenRegisterBankInfo::PartMappings[PMI_VEC128 - PMI_Min], 1},
+ // 16: CR 4-bit value.
{&PPCGenRegisterBankInfo::PartMappings[PMI_CR - PMI_Min], 1},
};
@@ -71,14 +77,36 @@ PPCGenRegisterBankInfo::getValueMapping(PartialMappingIdx RBIdx) {
return &ValMappings[1 + 3 * ValMappingIdx];
}
+PPCGenRegisterBankInfo::PartialMappingIdx
+ PPCGenRegisterBankInfo::BankIDToCopyMapIdx[]{
+ PMI_None,
+ PMI_FPR64, // FPR
+ PMI_GPR64, // GPR
+ PMI_VEC128, // VEC
+};
+
// TODO Too simple!
const RegisterBankInfo::ValueMapping *
PPCGenRegisterBankInfo::getCopyMapping(unsigned DstBankID, unsigned SrcBankID,
unsigned Size) {
assert(DstBankID < PPC::NumRegisterBanks && "Invalid bank ID");
assert(SrcBankID < PPC::NumRegisterBanks && "Invalid bank ID");
+ PartialMappingIdx DstRBIdx = BankIDToCopyMapIdx[DstBankID];
+ PartialMappingIdx SrcRBIdx = BankIDToCopyMapIdx[SrcBankID];
+ assert(DstRBIdx != PMI_None && "No such mapping");
+ assert(SrcRBIdx != PMI_None && "No such mapping");
+
+ if (DstRBIdx == SrcRBIdx)
+ return getValueMapping(DstRBIdx);
- return &ValMappings[1];
+ assert(Size <= 128 && "Can currently handle types up to 128 bits (vectors)!");
+ // TODO: This function needs to be updated to handle all cases for
+ // GPRs, FPRs and vectors. It currently only handles bitcasting to
+ // the same type and has only mainly been tested for bitcasting
+ // between different vector types.
+ unsigned ValMappingIdx = DstRBIdx - PMI_Min;
+
+ return &ValMappings[1 + 3 * ValMappingIdx];
}
} // namespace llvm
diff --git a/llvm/test/CodeGen/PowerPC/GlobalISel/float-arithmetic.ll b/llvm/test/CodeGen/PowerPC/GlobalISel/float-arithmetic.ll
index 185fa4deb6b5b..1ebde0e267146 100644
--- a/llvm/test/CodeGen/PowerPC/GlobalISel/float-arithmetic.ll
+++ b/llvm/test/CodeGen/PowerPC/GlobalISel/float-arithmetic.ll
@@ -51,3 +51,75 @@ entry:
%div = fdiv float %a, %b
ret float %div
}
+
+define <4 x float> @test_fadd_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_fadd_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvaddsp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = fadd <4 x float> %a, %b
+ ret <4 x float> %res
+}
+
+define <2 x double> @test_fadd_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_fadd_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvadddp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = fadd <2 x double> %a, %b
+ ret <2 x double> %res
+}
+
+define <4 x float> @test_fsub_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_fsub_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvsubsp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = fsub <4 x float> %a, %b
+ ret <4 x float> %res
+}
+
+define <2 x double> @test_fsub_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_fsub_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvsubdp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = fsub <2 x double> %a, %b
+ ret <2 x double> %res
+}
+
+define <4 x float> @test_fmul_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_fmul_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvmulsp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = fmul <4 x float> %a, %b
+ ret <4 x float> %res
+}
+
+define <2 x double> @test_fmul_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_fmul_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvmuldp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = fmul <2 x double> %a, %b
+ ret <2 x double> %res
+}
+
+define <4 x float> @test_fdiv_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_fdiv_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvdivsp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = fdiv <4 x float> %a, %b
+ ret <4 x float> %res
+}
+
+define <2 x double> @test_fdiv_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_fdiv_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvdivdp v2, v2, v3
+; CHECK-NEXT: blr
+ %res = fdiv <2 x double> %a, %b
+ ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-arithmentic-vec.ll b/llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-arithmentic-vec.ll
new file mode 100644
index 0000000000000..e38de1b7259d9
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-arithmentic-vec.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
+; RUN: -ppc-asm-full-reg-names -global-isel -o - < %s | FileCheck %s
+
+define <16 x i8> @test_add_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_add_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vaddubm v2, v2, v3
+; CHECK-NEXT: blr
+ %res = add <16 x i8> %a, %b
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @test_add_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_add_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vadduhm v2, v2, v3
+; CHECK-NEXT: blr
+ %res = add <8 x i16> %a, %b
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @test_add_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_add_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vadduwm v2, v2, v3
+; CHECK-NEXT: blr
+ %res = add <4 x i32> %a, %b
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @test_add_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_add_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vaddudm v2, v2, v3
+; CHECK-NEXT: blr
+ %res = add <2 x i64> %a, %b
+ ret <2 x i64> %res
+}
+
+define <16 x i8> @test_sub_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_sub_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsububm v2, v2, v3
+; CHECK-NEXT: blr
+ %res = sub <16 x i8> %a, %b
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @test_sub_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_sub_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsubuhm v2, v2, v3
+; CHECK-NEXT: blr
+ %res = sub <8 x i16> %a, %b
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @test_sub_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_sub_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsubuwm v2, v2, v3
+; CHECK-NEXT: blr
+ %res = sub <4 x i32> %a, %b
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @test_sub_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_sub_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsubudm v2, v2, v3
+; CHECK-NEXT: blr
+ %res = sub <2 x i64> %a, %b
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-logical-vec.ll b/llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-logical-vec.ll
new file mode 100644
index 0000000000000..07cb57099dc49
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-logical-vec.ll
@@ -0,0 +1,174 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
+; RUN: -ppc-asm-full-reg-names -global-isel -o - < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
+; RUN: -ppc-asm-full-reg-names -mattr=-vsx -global-isel -o - < %s | \
+; RUN: FileCheck %s --check-prefix=NO-VSX
+
+define <16 x i8> @test_and_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_and_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxland v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_and_v16i8:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vand v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = and <16 x i8> %a, %b
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_or_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_or_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxlor v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_or_v16i8:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vor v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = or <16 x i8> %a, %b
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_xor_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_xor_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxlxor v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_xor_v16i8:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vxor v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = xor <16 x i8> %a, %b
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @test_and_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_and_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxland v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_and_v8i16:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vand v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = and <8 x i16> %a, %b
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_or_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_or_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxlor v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_or_v8i16:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vor v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = or <8 x i16> %a, %b
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_xor_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_xor_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxlxor v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_xor_v8i16:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vxor v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = xor <8 x i16> %a, %b
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @test_and_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_and_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxland v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_and_v4i32:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vand v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = and <4 x i32> %a, %b
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_or_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_or_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxlor v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_or_v4i32:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vor v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = or <4 x i32> %a, %b
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_xor_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_xor_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxlxor v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_xor_v4i32:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vxor v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = xor <4 x i32> %a, %b
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @test_and_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_and_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxland v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_and_v2i64:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vand v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = and <2 x i64> %a, %b
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_or_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_or_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxlor v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_or_v2i64:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vor v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = or <2 x i64> %a, %b
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_xor_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_xor_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xxlxor v2, v2, v3
+; CHECK-NEXT: blr
+;
+; NO-VSX-LABEL: test_xor_v2i64:
+; NO-VSX: # %bb.0:
+; NO-VSX-NEXT: vxor v2, v2, v3
+; NO-VSX-NEXT: blr
+ %res = xor <2 x i64> %a, %b
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-ret-and-bitcast-vec.ll b/llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-ret-and-bitcast-vec.ll
new file mode 100644
index 0000000000000..b0dd3522cf34f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/GlobalISel/ppc-isel-ret-and-bitcast-vec.ll
@@ -0,0 +1,336 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \
+; RUN: -ppc-asm-full-reg-names -global-isel -o - < %s | FileCheck %s
+
+; Test returning vectors in functions
+define <16 x i8> @test_ret_v16i8(<16 x i8> %a){
+; CHECK-LABEL: test_ret_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ ret <16 x i8> %a
+}
+
+define <8 x i16> @test_ret_v8i16(<8 x i16> %a){
+; CHECK-LABEL: test_ret_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ ret <8 x i16> %a
+}
+
+define <4 x i32> @test_ret_v4i32(<4 x i32> %a){
+; CHECK-LABEL: test_ret_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ ret <4 x i32> %a
+}
+
+define <2 x i64> @test_ret_v2i64(<2 x i64> %a){
+; CHECK-LABEL: test_ret_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ ret <2 x i64> %a
+}
+
+define <4 x float> @test_ret_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_ret_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ ret <4 x float> %a
+}
+
+define <2 x double> @test_ret_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_ret_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ ret <2 x double> %a
+}
+
+; Test simple bitcasting of vectors
+define <16 x i8> @test_bitcast_v16i8_v16i8(<16 x i8> %a){
+; CHECK-LABEL: test_bitcast_v16i8_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <16 x i8> %a to <16 x i8>
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_bitcast_v16i8_v8i16(<8 x i16> %a){
+; CHECK-LABEL: test_bitcast_v16i8_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <8 x i16> %a to <16 x i8>
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_bitcast_v16i8_v4i32(<4 x i32> %a){
+; CHECK-LABEL: test_bitcast_v16i8_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x i32> %a to <16 x i8>
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_bitcast_v16i8_v2i64(<2 x i64> %a){
+; CHECK-LABEL: test_bitcast_v16i8_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x i64> %a to <16 x i8>
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_bitcast_v16i8_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_bitcast_v16i8_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x float> %a to <16 x i8>
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_bitcast_v16i8_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_bitcast_v16i8_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x double> %a to <16 x i8>
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @test_bitcast_v8i16_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_bitcast_v8i16_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <16 x i8> %a to <8 x i16>
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_bitcast_v8i16_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_bitcast_v8i16_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <8 x i16> %a to <8 x i16>
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_bitcast_v8i16_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_bitcast_v8i16_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x i32> %a to <8 x i16>
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_bitcast_v8i16_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_bitcast_v8i16_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x i64> %a to <8 x i16>
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_bitcast_v8i16_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_bitcast_v8i16_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x float> %a to <8 x i16>
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_bitcast_v8i16_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_bitcast_v8i16_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x double> %a to <8 x i16>
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @test_bitcast_v4i32_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_bitcast_v4i32_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <16 x i8> %a to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_bitcast_v4i32_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_bitcast_v4i32_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <8 x i16> %a to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_bitcast_v4i32_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_bitcast_v4i32_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x i32> %a to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_bitcast_v4i32_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_bitcast_v4i32_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x i64> %a to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_bitcast_v4i32_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_bitcast_v4i32_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x float> %a to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_bitcast_v4i32_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_bitcast_v4i32_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x double> %a to <4 x i32>
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @test_bitcast_v2i64_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_bitcast_v2i64_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <16 x i8> %a to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_bitcast_v2i64_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_bitcast_v2i64_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <8 x i16> %a to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_bitcast_v2i64_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_bitcast_v2i64_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x i32> %a to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_bitcast_v2i64_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_bitcast_v2i64_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x i64> %a to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_bitcast_v2i64_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_bitcast_v2i64_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x float> %a to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_bitcast_v2i64_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_bitcast_v2i64_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x double> %a to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define <4 x float> @test_bitcast_v4f32_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_bitcast_v4f32_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <16 x i8> %a to <4 x float>
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_bitcast_v4f32_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_bitcast_v4f32_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <8 x i16> %a to <4 x float>
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_bitcast_v4f32_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_bitcast_v4f32_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x i32> %a to <4 x float>
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_bitcast_v4f32_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_bitcast_v4f32_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x i64> %a to <4 x float>
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_bitcast_v4f32_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_bitcast_v4f32_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x float> %a to <4 x float>
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_bitcast_v4f32_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_bitcast_v4f32_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x double> %a to <4 x float>
+ ret <4 x float> %res
+}
+
+define <2 x double> @test_bitcast_v2f64_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_bitcast_v2f64_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <16 x i8> %a to <2 x double>
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_bitcast_v2f64_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_bitcast_v2f64_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <8 x i16> %a to <2 x double>
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_bitcast_v2f64_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_bitcast_v2f64_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x i32> %a to <2 x double>
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_bitcast_v2f64_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_bitcast_v2f64_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x i64> %a to <2 x double>
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_bitcast_v2f64_v4f32(<4 x float> %a){
+; CHECK-LABEL: test_bitcast_v2f64_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <4 x float> %a to <2 x double>
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_bitcast_v2f64_v2f64(<2 x double> %a){
+; CHECK-LABEL: test_bitcast_v2f64_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: blr
+ %res = bitcast <2 x double> %a to <2 x double>
+ ret <2 x double> %res
+}
+
More information about the llvm-commits
mailing list