[llvm] 7023cef - [AArch64][Global ISel] Add sext/zext of vector extract improvements
From: David Green via llvm-commits <llvm-commits at lists.llvm.org>
Date: Tue Sep 7 13:18:02 PDT 2021
Author: Irina Dobrescu
Date: 2021-09-07T21:17:51+01:00
New Revision: 7023cefe61913fefd82eb7ee463a2c8d01faf1ae
URL: https://github.com/llvm/llvm-project/commit/7023cefe61913fefd82eb7ee463a2c8d01faf1ae
DIFF: https://github.com/llvm/llvm-project/commit/7023cefe61913fefd82eb7ee463a2c8d01faf1ae.diff
LOG: [AArch64][Global ISel] Add sext/zext of vector extract improvements
This patch improves the lowering of a sext/zext of a vector element extract in
GlobalISel: instead of copying the lane out and extending it with a separate
instruction, it now selects a single SMOV/UMOV lane move.
For example, this piece of code:
define i64 @si64(<4 x i32> %0, i32 %1) {
  %3 = extractelement <4 x i32> %0, i64 1
  %s = sext i32 %3 to i64
  ret i64 %s
}
Used to have this lowering:
si64:
  mov s0, v0.s[1]
  fmov w8, s0
  sxtw x0, w8
  ret
Whereas this patch makes it lower to this:
si64:
  smov x0, v0.s[1]
  ret
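The unsigned case is handled the same way through UMOV. As an illustrative
sketch mirroring the new zi64 MIR test and the zv4i32i64 case in
extract-sext-zext.ll below, the zext variant:

define i64 @zi64(<4 x i32> %0) {
  %2 = extractelement <4 x i32> %0, i64 1
  %z = zext i32 %2 to i64
  ret i64 %z
}

now selects a single UMOV (printed as a plain mov), whose 32-bit register
write already zeroes the upper half of x0:

zi64:
  mov w0, v0.s[1]
  ret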
Differential Revision: https://reviews.llvm.org/D108137
Added:
llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt-with-extend.mir
llvm/test/CodeGen/AArch64/extract-sext-zext.ll
Modified:
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 62caf28a3f328..0904a7ecce761 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -193,6 +193,7 @@ class AArch64InstructionSelector : public InstructionSelector {
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
unsigned emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const;
@@ -2166,6 +2167,12 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
I.eraseFromParent();
return true;
}
+ case TargetOpcode::G_SEXT:
+ // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
+ // over a normal extend.
+ if (selectUSMovFromExtend(I, MRI))
+ return true;
+ return false;
case TargetOpcode::G_BR:
return false;
case TargetOpcode::G_SHL:
@@ -3054,6 +3061,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
case TargetOpcode::G_ANYEXT: {
+ if (selectUSMovFromExtend(I, MRI))
+ return true;
+
const Register DstReg = I.getOperand(0).getReg();
const Register SrcReg = I.getOperand(1).getReg();
@@ -3100,6 +3110,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_SEXT_INREG:
case TargetOpcode::G_SEXT: {
+ if (selectUSMovFromExtend(I, MRI))
+ return true;
+
unsigned Opcode = I.getOpcode();
const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
const Register DefReg = I.getOperand(0).getReg();
@@ -4851,6 +4864,68 @@ MachineInstr *AArch64InstructionSelector::emitLaneInsert(
return InsElt;
}
+bool AArch64InstructionSelector::selectUSMovFromExtend(
+ MachineInstr &MI, MachineRegisterInfo &MRI) {
+ if (MI.getOpcode() != TargetOpcode::G_SEXT &&
+ MI.getOpcode() != TargetOpcode::G_ZEXT &&
+ MI.getOpcode() != TargetOpcode::G_ANYEXT)
+ return false;
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
+ const Register DefReg = MI.getOperand(0).getReg();
+ const LLT DstTy = MRI.getType(DefReg);
+ unsigned DstSize = DstTy.getSizeInBits();
+
+ MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
+ MI.getOperand(1).getReg(), MRI);
+ int64_t Lane;
+ if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
+ return false;
+ Register Src0 = Extract->getOperand(1).getReg();
+
+ const LLT &VecTy = MRI.getType(Src0);
+
+ if (VecTy.getSizeInBits() != 128) {
+ const MachineInstr *ScalarToVector = emitScalarToVector(
+ VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
+ assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
+ Src0 = ScalarToVector->getOperand(0).getReg();
+ }
+
+ unsigned Opcode;
+ if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
+ Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
+ else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
+ Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
+ else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
+ Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
+ else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
+ Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
+ else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
+ Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
+ else
+ llvm_unreachable("Unexpected type combo for S/UMov!");
+
+ // We may need to generate one of these, depending on the type and sign of the
+ // input:
+ // DstReg = SMOV Src0, Lane;
+ // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
+ MachineInstr *ExtI = nullptr;
+ if (DstSize == 64 && !IsSigned) {
+ Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
+ ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
+ .addImm(0)
+ .addUse(NewReg)
+ .addImm(AArch64::sub_32);
+ RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
+ } else
+ ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
+
+ constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
+ MI.eraseFromParent();
+ return true;
+}
+
bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 0e5eb0a721b29..8fabf29dc23d1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -576,7 +576,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const LLT &VecTy = Query.Types[1];
return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
- VecTy == v16s8 || VecTy == v2s32 || VecTy == v2p0;
+ VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s32 ||
+ VecTy == v2p0;
})
.minScalarOrEltIf(
[=](const LegalityQuery &Query) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt-with-extend.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt-with-extend.mir
new file mode 100644
index 0000000000000..b2b8fe8c817ff
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt-with-extend.mir
@@ -0,0 +1,303 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: si64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $w0
+
+ ; CHECK-LABEL: name: si64
+ ; CHECK: liveins: $q0, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK: [[SMOVvi32to64_:%[0-9]+]]:gpr64 = SMOVvi32to64 [[COPY]], 1
+ ; CHECK: $x0 = COPY [[SMOVvi32to64_]]
+ ; CHECK: RET_ReallyLR implicit $x0
+ %0:fpr(<4 x s32>) = COPY $q0
+ %3:gpr(s64) = G_CONSTANT i64 1
+ %2:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<4 x s32>), %3(s64)
+ %5:gpr(s32) = COPY %2(s32)
+ %4:gpr(s64) = G_SEXT %5(s32)
+ $x0 = COPY %4(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: si64_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0, $w0
+
+ ; CHECK-LABEL: name: si64_2
+ ; CHECK: liveins: $d0, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+ ; CHECK: [[SMOVvi32to64_:%[0-9]+]]:gpr64 = SMOVvi32to64 [[INSERT_SUBREG]], 1
+ ; CHECK: $x0 = COPY [[SMOVvi32to64_]]
+ ; CHECK: RET_ReallyLR implicit $x0
+ %0:fpr(<2 x s32>) = COPY $d0
+ %3:gpr(s64) = G_CONSTANT i64 1
+ %2:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %3(s64)
+ %5:gpr(s32) = COPY %2(s32)
+ %4:gpr(s64) = G_SEXT %5(s32)
+ $x0 = COPY %4(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: zi64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $w0
+
+ ; CHECK-LABEL: name: zi64
+ ; CHECK: liveins: $q0, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK: [[UMOVvi32_:%[0-9]+]]:gpr32 = UMOVvi32 [[COPY]], 1
+ ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[UMOVvi32_]], %subreg.sub_32
+ ; CHECK: $x0 = COPY [[SUBREG_TO_REG]]
+ ; CHECK: RET_ReallyLR implicit $x0
+ %0:fpr(<4 x s32>) = COPY $q0
+ %3:gpr(s64) = G_CONSTANT i64 1
+ %2:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<4 x s32>), %3(s64)
+ %5:gpr(s32) = COPY %2(s32)
+ %4:gpr(s64) = G_ZEXT %5(s32)
+ $x0 = COPY %4(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: zi64_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0, $w0
+
+ ; CHECK-LABEL: name: zi64_2
+ ; CHECK: liveins: $d0, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+ ; CHECK: [[UMOVvi32_:%[0-9]+]]:gpr32 = UMOVvi32 [[INSERT_SUBREG]], 1
+ ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[UMOVvi32_]], %subreg.sub_32
+ ; CHECK: $x0 = COPY [[SUBREG_TO_REG]]
+ ; CHECK: RET_ReallyLR implicit $x0
+ %0:fpr(<2 x s32>) = COPY $d0
+ %3:gpr(s64) = G_CONSTANT i64 1
+ %2:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %3(s64)
+ %5:gpr(s32) = COPY %2(s32)
+ %4:gpr(s64) = G_ZEXT %5(s32)
+ $x0 = COPY %4(s64)
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: si32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $w0
+
+ ; CHECK-LABEL: name: si32
+ ; CHECK: liveins: $q0, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK: [[SMOVvi16to32_:%[0-9]+]]:gpr32 = SMOVvi16to32 [[COPY]], 1
+ ; CHECK: $w0 = COPY [[SMOVvi16to32_]]
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:fpr(<8 x s16>) = COPY $q0
+ %4:gpr(s64) = G_CONSTANT i64 1
+ %3:fpr(s16) = G_EXTRACT_VECTOR_ELT %0(<8 x s16>), %4(s64)
+ %6:gpr(s16) = COPY %3(s16)
+ %5:gpr(s32) = G_SEXT %6(s16)
+ $w0 = COPY %5(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: zi32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $w0
+
+ ; CHECK-LABEL: name: zi32
+ ; CHECK: liveins: $q0, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK: [[UMOVvi16_:%[0-9]+]]:gpr32 = UMOVvi16 [[COPY]], 1
+ ; CHECK: $w0 = COPY [[UMOVvi16_]]
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:fpr(<8 x s16>) = COPY $q0
+ %4:gpr(s64) = G_CONSTANT i64 1
+ %3:fpr(s16) = G_EXTRACT_VECTOR_ELT %0(<8 x s16>), %4(s64)
+ %6:gpr(s16) = COPY %3(s16)
+ %5:gpr(s32) = G_ZEXT %6(s16)
+ $w0 = COPY %5(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: si32_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0, $w0
+
+ ; CHECK-LABEL: name: si32_2
+ ; CHECK: liveins: $d0, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+ ; CHECK: [[SMOVvi16to32_:%[0-9]+]]:gpr32 = SMOVvi16to32 [[INSERT_SUBREG]], 1
+ ; CHECK: $w0 = COPY [[SMOVvi16to32_]]
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:fpr(<4 x s16>) = COPY $d0
+ %4:gpr(s64) = G_CONSTANT i64 1
+ %3:fpr(s16) = G_EXTRACT_VECTOR_ELT %0(<4 x s16>), %4(s64)
+ %6:gpr(s16) = COPY %3(s16)
+ %5:gpr(s32) = G_SEXT %6(s16)
+ $w0 = COPY %5(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: zi32_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0, $w0
+
+ ; CHECK-LABEL: name: zi32_2
+ ; CHECK: liveins: $d0, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+ ; CHECK: [[UMOVvi16_:%[0-9]+]]:gpr32 = UMOVvi16 [[INSERT_SUBREG]], 1
+ ; CHECK: $w0 = COPY [[UMOVvi16_]]
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:fpr(<4 x s16>) = COPY $d0
+ %4:gpr(s64) = G_CONSTANT i64 1
+ %3:fpr(s16) = G_EXTRACT_VECTOR_ELT %0(<4 x s16>), %4(s64)
+ %6:gpr(s16) = COPY %3(s16)
+ %5:gpr(s32) = G_ZEXT %6(s16)
+ $w0 = COPY %5(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: si16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $w0
+
+ ; CHECK-LABEL: name: si16
+ ; CHECK: liveins: $q0, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK: [[SMOVvi8to32_:%[0-9]+]]:gpr32 = SMOVvi8to32 [[COPY]], 1
+ ; CHECK: $w0 = COPY [[SMOVvi8to32_]]
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:fpr(<16 x s8>) = COPY $q0
+ %4:gpr(s64) = G_CONSTANT i64 1
+ %3:fpr(s8) = G_EXTRACT_VECTOR_ELT %0(<16 x s8>), %4(s64)
+ %7:gpr(s8) = COPY %3(s8)
+ %6:gpr(s32) = G_SEXT %7(s8)
+ $w0 = COPY %6(s32)
+ RET_ReallyLR implicit $w0
+...
+---
+name: zi16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $w0
+
+ ; CHECK-LABEL: name: zi16
+ ; CHECK: liveins: $q0, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+ ; CHECK: [[UMOVvi8_:%[0-9]+]]:gpr32 = UMOVvi8 [[COPY]], 1
+ ; CHECK: $w0 = COPY [[UMOVvi8_]]
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:fpr(<16 x s8>) = COPY $q0
+ %4:gpr(s64) = G_CONSTANT i64 1
+ %3:fpr(s8) = G_EXTRACT_VECTOR_ELT %0(<16 x s8>), %4(s64)
+ %7:gpr(s8) = COPY %3(s8)
+ %6:gpr(s32) = G_ZEXT %7(s8)
+ $w0 = COPY %6(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: si16_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0, $w0
+
+ ; CHECK-LABEL: name: si16_2
+ ; CHECK: liveins: $d0, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+ ; CHECK: [[SMOVvi8to32_:%[0-9]+]]:gpr32 = SMOVvi8to32 [[INSERT_SUBREG]], 1
+ ; CHECK: $w0 = COPY [[SMOVvi8to32_]]
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:fpr(<8 x s8>) = COPY $d0
+ %4:gpr(s64) = G_CONSTANT i64 1
+ %3:fpr(s8) = G_EXTRACT_VECTOR_ELT %0(<8 x s8>), %4(s64)
+ %7:gpr(s8) = COPY %3(s8)
+ %6:gpr(s32) = G_SEXT %7(s8)
+ $w0 = COPY %6(s32)
+ RET_ReallyLR implicit $w0
+...
+---
+name: zi16_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0, $w0
+
+ ; CHECK-LABEL: name: zi16_2
+ ; CHECK: liveins: $d0, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+ ; CHECK: [[UMOVvi8_:%[0-9]+]]:gpr32 = UMOVvi8 [[INSERT_SUBREG]], 1
+ ; CHECK: $w0 = COPY [[UMOVvi8_]]
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:fpr(<8 x s8>) = COPY $d0
+ %4:gpr(s64) = G_CONSTANT i64 1
+ %3:fpr(s8) = G_EXTRACT_VECTOR_ELT %0(<8 x s8>), %4(s64)
+ %7:gpr(s8) = COPY %3(s8)
+ %6:gpr(s32) = G_ZEXT %7(s8)
+ $w0 = COPY %6(s32)
+ RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir
index e157c7339daac..4c5422e1f214b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt.mir
@@ -225,10 +225,8 @@ body: |
; CHECK-LABEL: name: v16s8
; CHECK: liveins: $q0
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
- ; CHECK: [[COPY1:%[0-9]+]]:fpr8 = COPY [[COPY]].bsub
- ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[COPY1]], %subreg.bsub
- ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
- ; CHECK: $w0 = COPY [[COPY2]]
+ ; CHECK: [[UMOVvi8_:%[0-9]+]]:gpr32 = UMOVvi8 [[COPY]], 0
+ ; CHECK: $w0 = COPY [[UMOVvi8_]]
; CHECK: RET_ReallyLR implicit $w0
%0:fpr(<16 x s8>) = COPY $q0
%2:gpr(s64) = G_CONSTANT i64 0
@@ -238,6 +236,35 @@ body: |
$w0 = COPY %3(s32)
RET_ReallyLR implicit $w0
+...
+---
+name: v8s8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$q0' }
+body: |
+ bb.1:
+ liveins: $d0
+
+ ; CHECK-LABEL: name: v8s8
+ ; CHECK: liveins: $d0
+ ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+ ; CHECK: [[UMOVvi8_:%[0-9]+]]:gpr32 = UMOVvi8 [[INSERT_SUBREG]], 0
+ ; CHECK: $w0 = COPY [[UMOVvi8_]]
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:fpr(<8 x s8>) = COPY $d0
+ %2:gpr(s64) = G_CONSTANT i64 0
+ %1:fpr(s8) = G_EXTRACT_VECTOR_ELT %0(<8 x s8>), %2(s64)
+ %4:gpr(s8) = COPY %1(s8)
+ %3:gpr(s32) = G_ANYEXT %4(s8)
+ $w0 = COPY %3(s32)
+ RET_ReallyLR implicit $w0
+
...
---
name: v2p0
diff --git a/llvm/test/CodeGen/AArch64/extract-sext-zext.ll b/llvm/test/CodeGen/AArch64/extract-sext-zext.ll
new file mode 100644
index 0000000000000..2cbd690c3f685
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/extract-sext-zext.ll
@@ -0,0 +1,356 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-eabi %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-ISEL
+; RUN: llc -mtriple=aarch64-eabi -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GLOBAL
+
+define i64 @extract_v2i64(<2 x i64> %x, i32 %y) {
+; CHECK-ISEL-LABEL: extract_v2i64:
+; CHECK-ISEL: // %bb.0:
+; CHECK-ISEL-NEXT: mov x0, v0.d[1]
+; CHECK-ISEL-NEXT: ret
+;
+; CHECK-GLOBAL-LABEL: extract_v2i64:
+; CHECK-GLOBAL: // %bb.0:
+; CHECK-GLOBAL-NEXT: mov d0, v0.d[1]
+; CHECK-GLOBAL-NEXT: fmov x0, d0
+; CHECK-GLOBAL-NEXT: ret
+ %ext = extractelement <2 x i64> %x, i32 1
+ ret i64 %ext
+}
+
+define i64 @extract_v1i64(<1 x i64> %x, i32 %y) {
+; CHECK-ISEL-LABEL: extract_v1i64:
+; CHECK-ISEL: // %bb.0:
+; CHECK-ISEL-NEXT: ret
+;
+; CHECK-GLOBAL-LABEL: extract_v1i64:
+; CHECK-GLOBAL: // %bb.0:
+; CHECK-GLOBAL-NEXT: fmov x0, d0
+; CHECK-GLOBAL-NEXT: ret
+ %ext = extractelement <1 x i64> %x, i32 1
+ ret i64 %ext
+}
+
+define i32 @extract_v4i32(<4 x i32> %x, i32 %y) {
+; CHECK-ISEL-LABEL: extract_v4i32:
+; CHECK-ISEL: // %bb.0:
+; CHECK-ISEL-NEXT: mov w0, v0.s[1]
+; CHECK-ISEL-NEXT: ret
+;
+; CHECK-GLOBAL-LABEL: extract_v4i32:
+; CHECK-GLOBAL: // %bb.0:
+; CHECK-GLOBAL-NEXT: mov s0, v0.s[1]
+; CHECK-GLOBAL-NEXT: fmov w0, s0
+; CHECK-GLOBAL-NEXT: ret
+ %ext = extractelement <4 x i32> %x, i32 1
+ ret i32 %ext
+}
+
+define i32 @extract_v2i32(<2 x i32> %x, i32 %y) {
+; CHECK-ISEL-LABEL: extract_v2i32:
+; CHECK-ISEL: // %bb.0:
+; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-ISEL-NEXT: mov w0, v0.s[1]
+; CHECK-ISEL-NEXT: ret
+;
+; CHECK-GLOBAL-LABEL: extract_v2i32:
+; CHECK-GLOBAL: // %bb.0:
+; CHECK-GLOBAL-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GLOBAL-NEXT: mov s0, v0.s[1]
+; CHECK-GLOBAL-NEXT: fmov w0, s0
+; CHECK-GLOBAL-NEXT: ret
+ %ext = extractelement <2 x i32> %x, i32 1
+ ret i32 %ext
+}
+
+define i16 @extract_v8i16(<8 x i16> %x, i32 %y) {
+; CHECK-LABEL: extract_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: umov w0, v0.h[1]
+; CHECK-NEXT: ret
+ %ext = extractelement <8 x i16> %x, i32 1
+ ret i16 %ext
+}
+
+define i16 @extract_v4i16(<4 x i16> %x, i32 %y) {
+; CHECK-LABEL: extract_v4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: umov w0, v0.h[1]
+; CHECK-NEXT: ret
+ %ext = extractelement <4 x i16> %x, i32 1
+ ret i16 %ext
+}
+
+define i8 @extract_v16i8(<16 x i8> %x, i32 %y) {
+; CHECK-LABEL: extract_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: umov w0, v0.b[1]
+; CHECK-NEXT: ret
+ %ext = extractelement <16 x i8> %x, i32 1
+ ret i8 %ext
+}
+
+define i8 @extract_v8i8(<8 x i8> %x, i32 %y) {
+; CHECK-LABEL: extract_v8i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: umov w0, v0.b[1]
+; CHECK-NEXT: ret
+ %ext = extractelement <8 x i8> %x, i32 1
+ ret i8 %ext
+}
+
+
+define i64 @sv2i32i64(<2 x i32> %x) {
+; CHECK-LABEL: sv2i32i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: smov x0, v0.s[1]
+; CHECK-NEXT: ret
+ %e = extractelement <2 x i32> %x, i64 1
+ %s = sext i32 %e to i64
+ ret i64 %s
+}
+
+define i64 @sv4i32i64(<4 x i32> %x) {
+; CHECK-LABEL: sv4i32i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: smov x0, v0.s[2]
+; CHECK-NEXT: ret
+ %e = extractelement <4 x i32> %x, i64 2
+ %s = sext i32 %e to i64
+ ret i64 %s
+}
+
+define i64 @sv4i16i64(<4 x i16> %x) {
+; CHECK-LABEL: sv4i16i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: smov x0, v0.h[2]
+; CHECK-NEXT: ret
+ %e = extractelement <4 x i16> %x, i64 2
+ %s = sext i16 %e to i64
+ ret i64 %s
+}
+
+define i64 @sv8i16i64(<8 x i16> %x) {
+; CHECK-LABEL: sv8i16i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: smov x0, v0.h[2]
+; CHECK-NEXT: ret
+ %e = extractelement <8 x i16> %x, i64 2
+ %s = sext i16 %e to i64
+ ret i64 %s
+}
+
+define i64 @sv8i8i64(<8 x i8> %x) {
+; CHECK-LABEL: sv8i8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: smov x0, v0.b[2]
+; CHECK-NEXT: ret
+ %e = extractelement <8 x i8> %x, i64 2
+ %s = sext i8 %e to i64
+ ret i64 %s
+}
+
+define i64 @sv16i8i64(<16 x i8> %x) {
+; CHECK-LABEL: sv16i8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: smov x0, v0.b[2]
+; CHECK-NEXT: ret
+ %e = extractelement <16 x i8> %x, i64 2
+ %s = sext i8 %e to i64
+ ret i64 %s
+}
+
+define i32 @sv8i16i32(<8 x i16> %x) {
+; CHECK-LABEL: sv8i16i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: smov w0, v0.h[2]
+; CHECK-NEXT: ret
+ %e = extractelement <8 x i16> %x, i64 2
+ %s = sext i16 %e to i32
+ ret i32 %s
+}
+
+define i32 @sv4i16i32(<4 x i16> %x) {
+; CHECK-LABEL: sv4i16i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: smov w0, v0.h[2]
+; CHECK-NEXT: ret
+ %e = extractelement <4 x i16> %x, i64 2
+ %s = sext i16 %e to i32
+ ret i32 %s
+}
+
+define i32 @sv16i8i32(<16 x i8> %x) {
+; CHECK-LABEL: sv16i8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: smov w0, v0.b[2]
+; CHECK-NEXT: ret
+ %e = extractelement <16 x i8> %x, i64 2
+ %s = sext i8 %e to i32
+ ret i32 %s
+}
+
+define i32 @sv8i8i32(<8 x i8> %x) {
+; CHECK-LABEL: sv8i8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: smov w0, v0.b[2]
+; CHECK-NEXT: ret
+ %e = extractelement <8 x i8> %x, i64 2
+ %s = sext i8 %e to i32
+ ret i32 %s
+}
+
+define i16 @sv16i8i16(<16 x i8> %x) {
+; CHECK-LABEL: sv16i8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: smov w0, v0.b[2]
+; CHECK-NEXT: ret
+ %e = extractelement <16 x i8> %x, i64 2
+ %s = sext i8 %e to i16
+ ret i16 %s
+}
+
+define i16 @sv8i8i16(<8 x i8> %x) {
+; CHECK-LABEL: sv8i8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: smov w0, v0.b[2]
+; CHECK-NEXT: ret
+ %e = extractelement <8 x i8> %x, i64 2
+ %s = sext i8 %e to i16
+ ret i16 %s
+}
+
+
+
+define i64 @zv2i32i64(<2 x i32> %x) {
+; CHECK-LABEL: zv2i32i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov w0, v0.s[1]
+; CHECK-NEXT: ret
+ %e = extractelement <2 x i32> %x, i64 1
+ %s = zext i32 %e to i64
+ ret i64 %s
+}
+
+define i64 @zv4i32i64(<4 x i32> %x) {
+; CHECK-LABEL: zv4i32i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w0, v0.s[2]
+; CHECK-NEXT: ret
+ %e = extractelement <4 x i32> %x, i64 2
+ %s = zext i32 %e to i64
+ ret i64 %s
+}
+
+define i64 @zv4i16i64(<4 x i16> %x) {
+; CHECK-LABEL: zv4i16i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: umov w0, v0.h[2]
+; CHECK-NEXT: ret
+ %e = extractelement <4 x i16> %x, i64 2
+ %s = zext i16 %e to i64
+ ret i64 %s
+}
+
+define i64 @zv8i16i64(<8 x i16> %x) {
+; CHECK-LABEL: zv8i16i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: umov w0, v0.h[2]
+; CHECK-NEXT: ret
+ %e = extractelement <8 x i16> %x, i64 2
+ %s = zext i16 %e to i64
+ ret i64 %s
+}
+
+define i64 @zv8i8i64(<8 x i8> %x) {
+; CHECK-LABEL: zv8i8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: umov w0, v0.b[2]
+; CHECK-NEXT: ret
+ %e = extractelement <8 x i8> %x, i64 2
+ %s = zext i8 %e to i64
+ ret i64 %s
+}
+
+define i64 @zv16i8i64(<16 x i8> %x) {
+; CHECK-LABEL: zv16i8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: umov w0, v0.b[2]
+; CHECK-NEXT: ret
+ %e = extractelement <16 x i8> %x, i64 2
+ %s = zext i8 %e to i64
+ ret i64 %s
+}
+
+define i32 @zv8i16i32(<8 x i16> %x) {
+; CHECK-LABEL: zv8i16i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: umov w0, v0.h[2]
+; CHECK-NEXT: ret
+ %e = extractelement <8 x i16> %x, i64 2
+ %s = zext i16 %e to i32
+ ret i32 %s
+}
+
+define i32 @zv4i16i32(<4 x i16> %x) {
+; CHECK-LABEL: zv4i16i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: umov w0, v0.h[2]
+; CHECK-NEXT: ret
+ %e = extractelement <4 x i16> %x, i64 2
+ %s = zext i16 %e to i32
+ ret i32 %s
+}
+
+define i32 @zv16i8i32(<16 x i8> %x) {
+; CHECK-LABEL: zv16i8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: umov w0, v0.b[2]
+; CHECK-NEXT: ret
+ %e = extractelement <16 x i8> %x, i64 2
+ %s = zext i8 %e to i32
+ ret i32 %s
+}
+
+define i32 @zv8i8i32(<8 x i8> %x) {
+; CHECK-LABEL: zv8i8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: umov w0, v0.b[2]
+; CHECK-NEXT: ret
+ %e = extractelement <8 x i8> %x, i64 2
+ %s = zext i8 %e to i32
+ ret i32 %s
+}
+
+define i16 @zv16i8i16(<16 x i8> %x) {
+; CHECK-LABEL: zv16i8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: umov w0, v0.b[2]
+; CHECK-NEXT: ret
+ %e = extractelement <16 x i8> %x, i64 2
+ %s = zext i8 %e to i16
+ ret i16 %s
+}
+
+define i16 @zv8i8i16(<8 x i8> %x) {
+; CHECK-LABEL: zv8i8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: umov w0, v0.b[2]
+; CHECK-NEXT: ret
+ %e = extractelement <8 x i8> %x, i64 2
+ %s = zext i8 %e to i16
+ ret i16 %s
+}