[llvm] 2363a20 - [AArch64][GlobalISel] Optimize G_BUILD_VECTOR of undef + 1 elt -> SUBREG_TO_REG
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 26 11:45:35 PDT 2021
Author: Jessica Paquette
Date: 2021-08-26T11:45:11-07:00
New Revision: 2363a2000171d12e21bc49a783291401656c4f35
URL: https://github.com/llvm/llvm-project/commit/2363a2000171d12e21bc49a783291401656c4f35
DIFF: https://github.com/llvm/llvm-project/commit/2363a2000171d12e21bc49a783291401656c4f35.diff
LOG: [AArch64][GlobalISel] Optimize G_BUILD_VECTOR of undef + 1 elt -> SUBREG_TO_REG
This pattern
```
%elt = ... something ...
%undef = G_IMPLICIT_DEF
%vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
```
Can be selected to a SUBREG_TO_REG, assuming `%elt` and `%vec` have the same
register bank. We don't care about any of the bits in `%vec` aside from those
in `%elt`, which just happens to be the 0th element.
This is preferable to emitting `mov` instructions for every index.
This gives minor code size improvements on the test suite at -Os.
Differential Revision: https://reviews.llvm.org/D108773
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir
llvm/test/CodeGen/AArch64/arm64-rev.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index c2b8b58fbec7d..c2951c4f6ecb2 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -164,6 +164,9 @@ class AArch64InstructionSelector : public InstructionSelector {
bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
MachineRegisterInfo &MRI);
+ /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
+ /// SUBREG_TO_REG.
+ bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -4963,6 +4966,47 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
return true;
}
+bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
+ MachineInstr &I, MachineRegisterInfo &MRI) {
+ // Given:
+ // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
+ //
+ // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
+ Register Dst = I.getOperand(0).getReg();
+ Register EltReg = I.getOperand(1).getReg();
+ LLT EltTy = MRI.getType(EltReg);
+ // If the index isn't on the same bank as its elements, then this can't be a
+ // SUBREG_TO_REG.
+ const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
+ const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
+ if (EltRB != DstRB)
+ return false;
+ if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
+ [&MRI](const MachineOperand &Op) {
+ return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
+ MRI);
+ }))
+ return false;
+ unsigned SubReg;
+ const TargetRegisterClass *EltRC =
+ getMinClassForRegBank(EltRB, EltTy.getSizeInBits());
+ if (!EltRC)
+ return false;
+ const TargetRegisterClass *DstRC =
+ getMinClassForRegBank(DstRB, MRI.getType(Dst).getSizeInBits());
+ if (!DstRC)
+ return false;
+ if (!getSubRegForClass(EltRC, TRI, SubReg))
+ return false;
+ auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
+ .addImm(0)
+ .addUse(EltReg)
+ .addImm(SubReg);
+ I.eraseFromParent();
+ constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
+ return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
+}
+
bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
@@ -4974,6 +5018,9 @@ bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
if (tryOptConstantBuildVec(I, DstTy, MRI))
return true;
+ if (tryOptBuildVecToSubregToReg(I, MRI))
+ return true;
+
if (EltSize < 16 || EltSize > 64)
return false; // Don't support all element types yet.
const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir
index af186b49af232..5de97256fc85a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-build-vector.mir
@@ -228,3 +228,55 @@ body: |
$d0 = COPY %1(<8 x s8>)
RET_ReallyLR
...
+---
+name: undef_elts_to_subreg_to_reg
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $s0
+ ; We have a BUILD_VECTOR whose 0th element is a subregister of the wide
+ ; register class. Everything else is undef. This is a SUBREG_TO_REG.
+
+ ; CHECK-LABEL: name: undef_elts_to_subreg_to_reg
+ ; CHECK: liveins: $s0
+ ; CHECK: %val:fpr32 = COPY $s0
+ ; CHECK: %bv:fpr128 = SUBREG_TO_REG 0, %val, %subreg.ssub
+ ; CHECK: $q0 = COPY %bv
+ ; CHECK: RET_ReallyLR implicit $q0
+ %val:fpr(s32) = COPY $s0
+ %undef:fpr(s32) = G_IMPLICIT_DEF
+ %bv:fpr(<4 x s32>) = G_BUILD_VECTOR %val(s32), %undef(s32), %undef(s32), %undef(s32)
+ $q0 = COPY %bv(<4 x s32>)
+ RET_ReallyLR implicit $q0
+...
+...
+---
+name: undef_elts_different_regbanks
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $w0
+ ; Element is not a subregister of the wide register class. This is not a
+ ; SUBREG_TO_REG.
+
+ ; CHECK-LABEL: name: undef_elts_different_regbanks
+ ; CHECK: liveins: $w0
+ ; CHECK: %val:gpr32all = COPY $w0
+ ; CHECK: %undef:gpr32 = IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %val, %subreg.ssub
+ ; CHECK: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, %undef
+ ; CHECK: [[INSvi32gpr1:%[0-9]+]]:fpr128 = INSvi32gpr [[INSvi32gpr]], 2, %undef
+ ; CHECK: %bv:fpr128 = INSvi32gpr [[INSvi32gpr1]], 3, %undef
+ ; CHECK: $q0 = COPY %bv
+ ; CHECK: RET_ReallyLR implicit $q0
+ %val:gpr(s32) = COPY $w0
+ %undef:gpr(s32) = G_IMPLICIT_DEF
+ %bv:fpr(<4 x s32>) = G_BUILD_VECTOR %val(s32), %undef(s32), %undef(s32), %undef(s32)
+ $q0 = COPY %bv(<4 x s32>)
+ RET_ReallyLR implicit $q0
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index cd9318882c532..276302b4b1133 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -572,10 +572,7 @@ define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest
; GISEL-LABEL: float_vrev64:
; GISEL: // %bb.0: // %entry
; GISEL-NEXT: movi d0, #0000000000000000
-; GISEL-NEXT: mov.s v0[1], v0[0]
-; GISEL-NEXT: mov.s v0[2], v0[0]
; GISEL-NEXT: adrp x8, .LCPI28_0
-; GISEL-NEXT: mov.s v0[3], v0[0]
; GISEL-NEXT: ldr q1, [x0]
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI28_0]
; GISEL-NEXT: tbl.16b v0, { v0, v1 }, v2
More information about the llvm-commits
mailing list