[llvm] [GlobalISel][AArch64] MVP for vectorized selects. (PR #76104)
Thorsten Schütt via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 20 13:48:48 PST 2023
https://github.com/tschuett created https://github.com/llvm/llvm-project/pull/76104
Try to select Selects where the condition is a vector.
Inspired by arm64-vselect.ll and extensions to it.
Note that not all legal types are covered by tests.
>From 360acc9a0b540012e559dc94bb2177ef86f48d1a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Wed, 20 Dec 2023 22:22:23 +0100
Subject: [PATCH 1/2] [GlobalISel][AArch64] MVP for vectorized selects.
Try to select Selects where the condition is a vector.
Inspired by arm64-vselect.ll and extensions to it.
Note that not all legal types are covered by tests.
---
.../GISel/AArch64InstructionSelector.cpp | 34 +++++++++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 8 ++-
.../AArch64/GlobalISel/select-select.mir | 71 +++++++++++++------
3 files changed, 91 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index a4ace6cce46342..6fbd850395fda6 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -507,6 +507,9 @@ class AArch64InstructionSelector : public InstructionSelector {
/// zero extended.
bool isDef32(const MachineInstr &MI) const;
+ /// Select selects where the condition is a vector.
+ bool selectVectorSelect(GSelect &Sel, MachineIRBuilder &MIB) const;
+
const AArch64TargetMachine &TM;
const AArch64Subtarget &STI;
const AArch64InstrInfo &TII;
@@ -1154,6 +1157,34 @@ static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
return GenericOpc;
}
+bool AArch64InstructionSelector::selectVectorSelect(
+ GSelect &Sel, MachineIRBuilder &MIB) const {
+ MachineRegisterInfo *MRI = MIB.getMRI();
+ Register Dst = Sel.getReg(0);
+ Register Cond = Sel.getCondReg();
+ Register True = Sel.getTrueReg();
+ Register False = Sel.getFalseReg();
+ LLT DestTy = MRI->getType(Dst);
+ LLT CondTy = MRI->getType(Cond);
+
+ // Scalable vector conditions are not handled yet; report selection failure.
+ if (CondTy.isScalable())
+ return false;
+
+ // Only handle a condition whose type equals the result type. Otherwise we
+ // would need to sext Cond to DestTy (i.e., sshll); fails for v2s64, v4s32, v8s16.
+ if (CondTy != DestTy)
+ return false;
+
+ unsigned Opcode =
+ CondTy.getSizeInBits() == 128 ? AArch64::BSLv16i8 : AArch64::BSLv8i8;
+ auto SelMI = MIB.buildInstr(Opcode, {Dst}, {Cond, True, False});
+ constrainSelectedInstRegOperands(*SelMI, TII, TRI, RBI); // NOTE(review): result is not checked.
+
+ Sel.eraseFromParent(); // The generic select has been replaced by the BSL above.
+ return true;
+}
+
MachineInstr *
AArch64InstructionSelector::emitSelect(Register Dst, Register True,
Register False, AArch64CC::CondCode CC,
@@ -3442,6 +3473,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
const Register TReg = Sel.getTrueReg();
const Register FReg = Sel.getFalseReg();
+ if (MRI.getType(CondReg).isVector())
+ return selectVectorSelect(Sel, MIB);
+
if (tryOptSelect(Sel))
return true;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 8b909f53c84460..0af18108c6710f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -708,7 +708,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
getActionDefinitionsBuilder(G_SELECT)
- .legalFor({{s32, s32}, {s64, s32}, {p0, s32}})
+ .legalFor({{s32, s32},
+ {s64, s32},
+ {p0, s32},
+ {v2s32, v2s32},
+ {v4s16, v4s16},
+ {v8s8, v8s8},
+ {v16s8, v16s8}})
.widenScalarToNextPow2(0)
.clampScalar(0, s32, s64)
.clampScalar(1, s32, s32)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
index e26c1431350979..8d125a81df49a5 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
@@ -95,7 +95,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1
- ; G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
; CHECK-LABEL: name: csinc_t_0_f_1
; CHECK: liveins: $w0, $w1
@@ -123,7 +122,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1
- ; G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
; CHECK-LABEL: name: csinv_t_0_f_neg_1
; CHECK: liveins: $w0, $w1
@@ -151,7 +149,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
- ; G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
; CHECK-LABEL: name: csinc_t_1
; CHECK: liveins: $w0, $w1, $w2
@@ -180,7 +177,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
- ; G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
; CHECK-LABEL: name: csinv_t_neg_1
; CHECK: liveins: $w0, $w1, $w2
@@ -209,7 +205,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
- ; G_SELECT cc, t, 1 -> CSINC t, zreg, cc
; CHECK-LABEL: name: csinc_f_1
; CHECK: liveins: $w0, $w1, $w2
@@ -238,7 +233,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
- ; G_SELECT cc, t, -1 -> CSINC t, zreg, cc
; CHECK-LABEL: name: csinc_f_neg_1
; CHECK: liveins: $w0, $w1, $w2
@@ -267,7 +261,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1
- ; G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
; CHECK-LABEL: name: csinc_t_1_no_cmp
; CHECK: liveins: $w0, $w1
@@ -294,7 +287,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1
- ; G_SELECT cc, t, 1 -> CSINC t, zreg, cc
; CHECK-LABEL: name: csinc_f_1_no_cmp
; CHECK: liveins: $w0, $w1
@@ -321,7 +313,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x0, $x1
- ; G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
; CHECK-LABEL: name: csinc_t_1_no_cmp_s64
; CHECK: liveins: $x0, $x1
@@ -350,7 +341,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
- ; G_SELECT cc, true, (G_SUB 0, x) -> CSNEG true, x, cc
; CHECK-LABEL: name: csneg_s32
; CHECK: liveins: $w0, $w1, $w2
@@ -380,7 +370,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
- ; G_SELECT cc, (G_SUB 0, %x), %false -> CSNEG %x, %false, inv_cc
; CHECK-LABEL: name: csneg_inverted_cc
; CHECK: liveins: $w0, $w1, $w2
@@ -410,7 +399,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x0, $x1, $x2
- ; G_SELECT cc, true, (G_SUB 0, x) -> CSNEG true, x, cc
; CHECK-LABEL: name: csneg_s64
; CHECK: liveins: $x0, $x1, $x2
@@ -441,8 +429,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
- ; We should prefer eliminating the G_SUB over eliminating the constant true
- ; value.
; CHECK-LABEL: name: csneg_with_true_cst
; CHECK: liveins: $w0, $w1, $w2
@@ -472,7 +458,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
- ; G_SELECT cc, true, (G_XOR x, -1) -> CSINV true, x, cc
; CHECK-LABEL: name: csinv_s32
; CHECK: liveins: $w0, $w1, $w2
@@ -502,7 +487,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
- ; G_SELECT cc, (G_XOR x, -1), %false -> CSINV %x, %false, inv_cc
; CHECK-LABEL: name: csinv_inverted_cc
; CHECK: liveins: $w0, $w1, $w2
@@ -532,7 +516,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x0, $x1, $x2
- ; G_SELECT cc, true, (G_XOR x, -1) -> CSINV true, x, cc
; CHECK-LABEL: name: csinv_s64
; CHECK: liveins: $x0, $x1, $x2
@@ -564,7 +547,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x0, $x1, $x2
- ; zext(s32 -1) != s64 -1, so we can't fold it away.
; CHECK-LABEL: name: xor_not_negative_one
; CHECK: liveins: $x0, $x1, $x2
@@ -601,7 +583,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
- ; G_SELECT cc, %true, (G_ADD %x, 1) -> CSINC %true, %x, cc
; CHECK-LABEL: name: csinc_s32
; CHECK: liveins: $w0, $w1, $w2
; CHECK-NEXT: {{ $}}
@@ -630,7 +611,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
- ; G_SELECT cc, (G_ADD %x, 1), %false -> CSINC %x, %false, inv_cc
; CHECK-LABEL: name: csinc_s32_inverted_cc
; CHECK: liveins: $w0, $w1, $w2
; CHECK-NEXT: {{ $}}
@@ -659,7 +639,6 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $x0, $x1, $x2
- ; G_SELECT cc, %true, (G_PTR_ADD %x, 1) -> CSINC %true, %x, cc
; CHECK-LABEL: name: csinc_ptr_add
; CHECK: liveins: $x0, $x1, $x2
@@ -713,3 +692,53 @@ body: |
%select:gpr(s32) = G_SELECT %reg0, %xor, %sub
$w0 = COPY %select(s32)
RET_ReallyLR implicit $w0
+...
+---
+name: select_vectorized_conditon_v2s32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1, $w2
+ ; CHECK-LABEL: name: select_vectorized_conditon_v2s32
+ ; CHECK: liveins: $w0, $w1, $w2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %reg0:gpr32all = COPY $w0
+ ; CHECK-NEXT: %reg1:gpr32 = COPY $w1
+ ; CHECK-NEXT: %reg2:gpr32all = COPY $w2
+ ; CHECK-NEXT: %reg3:gpr32 = COPY $w0
+ ; CHECK-NEXT: %reg4:gpr32all = COPY $w1
+ ; CHECK-NEXT: %reg5:gpr32 = COPY $w2
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %reg0, %subreg.ssub
+ ; CHECK-NEXT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, %reg1
+ ; CHECK-NEXT: %true:fpr64 = COPY [[INSvi32gpr]].dsub
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], %reg2, %subreg.ssub
+ ; CHECK-NEXT: [[INSvi32gpr1:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG1]], 1, %reg3
+ ; CHECK-NEXT: %false:fpr64 = COPY [[INSvi32gpr1]].dsub
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], %reg4, %subreg.ssub
+ ; CHECK-NEXT: [[INSvi32gpr2:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG2]], 1, %reg5
+ ; CHECK-NEXT: %cond:fpr64 = COPY [[INSvi32gpr2]].dsub
+ ; CHECK-NEXT: %select:fpr64 = BSLv8i8 %cond, %true, %false
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF3]], %select, %subreg.dsub
+ ; CHECK-NEXT: %extract:fpr32 = DUPi32 [[INSERT_SUBREG3]], 1
+ ; CHECK-NEXT: $w0 = COPY %extract
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %idx:gpr(s64) = G_CONSTANT i64 1
+ %reg0:gpr(s32) = COPY $w0
+ %reg1:gpr(s32) = COPY $w1
+ %reg2:gpr(s32) = COPY $w2
+ %reg3:gpr(s32) = COPY $w0
+ %reg4:gpr(s32) = COPY $w1
+ %reg5:gpr(s32) = COPY $w2
+ %true:fpr(<2 x s32>) = G_BUILD_VECTOR %reg0(s32), %reg1(s32)
+ %false:fpr(<2 x s32>) = G_BUILD_VECTOR %reg2(s32), %reg3(s32)
+ %cond:fpr(<2 x s32>) = G_BUILD_VECTOR %reg4(s32), %reg5(s32)
+ %select:fpr(<2 x s32>) = G_SELECT %cond, %true, %false
+ %extract:fpr(s32) = G_EXTRACT_VECTOR_ELT %select:fpr(<2 x s32>), %idx:gpr(s64)
+ $w0 = COPY %extract(s32)
+ RET_ReallyLR implicit $w0
>From b6a7d7418ad75eb52d7ef1c614821364d6398067 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Wed, 20 Dec 2023 22:45:34 +0100
Subject: [PATCH 2/2] preserve comments
---
.../AArch64/GlobalISel/select-select.mir | 20 +++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
index 8d125a81df49a5..9602dd7ef0a7fd 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
@@ -88,6 +88,7 @@ body: |
RET_ReallyLR implicit $w0
...
---
+# G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
name: csinc_t_0_f_1
legalized: true
regBankSelected: true
@@ -115,6 +116,7 @@ body: |
RET_ReallyLR implicit $w0
...
---
+# G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
name: csinv_t_0_f_neg_1
legalized: true
regBankSelected: true
@@ -142,6 +144,7 @@ body: |
RET_ReallyLR implicit $w0
...
---
+# G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
name: csinc_t_1
legalized: true
regBankSelected: true
@@ -198,6 +201,7 @@ body: |
RET_ReallyLR implicit $w0
...
---
+# G_SELECT cc, t, 1 -> CSINC t, zreg, cc
name: csinc_f_1
legalized: true
regBankSelected: true
@@ -226,6 +230,7 @@ body: |
RET_ReallyLR implicit $w0
...
---
+# G_SELECT cc, t, -1 -> CSINC t, zreg, cc
name: csinc_f_neg_1
legalized: true
regBankSelected: true
@@ -254,6 +259,7 @@ body: |
RET_ReallyLR implicit $w0
...
---
+# G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
name: csinc_t_1_no_cmp
legalized: true
regBankSelected: true
@@ -280,6 +286,7 @@ body: |
...
---
+# G_SELECT cc, t, 1 -> CSINC t, zreg, cc
name: csinc_f_1_no_cmp
legalized: true
regBankSelected: true
@@ -306,6 +313,7 @@ body: |
...
---
+# G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
name: csinc_t_1_no_cmp_s64
legalized: true
regBankSelected: true
@@ -334,6 +342,7 @@ body: |
...
---
+# G_SELECT cc, true, (G_SUB 0, x) -> CSNEG true, x, cc
name: csneg_s32
legalized: true
regBankSelected: true
@@ -363,6 +372,7 @@ body: |
...
---
+# G_SELECT cc, (G_SUB 0, %x), %false -> CSNEG %x, %false, inv_cc
name: csneg_inverted_cc
legalized: true
regBankSelected: true
@@ -392,6 +402,7 @@ body: |
...
---
+# G_SELECT cc, true, (G_SUB 0, x) -> CSNEG true, x, cc
name: csneg_s64
legalized: true
regBankSelected: true
@@ -422,6 +433,8 @@ body: |
RET_ReallyLR implicit $x0
...
---
+# We should prefer eliminating the G_SUB over eliminating the constant true
+# value.
name: csneg_with_true_cst
legalized: true
regBankSelected: true
@@ -451,6 +464,7 @@ body: |
RET_ReallyLR implicit $w0
...
---
+# G_SELECT cc, true, (G_XOR x, -1) -> CSINV true, x, cc
name: csinv_s32
legalized: true
regBankSelected: true
@@ -480,6 +494,7 @@ body: |
...
---
+# G_SELECT cc, (G_XOR x, -1), %false -> CSINV %x, %false, inv_cc
name: csinv_inverted_cc
legalized: true
regBankSelected: true
@@ -509,6 +524,7 @@ body: |
...
---
+# G_SELECT cc, true, (G_XOR x, -1) -> CSINV true, x, cc
name: csinv_s64
legalized: true
regBankSelected: true
@@ -540,6 +556,7 @@ body: |
...
---
+# zext(s32 -1) != s64 -1, so we can't fold it away.
name: xor_not_negative_one
legalized: true
regBankSelected: true
@@ -576,6 +593,7 @@ body: |
...
---
+# G_SELECT cc, %true, (G_ADD %x, 1) -> CSINC %true, %x, cc
name: csinc_s32
legalized: true
regBankSelected: true
@@ -604,6 +622,7 @@ body: |
...
---
+# G_SELECT cc, (G_ADD %x, 1), %false -> CSINC %x, %false, inv_cc
name: csinc_s32_inverted_cc
legalized: true
regBankSelected: true
@@ -632,6 +651,7 @@ body: |
...
---
+# G_SELECT cc, %true, (G_PTR_ADD %x, 1) -> CSINC %true, %x, cc
name: csinc_ptr_add
legalized: true
regBankSelected: true
More information about the llvm-commits
mailing list